1 /*****************************************************************************
2    Major portions of this software are copyrighted by the Medical College
3    of Wisconsin, 1994-2000, and are released under the Gnu General Public
4    License, Version 2.  See the file README.Copyright for details.
5 ******************************************************************************/
6 
7 /*!
8   This file contains matrix and vector arithmetic routines.
9 
10   File:     matrix.c
11   Author:   B. Douglas Ward
12   Date:     23 April 1997
13 
14   Mod:      Changed print format for functions matrix_print and vector_print.
15   Date:     05 August 1997
16 
17   Mod:      Changed initialization in function vector_create.
18   Date:     04 November 1997
19 
20   Mod:      Added routines matrix_file_write and matrix_file_read.
21   Date:     02 July 1999
22 
23   Mod:      Added routine for calculating square root of matrix.
24   Date:     30 September 1999
25 
26   Mod:      Added routines matrix_sprint and vector_sprint.
27   Date:     04 October 1999
28 
29   Mod:      Modified matrix_file_read to use mri_read_ascii routine.
30   Date:     12 January 2000
31 
32   Mod:      Changed return type of vector_dot from float to double.
33   Date:     13 April 2000
34 
35   Mod:      Added functions column_to_vector and matrix_extract_rows.
36   Date:     21 April 2000
37 
38   Mod:      Added test for missing matrix file name.
39   Date:     08 May 2000
40 
41   Mod:      Added "register" declarations and a few other things to speed
42             up calculations (including vector_create_noinit) -- RWCox.
43   Date:     28 Dec 2001
44 
45   Mod:      Allow matrices and vectors with zero rows and columns.
46   Date:     26 February 2002
47 
48   Mod:      Corrected errors in vector_multiply and vector_multiply_subtract
49             routines that would produce segmentation fault for certain input
50 	    matrices.
51   Date:     18 March 2003
52 
53   Mod:      UNROLL_VECMUL defined to allow unrolling by 2 of vector-multiply
54             dot product loops.
55 
56   Mod:      'ipr' added to matrix_print() function.
57   Date:     03 Aug 2004 - RWCox
58 
59   Mod:      Added USE_SCSLBLAS stuff for SGI Altix, and USE_SUNPERF for Solaris.
60   Date:     01 Mar 2005
61 
62   Mod:      Freed memory for an orphaned matrix in matrix_sqrt function
63   Date:     26 Mar 2008 - drg
64 
65 */
66 
67 /*---------------------------------------------------------------------*/
68 /** Vectorization macros:
69    - DOTP(n,x,y,z) computes the n-long dot product of vectors
70        x and y and puts the result into the place pointed to by z.
71    - VSUB(n,x,y,z) computes vector x-y into vector z.
72    - These are intended to be the fast method for doing these things. **/
73 /*---------------------------------------------------------------------*/
74 
75 #include "mrilib.h"
76 #include "matrix.h"
77 
78 #undef SETUP_BLAS1  /* define this to use BLAS-1 functions */
79 #undef SETUP_BLAS2  /* define this to use BLAS-2 functions */
80 #undef DOTP
81 #undef VSUB
82 #undef MATVEC
83 #undef SUBMATVEC
84 
85 #if defined(USE_SCSLBLAS)                            /** SGI Altix **/
86 #  include <scsl_blas.h>
87 #  define SETUP_BLAS1
88 #  undef  SETUP_BLAS2     /* don't use this */
89 #  define TRANSA "T"
90 #elif defined(USE_SUNPERF)                           /** Sun Solaris **/
91 #  include <sunperf.h>
92 #  define SETUP_BLAS1
93 #  undef SETUP_BLAS2
94 #  define TRANSA 'T'
95 #elif defined(USE_ACML)
96 #  ifndef _ACML_COMPLEX
97 #  define _ACML_COMPLEX
98  /*    typedef struct { float real, imag; } complex; */
99  /*    typedef struct { double real, imag; } doublecomplex; */
100 #  endif
101 #  include <acml.h>
102 #  define SETUP_BLAS1
103 #endif
104 
105 /* double precision BLAS-1 functions */
106 
107 #ifdef SETUP_BLAS1
108 # define DOTP(n,x,y,z) *(z)=ddot(n,x,1,y,1)
109 # define VSUB(n,x,y,z) (memcpy(z,x,sizeof(double)*n),daxpy(n,-1.0,y,1,z,1))
110 #endif
111 
112 /*.......................................................................
113    BLAS-2 function operate on matrix-vector structs defined in matrix.h:
114 
115    MATVEC(m,v,z):    [z] = [m][v] where m=matrix, z,v = matrices
116    SUBMATVEC(m,v,z): [z] = [z] - [m][v]
117 .........................................................................*/
118 
119 #ifdef DONT_USE_MATRIX_MAT
120 # undef SETUP_BLAS2
121 #endif
122 
123 #ifdef SETUP_BLAS2  /* doesn't seem to help much */
124 
125 # define MATVEC(m,v,z) dgemv( TRANSA , (m).cols , (m).rows ,       \
126                               1.0 , (m).mat , (m).cols ,           \
127                               (v).elts , 1 , 0.0 , (z).elts , 1 )
128 
129 # define SUBMATVEC(m,v,z) dgemv( TRANSA , (m).cols , (m).rows ,      \
130                                  -1.0 , (m).mat , (m).cols ,         \
131                                  (v).elts , 1 , 1.0 , (z).elts , 1 )
132 #endif
133 
134 /*---------------------------------------------------------------------------*/
135 static double flops=0.0 ;
get_matrix_flops(void)136 double get_matrix_flops(void){ return flops; }
137 
138 static double dotnum=0.0 , dotsum=0.0 ;
get_matrix_dotlen(void)139 double get_matrix_dotlen(void){ return (dotnum > 0.0) ? dotsum/dotnum : 0.0 ; }
140 
141 #ifndef USE_OMP
142 # define ENABLE_FLOPS
143 #else
144 # undef  ENABLE_FLOPS
145 #endif
146 
147 /*---------------------------------------------------------------------------*/
148 /*!
149   Routine to print and error message and stop.
150 */
151 
matrix_error(char * message)152 void matrix_error (char *message)
153 {
154   printf ("Matrix error: %s \n", message);
155   EXIT (1);
156 }
157 
158 
159 /*---------------------------------------------------------------------------*/
160 /*!
161   Initialize matrix data structure.
162 */
163 
matrix_initialize(matrix * m)164 void matrix_initialize (matrix *m)
165 {
166   m->rows = 0;
167   m->cols = 0;
168   m->elts = NULL;
169 #ifndef DONT_USE_MATRIX_MAT
170   m->mat  = NULL ;  /* 04 Mar 2005 */
171 #endif
172 }
173 
174 
175 /*---------------------------------------------------------------------------*/
176 /*!
177   Destroy matrix data structure by deallocating memory.
178 */
179 
matrix_destroy(matrix * m)180 void matrix_destroy (matrix *m)
181 {
182   if (m->elts != NULL){
183 #ifdef DONT_USE_MATRIX_MAT
184     int i ;
185     for( i=0 ; i < m->rows ; i++ )
186       if( m->elts[i] != NULL ) free(m->elts[i]) ;
187 #endif
188     free(m->elts) ;
189   }
190 #ifndef DONT_USE_MATRIX_MAT
191   if( m->mat  != NULL) free (m->mat ) ;
192 #endif
193   matrix_initialize (m);
194 }
195 
196 /*---------------------------------------------------------------------------*/
197 /*!
198   Create matrix data structure by allocating memory and initializing values.
199 
200 */
201 
matrix_create(int rows,int cols,matrix * m)202 void matrix_create (int rows, int cols, matrix *m)
203 {
204   register int i ;
205 
206   matrix_destroy (m);
207 
208   if ((rows < 0) || (cols < 0))
209     matrix_error ("Illegal dimensions for new matrix");
210 
211   m->rows = rows;
212   m->cols = cols;
213   if ((rows < 1) || (cols < 1))  return;
214 
215   m->elts = (double **) malloc (sizeof(double *) * rows);
216   if (m->elts == NULL)
217     matrix_error ("Memory allocation error");
218 
219 #ifdef DONT_USE_MATRIX_MAT
220   for (i = 0;  i < rows;  i++){
221     m->elts[i] = (double *) calloc (sizeof(double) , cols);
222     if (m->elts[i] == NULL) matrix_error ("Memory allocation error");
223   }
224 #else
225   m->mat  = (double *) calloc( sizeof(double) , rows*cols ) ;
226   if( m->mat == NULL )
227     matrix_error ("Memory allocation error");
228   for (i = 0;  i < rows;  i++)
229      m->elts[i] = m->mat + (i*cols) ;   /* 04 Mar 2005: offsets into mat */
230 #endif
231 }
232 
233 
234 /*---------------------------------------------------------------------------*/
235 /*!
236   Print contents of matrix m.
237 */
238 
matrix_print(matrix m)239 void matrix_print (matrix m)
240 {
241   int i=0, j=0;
242   int rows, cols;
243   double val ;
244   int ipr ;
245 
246   rows = m.rows; if( rows < 1 ) return ;
247   cols = m.cols; if( cols < 1 ) return ;
248 
249   for( i=0 ; i < rows ; i++ ){
250     for( j=0 ; j < cols ; j++ ){
251       val = (int)m.elts[i][j] ;
252       if( val != m.elts[i][j] || fabs(val) > 99.0 ) goto zork ;
253     }
254   }
255 zork:
256   ipr = (i==rows && j==cols) ;
257 
258   for (i = 0;  i < rows;  i++)
259     {
260       for (j = 0;  j < cols;  j++)
261         if( ipr ) printf (" %3d"   , (int)m.elts[i][j]);
262         else      printf (" %10.4g", m.elts[i][j]);
263       printf (" \n");
264     }
265   printf (" \n"); fflush(stdout) ;
266 }
267 
268 
269 /*---------------------------------------------------------------------------*/
270 /*!
271   Print label and contents of matrix m.
272 */
273 
matrix_sprint(char * s,matrix m)274 void matrix_sprint (char * s, matrix m)
275 {
276   printf ("%s \n", s);
277   matrix_print (m);
278 }
279 
280 
281 /*---------------------------------------------------------------------------*/
282 /*!
283   Print contents of matrix m to specified file.
284 */
285 
matrix_file_write(char * filename,matrix m)286 void matrix_file_write (char * filename, matrix m)
287 {
288   int i, j;
289   int rows, cols;
290   FILE * outfile = NULL;
291 
292 
293   /*----- First, check for empty file name -----*/
294   if (filename == NULL)  matrix_error ("Missing matrix file name");
295 
296 
297   outfile = fopen (filename, "w");
298 
299   rows = m.rows;
300   cols = m.cols;
301 
302   for (i = 0;  i < rows;  i++)
303     {
304       for (j = 0;  j < cols;  j++)
305         fprintf (outfile, "  %g", m.elts[i][j]);
306       fprintf (outfile, " \n");
307     }
308   fprintf (outfile, " \n");
309 
310   fclose (outfile);
311 }
312 
313 /*---------------------------------------------------------------------------*/
314 /*!
315   Manual entry of matrix data.
316 */
317 
matrix_enter(matrix * m)318 void matrix_enter (matrix * m)
319 {
320   int rows, cols;
321   int i, j;
322   float fval;
323 
324   printf ("Enter number of rows: "); fflush(stdout) ;
325   scanf ("%d", &rows);
326   printf ("Enter number of cols: "); fflush(stdout) ;
327   scanf ("%d", &cols);
328 
329   matrix_create (rows, cols, m);
330 
331   for (i = 0;  i < rows;  i++)
332     for (j = 0;  j < cols;  j++)
333       {
334 	     printf ("elts[%d][%d] = ", i, j); fflush(stdout);
335 	     scanf ("%f", &fval);
336 	     m->elts[i][j] = fval;
337       }
338 }
339 
340 
341 /*---------------------------------------------------------------------------*/
342 /*!
343   Read contents of matrix m from specified file.
344   If unable to read matrix from file, or matrix has wrong dimensions:
345      If error_exit flag is set, then print error message and exit.
346      Otherwise, return null matrix.
347 */
348 
matrix_file_read(char * filename,int rows,int cols,matrix * m,int error_exit)349 void matrix_file_read (char * filename, int rows, int cols,  matrix * m,
350 		       int error_exit)
351 {
352   int i, j;
353 
354   MRI_IMAGE * flim;  /* pointer to image structure
355 			      -- used to read ASCII file */
356   float * far;             /* pointer to MRI_IMAGE floating point data */
357   char message [80];       /* error message */
358 
359 
360   /*----- First, check for empty file name -----*/
361   if (filename == NULL)  matrix_error ("Missing matrix file name");
362 
363 
364   /*----- Read the matrix file -----*/
365   flim = mri_read_1D(filename);
366   if (flim == NULL) {
367        if (error_exit)
368          {
369 	   sprintf (message,  "Unable to read matrix from file: %s",  filename);
370 	   matrix_error (message);
371          }
372        else
373          {
374 	   matrix_destroy (m);
375 	   return;
376          }
377   }
378 
379 
380   /*----- Set pointer to data  -----*/
381   far = MRI_FLOAT_PTR(flim);
382 
383 
384   /*----- Test for correct dimensions -----*/
385   if ( (rows != flim->nx) || (cols != flim->ny) ) {
386        if (error_exit)
387          {
388 	   sprintf (message,
389 		    "In matrix file: %s   Expected: %d x %d   Actual: %d x %d",
390 		    filename, rows, cols, flim->nx, flim->ny);
391 	   matrix_error (message);
392          }
393        else
394          {
395 	   matrix_destroy (m);
396 	   return;
397          }
398    }
399 
400 
401   matrix_create (rows, cols, m);
402 
403 
404   /*----- Copy data from image structure to matrix structure -----*/
405   for (i = 0;  i < rows;  i++)
406     for (j = 0;  j < cols;  j++)
407       m->elts[i][j] = far[i + j*rows];
408 
409 
410   { mri_free (flim);  flim = NULL; }
411 
412 }
413 
414 
415 /*---------------------------------------------------------------------------*/
416 /*!
417   Convert simple array to matrix structure.
418 */
419 
array_to_matrix(int rows,int cols,float ** f,matrix * m)420 void array_to_matrix (int rows, int cols, float ** f, matrix * m)
421 {
422   register int i, j;
423 
424   matrix_create (rows, cols, m);
425 
426   for (i = 0;  i < rows;  i++)
427     for (j = 0;  j < cols;  j++)
428       m->elts[i][j] = f[i][j];
429 }
430 
431 
432 /*---------------------------------------------------------------------------*/
433 /*!
434   Make a copy of the first matrix, return copy as the second matrix.
435 */
436 
matrix_equate(matrix a,matrix * b)437 void matrix_equate (matrix a, matrix * b)
438 {
439   register int i;
440   register int rows, cols;
441 
442   rows = a.rows;
443   cols = a.cols;
444 
445   matrix_create (rows, cols, b);
446 
447   for (i = 0;  i < rows;  i++){
448 #if 0
449     for (j = 0;  j < cols;  j++)
450       b->elts[i][j] = a.elts[i][j];
451 #else
452     if( cols > 0 )
453       memcpy( b->elts[i] , a.elts[i] , sizeof(double)*cols ) ;  /* RWCox */
454 #endif
455   }
456 }
457 
458 
459 /*---------------------------------------------------------------------------*/
460 /*!
461     Extending a matrix with nradd rows and ncadd columns (zero-filled).
462 */
463 
matrix_enlarge(int nradd,int ncadd,matrix * a)464 void matrix_enlarge( int nradd , int ncadd , matrix *a )
465 {
466   int i , rows, cols ;
467   matrix *b ;
468 
469   if( ncadd < 0 ) ncadd = 0 ;  /* no subtraction allowed */
470   if( nradd < 0 ) nradd = 0 ;
471 
472   if( nradd == 0 && ncadd == 0 ) return ;  /* nothing to do? */
473 
474   rows = a->rows ; cols = a->cols ;
475 
476   /* create bigger matrix with extra rows/columns */
477 
478   b = (matrix *)malloc(sizeof(matrix)) ;
479   matrix_initialize( b ) ;
480   matrix_create( rows+nradd , cols+ncadd, b ) ;  /* zero-filled */
481 
482   /* copy row data from a into b */
483 
484   if( cols > 0 ){
485     for( i=0 ; i < rows ; i++ ){
486       memcpy( b->elts[i] , a->elts[i] , sizeof(double)*cols ) ;
487     }
488   }
489 
490   /* destroy contents of a, replace with contents of b */
491 
492   matrix_destroy(a) ; *a = *b ; return ;
493 }
494 
495 
496 /*---------------------------------------------------------------------------*/
497 /*!
498   Extract p columns (specified by list) from matrix a.  Result is matrix b.
499 */
500 
matrix_extract(matrix a,int p,int * list,matrix * b)501 void matrix_extract (matrix a, int p, int *list, matrix * b)
502 {
503   register int i, j;
504   register int rows, cols;
505 
506   rows = a.rows;
507   cols = p;
508 
509   matrix_create (rows, cols, b);
510 
511   for (i = 0;  i < rows;  i++)
512     for (j = 0;  j < cols;  j++)
513       b->elts[i][j] = a.elts[i][list[j]];
514 }
515 
516 
517 /*---------------------------------------------------------------------------*/
518 /*!
519   Extract p rows (specified by list) from matrix a.  Result is matrix b.
520 */
521 
matrix_extract_rows(matrix a,int p,int * list,matrix * b)522 void matrix_extract_rows (matrix a, int p, int * list, matrix * b)
523 {
524   register int i, j;
525   register int rows, cols;
526 
527   rows = p;
528   cols = a.cols;
529 
530   matrix_create (rows, cols, b);
531 
532   for (i = 0;  i < rows;  i++)
533     for (j = 0;  j < cols;  j++)
534       b->elts[i][j] = a.elts[list[i]][j];
535 }
536 
537 /*---------------------------------------------------------------------------*/
538 /*!
539   Add columns that are all 0 except for a single 1, as indicated. [16 Aug 2019]
540   For use in 3dDeconvolve experimentation [RWC].
541 */
542 
matrix_augment_01_columns(matrix a,int nadd,int * addlist,matrix * b)543 void matrix_augment_01_columns( matrix a, int nadd, int *addlist, matrix *b )
544 {
545    int arows,brows , acols,bcols , i,j,k,aa ;
546 
547    if( b == NULL ) return ;   /* bad input */
548 
549    if( nadd <= 0 || addlist == NULL ){ /* nothing to do but copy input */
550      matrix_equate(a,b) ; return ;
551    }
552 
553    /* check addlist for bad entries */
554 
555    for( aa=0 ; aa < nadd ; aa++ ){
556      if( addlist[aa] < 0 || addlist[aa] >= a.rows ){
557        fprintf(stderr,"** ERROR: bad index in matrix_augment_01_columns\n") ;
558        return ;
559      }
560    }
561 
562    arows = brows = a.rows ;
563    acols = a.cols ;
564    bcols = acols + nadd ;
565 
566    matrix_create( brows , bcols , b ) ;
567 
568    /* copy original part */
569 
570    for( i=0 ; i < brows ; i++ )
571      for( j=0 ; j < acols ; j++ ) b->elts[i][j] = a.elts[i][j] ;
572 
573    /* add new cols */
574 
575    for( aa=0 ; aa < nadd ; aa++ ){
576      k = addlist[aa] ; j = acols+aa ;
577      for( i=0 ; i < brows ; i++ )
578        b->elts[i][j] = (i==k) ? 1.0 : 0.0 ;
579    }
580 
581    return ;
582 }
583 
584 /*---------------------------------------------------------------------------*/
585 /*! Return value is number of rows deleted.
586     If this is zero, the output matrix *b is not created.
587     If the output matrix would have no rows
588       (return value == number of rows input), then *b is also not created.
589 *//*-------------------------------------------------------------------------*/
590 
matrix_delete_allzero_rows(matrix a,matrix * b)591 int matrix_delete_allzero_rows( matrix a , matrix *b )
592 {
593    int ii , jj , rows , cols , np=0 , *lp ;
594 
595    rows = a.rows ;
596    cols = a.cols ; if( rows < 1 || cols < 1 || b == NULL ) return 0 ;
597 
598    /* make list of rows to keep */
599 
600    lp = (int *)malloc(sizeof(int)*rows) ;
601    for( ii=0 ; ii < rows ; ii++ ){
602      for( jj=0 ; jj < cols && a.elts[ii][jj] == 0.0 ; jj++ ) ; /*nada*/
603      if( jj < cols ) lp[np++] = ii ;
604    }
605 
606    if( np > 0 && np < rows )
607      matrix_extract_rows( a , np , lp , b ) ;
608 
609    free(lp) ; return (rows-np) ;
610 }
611 
612 /*---------------------------------------------------------------------------*/
613 /*!
614   Create n x n identity matrix.
615 */
616 
matrix_identity(int n,matrix * m)617 void matrix_identity (int n, matrix * m)
618 {
619   register int i, j;
620 
621   if (n < 0)
622     matrix_error ("Illegal dimensions for identity matrix");
623 
624   matrix_create (n, n, m);
625 
626   for (i = 0;  i < n;  i++)
627     for (j = 0;  j < n;  j++)
628       if (i == j)
629 	m->elts[i][j] = 1.0;
630       else
631 	m->elts[i][j] = 0.0;
632 }
633 
634 
635 /*---------------------------------------------------------------------------*/
636 /*!
637   Add matrix a to matrix b.  Result is matrix c.
638 */
639 
matrix_add(matrix a,matrix b,matrix * c)640 void matrix_add (matrix a, matrix b, matrix * c)
641 {
642   register int rows, cols;
643   register int i, j;
644 
645   if ((a.rows != b.rows) || (a.cols != b.cols))
646     matrix_error ("Incompatible dimensions for matrix addition");
647 
648   rows = a.rows;
649   cols = a.cols;
650 
651   matrix_create (rows, cols, c);
652 
653   for (i = 0;  i < rows;  i++)
654     for (j = 0;  j < cols;  j++)
655       c->elts[i][j] = a.elts[i][j] + b.elts[i][j];
656 
657 #ifdef ENABLE_FLOPS
658   flops += rows*cols ;
659 #endif
660 }
661 
662 
663 /*---------------------------------------------------------------------------*/
664 /*!
665   Subtract matrix b from matrix a.  Result is matrix c.
666 */
667 
matrix_subtract(matrix a,matrix b,matrix * c)668 void matrix_subtract (matrix a, matrix b, matrix * c)
669 {
670   register int rows, cols;
671   register int i, j;
672 
673   if ((a.rows != b.rows) || (a.cols != b.cols))
674     matrix_error ("Incompatible dimensions for matrix subtraction");
675 
676   rows = a.rows;
677   cols = a.cols;
678 
679   matrix_create (rows, cols, c);
680 
681   for (i = 0;  i < rows;  i++)
682     for (j = 0;  j < cols;  j++)
683       c->elts[i][j] = a.elts[i][j] - b.elts[i][j];
684 
685 #ifdef ENABLE_FLOPS
686   flops += rows*cols ;
687 #endif
688 }
689 
690 
691 /*---------------------------------------------------------------------------*/
692 /*!
693   Multiply matrix a by matrix b.  Result is matrix c.
694 */
695 
matrix_multiply(matrix a,matrix b,matrix * c)696 void matrix_multiply (matrix a, matrix b, matrix * c)
697 {
698   int rows, cols;
699   register int i, j, k;
700   register double sum ;
701 
702   if (a.cols != b.rows)
703     matrix_error ("Incompatible dimensions for matrix multiplication");
704 
705   rows = a.rows;
706   cols = b.cols;
707 
708   matrix_create (rows, cols, c);
709 
710   for (i = 0;  i < rows;  i++)
711     for (j = 0;  j < cols;  j++)
712       {
713         sum = 0.0 ;
714 	for (k = 0;  k < a.cols;  k++)
715 	  sum += a.elts[i][k] * b.elts[k][j];
716         c->elts[i][j] = sum;
717       }
718 
719 #ifdef ENABLE_FLOPS
720   flops += 2.0*rows*cols*cols ;
721 #endif
722 }
723 
724 
725 /*---------------------------------------------------------------------------*/
726 /*!
727   Multiply matrix a by scalar constant k.  Result is matrix c.
728 */
729 
matrix_scale(double k,matrix a,matrix * c)730 void matrix_scale (double k, matrix a, matrix * c)
731 {
732   register int rows, cols;
733   register int i, j;
734 
735   rows = a.rows;
736   cols = a.cols;
737 
738   matrix_create (rows, cols, c);
739 
740   for (i = 0;  i < rows;  i++)
741     for (j = 0;  j < cols;  j++)
742       c->elts[i][j] = k * a.elts[i][j];
743 
744 #ifdef ENABLE_FLOPS
745   flops += rows*cols ;
746 #endif
747 }
748 
749 
750 /*---------------------------------------------------------------------------*/
751 /*!
752   Take transpose of matrix a.  Result is matrix t.
753 */
754 
matrix_transpose(matrix a,matrix * t)755 void matrix_transpose (matrix a, matrix * t)
756 {
757   register int rows, cols;
758   register int i, j;
759 
760   rows = a.cols;
761   cols = a.rows;
762 
763   matrix_create (rows, cols, t);
764   for (i = 0;  i < rows;  i++)
765     for (j = 0;  j < cols;  j++)
766       t->elts[i][j] = a.elts[j][i];
767 }
768 
769 /*---------------------------------------------------------------------------*/
770 /*!
771   Use Gaussian elimination to calculate inverse of matrix a.  Result is
772   matrix ainv.
773 */
774 
matrix_inverse(matrix a,matrix * ainv)775 int matrix_inverse (matrix a, matrix * ainv)
776 {
777   const double epsilon = 1.0e-10;
778   matrix tmp;
779   register int i, j, ii, n;
780   register double fval;
781   register double fmax;
782   register double * p;
783 
784   matrix_initialize (&tmp);
785 
786 
787   if (a.rows != a.cols)
788     matrix_error ("Illegal dimensions for matrix inversion");
789 
790 #if 0
791 matrix_sprint("matrix_inverse:",a) ;
792 #endif
793 
794   n = a.rows;
795   matrix_identity (n, ainv);
796   matrix_equate (a, &tmp);
797 
798   for (i = 0;  i < n;  i++)
799     {
800       fmax = fabs(tmp.elts[i][i]);
801       for (j = i+1;  j < n;  j++)
802 	if (fabs(tmp.elts[j][i]) > fmax)
803 	  {
804 	    fmax = fabs(tmp.elts[j][i]);
805 	    p = tmp.elts[i];
806 	    tmp.elts[i] = tmp.elts[j];
807 	    tmp.elts[j] = p;
808 	    p = ainv->elts[i];
809 	    ainv->elts[i] = ainv->elts[j];
810 	    ainv->elts[j] = p;
811 	  }
812 
813       if (fmax < epsilon)
814 	{
815 	  matrix_destroy (&tmp);
816 	  return (0);
817 	}
818 
819 
820       fval = 1.0 / tmp.elts[i][i];   /* RWCox: change division by this to */
821       for (j = 0;  j < n;  j++)      /*        multiplication by 1.0/this */
822 	{
823 	  tmp.elts[i][j]   *= fval;
824 	  ainv->elts[i][j] *= fval;
825 	}
826       for (ii = 0;  ii < n;  ii++)
827 	if (ii != i)
828 	  {
829 	    fval = tmp.elts[ii][i];
830 	    for (j = 0;  j < n;  j++)
831 	      {
832 		tmp.elts[ii][j] -= fval*tmp.elts[i][j];
833 		ainv->elts[ii][j] -= fval*ainv->elts[i][j];
834 	      }
835 	  }
836 
837     }
838 
839   matrix_destroy (&tmp);
840 
841 #ifdef ENABLE_FLOPS
842   flops += 3.0*n*n*n ;
843 #endif
844   return (1);
845 }
846 
847 
848 /*---------------------------------------------------------------------------*/
849 /*!
850   Use Gaussian elimination to calculate inverse of matrix a, with diagonal
851   scaling applied for stability.  Result is matrix ainv.
852 */
853 
matrix_inverse_dsc(matrix a,matrix * ainv)854 int matrix_inverse_dsc (matrix a, matrix * ainv)  /* 15 Jul 2004 - RWCox */
855 {
856   matrix atmp;
857   register int i, j, n;
858   register double *diag ;
859   int mir ;
860 
861   if (a.rows != a.cols)
862     matrix_error ("Illegal dimensions for matrix inversion");
863 
864   matrix_initialize (&atmp);
865 
866   n = a.rows;
867   matrix_equate (a, &atmp);
868   diag = (double *)malloc( sizeof(double)*n ) ;
869   for( i=0 ; i < n ; i++ ){
870     diag[i] = fabs(atmp.elts[i][i]) ;
871     if( diag[i] == 0.0 ) diag[i] = 1.0 ;  /* shouldn't happen? */
872     diag[i] = 1.0 / sqrt(diag[i]) ;
873   }
874 
875   for( i=0 ; i < n ; i++ )                /* scale a */
876    for( j=0 ; j < n ; j++ )
877     atmp.elts[i][j] *= diag[i]*diag[j] ;
878 
879   mir = matrix_inverse( atmp , ainv ) ;   /* invert */
880 
881   for( i=0 ; i < n ; i++ )                /* scale inverse */
882    for( j=0 ; j < n ; j++ )
883     ainv->elts[i][j] *= diag[i]*diag[j] ;
884 
885   matrix_destroy (&atmp);
886   free((void *)diag) ;
887 #ifdef ENABLE_FLOPS
888   flops += 4.0*n*n + 4.0*n ;
889 #endif
890   return (mir);
891 }
892 
893 
894 /*---------------------------------------------------------------------------*/
895 /*!
896   Calculate square root of symmetric positive definite matrix a.
897   Result is matrix s.
898 */
899 
matrix_sqrt(matrix a,matrix * s)900 int matrix_sqrt (matrix a, matrix * s)
901 {
902   const int MAX_ITER = 100;
903   int n;
904   int ok;
905   int iter;
906   register float sse, psse;
907   register int i, j;
908   matrix x, xinv, axinv, xtemp, error;
909 
910   matrix_initialize (&x);
911   matrix_initialize (&xinv);
912   matrix_initialize (&axinv);
913   matrix_initialize (&xtemp);
914   matrix_initialize (&error);
915 
916 
917   if (a.rows != a.cols)
918     matrix_error ("Illegal dimensions for matrix square root");
919 
920 
921   n = a.rows;
922   matrix_identity (n, &x);
923 
924 
925   psse = 1.0e+30;
926   for (iter = 0;  iter < MAX_ITER;  iter++)
927     {
928       ok = matrix_inverse (x, &xinv);
929       if (! ok)  return (0);
930       matrix_multiply (a, xinv, &axinv);
931       matrix_add (x, axinv, &xtemp);
932       matrix_scale (0.5, xtemp, &x);
933 
934       matrix_multiply (x, x, &xtemp);
935       matrix_subtract (a, xtemp, &error);
936       sse = 0.0;
937       for (i = 0;  i < n;  i++)
938 	for (j = 0;  j < n;  j++)
939 	  sse += error.elts[i][j] * error.elts[i][j] ;
940 
941       if (sse >= psse) break;
942 
943       psse = sse;
944     }
945 
946   if (iter == MAX_ITER)  return (0);
947 
948   matrix_equate (x, s);
949 
950   matrix_destroy (&x);
951   matrix_destroy (&xinv);
952   matrix_destroy (&axinv);
953   matrix_destroy (&xtemp);
954   matrix_destroy (&error);   /* destroy error matrix too  - 26 Mar 2008 drg */
955 
956   return (1);
957 
958 }
959 
960 
961 /*---------------------------------------------------------------------------*/
962 /*!
963   Initialize vector data structure.
964 */
965 
vector_initialize(vector * v)966 void vector_initialize (vector * v)
967 {
968   v->dim = 0;
969   v->elts = NULL;
970 }
971 
972 
973 /*---------------------------------------------------------------------------*/
974 /*!
975   Destroy vector data structure by deallocating memory.
976 */
977 
vector_destroy(vector * v)978 void vector_destroy (vector * v)
979 {
980   if (v->elts != NULL) free (v->elts);
981   vector_initialize (v);
982 }
983 
984 
985 /*---------------------------------------------------------------------------*/
986 /*!
987   Create vector v by allocating memory and initializing values.
988 */
989 
vector_create(int dim,vector * v)990 void vector_create (int dim, vector * v)
991 {
992   vector_destroy (v);
993 
994   if (dim < 0)  matrix_error ("Illegal dimensions for new vector");
995 
996   v->dim = dim;
997   if (dim < 1)  return;
998 
999   v->elts = (double *) calloc (sizeof(double) , dim);
1000   if (v->elts == NULL)
1001     matrix_error ("Memory allocation error");
1002 
1003 }
1004 
1005 /*---------------------------------------------------------------------------*/
vector_create_noinit(int dim,vector * v)1006 void vector_create_noinit(int dim, vector * v)  /* 28 Dec 2001: RWCox */
1007 {
1008   vector_destroy (v);
1009 
1010   if (dim < 0)  matrix_error ("Illegal dimensions for new vector");
1011 
1012   v->dim = dim;
1013   if (dim < 1)  return;
1014 
1015   v->elts = (double *) malloc (sizeof(double) * dim);
1016   if (v->elts == NULL)
1017     matrix_error ("Memory allocation error");
1018 }
1019 
1020 /*---------------------------------------------------------------------------*/
1021 /*!
1022   Print contents of vector v.
1023 */
1024 
vector_print(vector v)1025 void vector_print (vector v)
1026 {
1027   int i;
1028 
1029   for (i = 0;  i < v.dim;  i++)
1030     printf ("  %10.4g \n", v.elts[i]);
1031   printf (" \n"); fflush(stdout);
1032 
1033 }
1034 
1035 
1036 /*---------------------------------------------------------------------------*/
1037 /*!
1038   Print label and contents of vector v.
1039 */
1040 
vector_sprint(char * s,vector v)1041 void vector_sprint (char * s, vector v)
1042 {
1043   printf ("%s \n", s);
1044 
1045   vector_print (v);
1046 }
1047 
1048 
1049 /*---------------------------------------------------------------------------*/
1050 /*!
1051   Copy vector a.  Result is vector b.
1052 */
1053 
vector_equate(vector a,vector * b)1054 void vector_equate (vector a, vector * b)
1055 {
1056   register int dim;
1057 
1058   dim = a.dim;
1059 
1060   vector_create_noinit (dim, b);
1061 
1062 #if 0
1063   for (i = 0;  i < dim;  i++)
1064     b->elts[i] = a.elts[i];
1065 #else
1066   if( dim > 0 )
1067     memcpy( b->elts , a.elts , sizeof(double)*dim ) ;  /* RWCox */
1068 #endif
1069 }
1070 
1071 
1072 /*---------------------------------------------------------------------------*/
1073 /*!
1074   Convert simple array f into vector v.
1075 */
1076 
array_to_vector(int dim,float * f,vector * v)1077 void array_to_vector (int dim, float * f, vector * v)
1078 {
1079   register int i;
1080 
1081   vector_create_noinit (dim, v);
1082 
1083   for (i = 0;  i < dim;  i++)
1084     v->elts[i] = f[i];
1085 }
1086 
1087 
1088 
1089 /*---------------------------------------------------------------------------*/
1090 /*!
1091   Convert column c of matrix m into vector v.
1092 */
1093 
column_to_vector(matrix m,int c,vector * v)1094 void column_to_vector (matrix m, int c, vector *v)
1095 {
1096   register int i , dim ;
1097   dim = m.rows; vector_create_noinit (dim, v);
1098   for (i = 0;  i < dim;  i++) v->elts[i] = m.elts[i][c];
1099 }
1100 
row_to_vector(matrix m,int r,vector * v)1101 void row_to_vector( matrix m , int r , vector *v )
1102 {
1103    register int j , dim ;
1104    dim = m.cols ; vector_create_noinit(dim,v) ;
1105    for( j=0 ; j < dim ; j++ ) v->elts[j] = m.elts[r][j] ;
1106 }
1107 
1108 
1109 
1110 /*---------------------------------------------------------------------------*/
1111 /*!
1112   Convert vector v into array f.
1113 */
1114 
vector_to_array(vector v,float * f)1115 void vector_to_array (vector v, float * f)
1116 {
1117   register int i;
1118 
1119   for (i = 0;  i < v.dim;  i++)
1120     f[i] = v.elts[i];
1121 }
1122 
1123 
1124 /*---------------------------------------------------------------------------*/
1125 /*!
1126   Add vector a to vector b.  Result is vector c.
1127 */
1128 
vector_add(vector a,vector b,vector * c)1129 void vector_add (vector a, vector b, vector * c)
1130 {
1131   register int i, dim;
1132 
1133   if (a.dim != b.dim)
1134     matrix_error ("Incompatible dimensions for vector addition");
1135 
1136   dim = a.dim;
1137 
1138   vector_create_noinit (dim, c);
1139 
1140   for (i = 0;  i < dim;  i++)
1141     c->elts[i] = a.elts[i] + b.elts[i];
1142 
1143 #ifdef ENABLE_FLOPS
1144   flops += dim ;
1145 #endif
1146 }
1147 
1148 
1149 /*---------------------------------------------------------------------------*/
1150 /*!
1151   Subtract vector b from vector a.  Result is vector c.
1152 */
1153 
vector_subtract(vector a,vector b,vector * c)1154 void vector_subtract (vector a, vector b, vector * c)
1155 {
1156   register int i, dim;
1157   register double *aa,*bb,*cc ;
1158 
1159   if (a.dim != b.dim)
1160     matrix_error ("Incompatible dimensions for vector subtraction");
1161 
1162   dim = a.dim;
1163 
1164   vector_create_noinit (dim, c);
1165 
1166   aa = a.elts ; bb = b.elts ; cc = c->elts ;
1167   for (i = 0;  i < dim;  i++)
1168 #if 0
1169     c->elts[i] = a.elts[i] - b.elts[i];
1170 #else
1171     cc[i] = aa[i] - bb[i] ;
1172 #endif
1173 
1174 #ifdef ENABLE_FLOPS
1175   flops += dim ;
1176 #endif
1177 }
1178 
1179 /*** for vector-matrix multiply routines below ***/
1180 
1181 #define UNROLL_VECMUL  /* RWCox */
1182 
1183 /*---------------------------------------------------------------------------*/
1184 /*!
1185   Right multiply matrix a by vector b.  Result is vector c.
1186 */
vector_multiply(matrix a,vector b,vector * c)1187 void vector_multiply (matrix a, vector b, vector * c)
1188 {
1189   register int rows, cols;
1190   register int i, j;
1191   register double *bb ;
1192   register double sum ;
1193 #ifdef DOTP
1194   register double **aa , *cc ;
1195 #else
1196   register double *aa ;
1197 #endif
1198 
1199 
1200   if (a.cols != b.dim){
1201     char str[444] ;
1202     sprintf(str,
1203             "Incompatible dimensions for vector multiplication: %dx%d X %d",
1204             a.rows,a.cols,b.dim ) ;
1205     matrix_error(str) ;
1206   }
1207 
1208   rows = a.rows;
1209   cols = a.cols;
1210 
1211   vector_create_noinit (rows, c);
1212 
1213   if( cols <= 0 ){
1214     for( i=0 ; i < rows ; i++ ) c->elts[i] = 0.0 ;
1215     return ;
1216   }
1217 
1218   bb = b.elts ;
1219 
1220 #ifdef MATVEC
1221   MATVEC( a , b , *c ) ;          /* 04 Mar 2005 */
1222 
1223 #elif defined(DOTP)               /* vectorized */
1224   aa = a.elts ; cc = c->elts ;
1225   i = rows%2 ;
1226   if( i == 1 ) DOTP(cols,aa[0],bb,cc) ;
1227   for( ; i < rows ; i+=2 ){
1228     DOTP(cols,aa[i]  ,bb,cc+i    ) ;
1229     DOTP(cols,aa[i+1],bb,cc+(i+1)) ;
1230   }
1231 #else
1232 
1233 #ifdef UNROLL_VECMUL
1234   switch( cols%4 ){
1235     case 0:
1236      for (i = 0;  i < rows;  i++){
1237         sum = 0.0 ; aa = a.elts[i] ;
1238         for (j = 0;  j < cols;  j+=4 )
1239           sum += aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1240         c->elts[i] = sum ;
1241      }
1242     break ;
1243     case 1:
1244      for (i = 0;  i < rows;  i++){
1245         aa = a.elts[i] ; sum = aa[0]*bb[0] ;
1246         for (j = 1;  j < cols;  j+=4 )
1247           sum += aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1248         c->elts[i] = sum ;
1249      }
1250     break ;
1251     case 2:
1252      for (i = 0;  i < rows;  i++){
1253         aa = a.elts[i] ; sum = aa[0]*bb[0]+aa[1]*bb[1] ;
1254         for (j = 2;  j < cols;  j+=4 )
1255           sum += aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1256         c->elts[i] = sum ;
1257      }
1258     break ;
1259     case 3:
1260      for (i = 0;  i < rows;  i++){
1261         aa = a.elts[i] ; sum = aa[0]*bb[0]+aa[1]*bb[1]+aa[2]*bb[2] ;
1262         for (j = 3;  j < cols;  j+=4 )
1263           sum += aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1264         c->elts[i] = sum ;
1265      }
1266     break ;
1267   }
1268 #else
1269     for (i = 0;  i < rows;  i++){
1270         sum = 0.0 ; aa = a.elts[i] ;
1271         for (j = 0;  j < cols;  j++ ) sum += aa[j]*bb[j] ;
1272         c->elts[i] = sum ;
1273     }
1274 #endif /* UNROLL_VECMUL */
1275 
1276 #endif /* MATVEC, DOTP */
1277 
1278 #ifdef ENABLE_FLOPS
1279     flops += 2.0*rows*cols ;
1280     dotsum += rows*cols ; dotnum += rows ;
1281 #endif
1282     return ;
1283 }
1284 
1285 /*---------------------------------------------------------------------------*/
1286 /*!
1287   Right multiply matrix a-transpose by vector b.  Result is vector c.
1288 */
vector_multiply_transpose(matrix a,vector b,vector * c)1289 void vector_multiply_transpose (matrix a, vector b, vector * c)
1290 {
1291   register int rows, cols;
1292   register int i, j;
1293   register double *bb ;
1294   register double bj ;
1295   register double *aa , *cc ;
1296 
1297   if (a.rows != b.dim){
1298     char str[444] ;
1299     sprintf(str,
1300             "Incompatible dimensions for vector_multiply_transpose: [%dx%d]' X %d",
1301             a.rows,a.cols,b.dim ) ;
1302     matrix_error(str) ;
1303   }
1304 
1305   rows = a.rows; cols = a.cols;
1306 
1307   vector_create(cols, c);  /* initialized to 0 */
1308 
1309   if( rows <= 0 ) return ;
1310 
1311   bb = b.elts ; cc = c->elts ;
1312 
1313 #ifdef UNROLL_VECMUL
1314   switch( cols%4 ){
1315     case 0:
1316      for( j=0 ; j < rows ; j++ ){
1317        aa = a.elts[j] ; bj = bb[j] ;
1318        for( i=0 ; i < cols ; i+=4 ){
1319          cc[i]   += aa[i]  *bj ;
1320          cc[i+1] += aa[i+1]*bj ;
1321          cc[i+2] += aa[i+2]*bj ;
1322          cc[i+3] += aa[i+3]*bj ;
1323        }
1324      }
1325     break ;
1326 
1327     case 1:
1328      for( j=0 ; j < rows ; j++ ){
1329        aa = a.elts[j] ; bj = bb[j] ;
1330        cc[0] += aa[0]*bj ;
1331        for( i=1 ; i < cols ; i+=4 ){
1332          cc[i]   += aa[i]  *bj ;
1333          cc[i+1] += aa[i+1]*bj ;
1334          cc[i+2] += aa[i+2]*bj ;
1335          cc[i+3] += aa[i+3]*bj ;
1336        }
1337      }
1338     break ;
1339 
1340     case 2:
1341      for( j=0 ; j < rows ; j++ ){
1342        aa = a.elts[j] ; bj = bb[j] ;
1343        cc[0] += aa[0]*bj ;
1344        cc[1] += aa[1]*bj ;
1345        for( i=2 ; i < cols ; i+=4 ){
1346          cc[i]   += aa[i]  *bj ;
1347          cc[i+1] += aa[i+1]*bj ;
1348          cc[i+2] += aa[i+2]*bj ;
1349          cc[i+3] += aa[i+3]*bj ;
1350        }
1351      }
1352     break ;
1353 
1354     case 3:
1355      for( j=0 ; j < rows ; j++ ){
1356        aa = a.elts[j] ; bj = bb[j] ;
1357        cc[0] += aa[0]*bj ;
1358        cc[1] += aa[1]*bj ;
1359        cc[2] += aa[2]*bj ;
1360        for( i=3 ; i < cols ; i+=4 ){
1361          cc[i]   += aa[i]  *bj ;
1362          cc[i+1] += aa[i+1]*bj ;
1363          cc[i+2] += aa[i+2]*bj ;
1364          cc[i+3] += aa[i+3]*bj ;
1365        }
1366      }
1367   }
1368 #else
1369   for( j=0 ; j < rows ; j++ ){
1370     aa = a.elts[j] ; bj = bb[j] ;
1371     for( i=0 ; i < cols ; i++ ) cc[i] += aa[i]*bj ;
1372   }
1373 #endif
1374 
1375 #ifdef ENABLE_FLOPS
1376     flops += 2.0*rows*cols ;
1377     dotsum += rows*cols ; dotnum += cols ;
1378 #endif
1379     return ;
1380 }
1381 
1382 /*---------------------------------------------------------------------------*/
1383 /*!
1384   Compute d = c-a*b: a is a matrix; b,c,d are vectors -- RWCox
1385   26 Feb 2002: return value is sum of squares of d vector
1386 */
1387 
vector_multiply_subtract(matrix a,vector b,vector c,vector * d)1388 double vector_multiply_subtract (matrix a, vector b, vector c, vector * d)
1389 {
1390   register int rows, cols;
1391   register int i, j;
1392   register double *bb ;
1393 #ifdef DOTP
1394   double qsum,sum , **aa , *dd,*cc,*ee ;
1395 #else
1396   register double qsum,sum, *aa ;
1397 #endif
1398 
1399 
1400   if (a.cols != b.dim || a.rows != c.dim )
1401     matrix_error ("Incompatible dimensions for vector multiplication-subtraction");
1402 
1403   rows = a.rows;
1404   cols = a.cols;
1405 
1406   vector_create_noinit (rows, d);
1407 
1408   if( cols <= 0 ){
1409     qsum = 0.0 ;
1410     for( i=0 ; i < rows ; i++ ){
1411       d->elts[i] = c.elts[i] ;
1412       qsum += d->elts[i] * d->elts[i] ;
1413     }
1414     return qsum ;
1415   }
1416 
1417   qsum = 0.0 ; bb = b.elts ;
1418 
1419 #ifdef DOTP                                      /* vectorized */
1420   aa = a.elts ; dd = d->elts ; cc = c.elts ;
1421   ee = (double *)malloc(sizeof(double)*rows) ;
1422   i  = rows%2 ;
1423   if( i == 1 ) DOTP(cols,aa[0],bb,ee) ;
1424   for( ; i < rows ; i+=2 ){
1425     DOTP(cols,aa[i]  ,bb,ee+i    ) ;
1426     DOTP(cols,aa[i+1],bb,ee+(i+1)) ;
1427   }
1428   VSUB(rows,cc,ee,dd) ;
1429   DOTP(rows,dd,dd,&qsum) ;
1430   free((void *)ee) ;
1431 #else
1432 
1433 #ifdef UNROLL_VECMUL
1434   switch( cols%4 ){
1435     case 0:
1436      for (i = 0;  i < rows;  i++){
1437         aa = a.elts[i] ; sum = c.elts[i] ;
1438         for (j = 0;  j < cols;  j+=4 )
1439           sum -= aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1440         d->elts[i] = sum ; qsum += sum*sum ;
1441      }
1442     break ;
1443     case 1:
1444      for (i = 0;  i < rows;  i++){
1445         aa = a.elts[i] ; sum = c.elts[i]-aa[0]*bb[0] ;
1446         for (j = 1;  j < cols;  j+=4 )
1447           sum -= aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1448         d->elts[i] = sum ; qsum += sum*sum ;
1449      }
1450     break ;
1451     case 2:
1452      for (i = 0;  i < rows;  i++){
1453         aa = a.elts[i] ; sum = c.elts[i]-aa[0]*bb[0]-aa[1]*bb[1] ;
1454         for (j = 2;  j < cols;  j+=4 )
1455           sum -= aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1456         d->elts[i] = sum ; qsum += sum*sum ;
1457      }
1458     break ;
1459     case 3:
1460      for (i = 0;  i < rows;  i++){
1461         aa = a.elts[i] ; sum = c.elts[i]-aa[0]*bb[0]-aa[1]*bb[1]-aa[2]*bb[2] ;
1462         for (j = 3;  j < cols;  j+=4 )
1463           sum -= aa[j]*bb[j]+aa[j+1]*bb[j+1]+aa[j+2]*bb[j+2]+aa[j+3]*bb[j+3];
1464         d->elts[i] = sum ; qsum += sum*sum ;
1465      }
1466     break ;
1467   }
1468 #else
1469   for (i = 0;  i < rows;  i++){
1470     aa = a.elts[i] ; sum = c.elts[i] ;
1471     for (j = 0;  j < cols;  j++) sum -= aa[j] * bb[j] ;
1472     d->elts[i] = sum ; qsum += sum*sum ;
1473   }
1474 #endif /* UNROLL_VECMUL */
1475 
1476 #endif /* DOTP */
1477 
1478 #ifdef ENABLE_FLOPS
1479   flops += 2.0*rows*(cols+1) ;
1480   dotsum += rows*cols ; dotnum += rows ;
1481 #endif
1482 
1483   return qsum ;  /* 26 Feb 2003 */
1484 }
1485 
1486 /*---------------------------------------------------------------------------*/
1487 /*!
1488   Calculate dot product of vector a with vector b.
1489 */
1490 
vector_dot(vector a,vector b)1491 double vector_dot (vector a, vector b)
1492 {
1493   register int i, dim;
1494   register double sum;
1495   register double *aa , *bb ;
1496 
1497   if (a.dim != b.dim)
1498     matrix_error ("Incompatible dimensions for vector dot product");
1499 
1500   dim = a.dim;
1501 
1502   sum = 0.0;
1503   aa = a.elts ; bb = b.elts ;
1504   for (i = 0;  i < dim;  i++)
1505 #if 0
1506     sum += a.elts[i] * b.elts[i];
1507 #else
1508     sum += aa[i] * bb[i] ;
1509 #endif
1510 
1511 #ifdef ENABLE_FLOPS
1512   flops += 2.0*dim ;
1513 #endif
1514   return (sum);
1515 }
1516 
1517 /*--------------------------------------------------------------------------*/
1518 /*!
1519   Calculate dot product of vector a with itself -- 28 Dec 2001, RWCox.
1520 */
1521 
vector_dotself(vector a)1522 double vector_dotself( vector a )
1523 {
1524   register int i, dim;
1525   register double sum;
1526   register double *aa ;
1527 
1528   dim = a.dim;
1529   sum = 0.0;
1530   aa = a.elts ;
1531   for (i = 0;  i < dim;  i++)
1532 #if 0
1533     sum += a.elts[i] * a.elts[i];
1534 #else
1535     sum += aa[i] * aa[i] ;
1536 #endif
1537 
1538 #ifdef ENABLE_FLOPS
1539   flops += 2.0*dim ;
1540 #endif
1541   return (sum);
1542 }
1543 
1544 /*---------------------------------------------------------------------------*/
1545 /*!
1546   Compute the L_infinity norm of a matrix: the max absolute row sum.
1547 */
1548 
matrix_norm(matrix a)1549 double matrix_norm( matrix a )
1550 {
1551    int i,j , rows=a.rows, cols=a.cols ;
1552    double sum , smax=0.0 ;
1553 
1554    for (i = 0;  i < rows;  i++){
1555      sum = 0.0 ;
1556      for (j = 0;  j < cols;  j++) sum += fabs(a.elts[i][j]) ;
1557      if( sum > smax ) smax = sum ;
1558    }
1559 #ifdef ENABLE_FLOPS
1560    flops += 2.0*rows*cols ;
1561 #endif
1562    return smax ;
1563 }
1564 
1565 /*---------------------------------------------------------------------------*/
1566 
matrix_frobenius(matrix a)1567 double matrix_frobenius( matrix a )  /* sum of squares of all elements */
1568 {
1569    int i,j , rows=a.rows, cols=a.cols ;
1570    double sum=0.0  , **aa=a.elts ;
1571 
1572    for (i = 0;  i < rows;  i++){
1573      for (j = 0;  j < cols;  j++) sum += aa[i][j] * aa[i][j] ;
1574    }
1575 #ifdef ENABLE_FLOPS
1576    flops += 2.0*rows*cols ;
1577 #endif
1578    return sum ;
1579 }
1580 
1581 /*---------------------------------------------------------------------------*/
1582 
matrix_colsqsums(matrix a,vector * v)1583 void matrix_colsqsums( matrix a , vector *v )
1584 {
1585    int i,j , rows=a.rows , cols=a.cols ;
1586    double sum , **aa , *vp ;
1587 
1588    vector_create_noinit( cols , v ) ; vp = v->elts ; aa = a.elts ;
1589 
1590    for( j=0 ; j < cols ; j++ ){
1591      for( sum=0.0,i=0 ; i < rows ; i++ ) sum += aa[i][j] * aa[i][j] ;
1592      vp[j] = sqrt(sum) ;
1593    }
1594    return ;
1595 }
1596 
1597 /*---------------------------------------------------------------------------*/
1598 /*! Search a matrix for nearly identical column pairs, where "nearly identical"
1599     means they are correlated closer than 1-eps.
1600 
1601     Return is a pointer to an int array of the form
1602       [ i1 j1 i2 j2 ... -1 -1 ]
1603     where columns (i1,j1) are nearly the same, (i2,j2) also, etc.
1604     In addition:
1605      - A pair (i,-1) indicates that column #i is all zeros.
1606      - The array is terminated with the pair (-1,-1).
1607      - If there are no bad column pairs or all-zero columns, NULL is returned.
1608      - Pairs of all-zero columns are NOT reported.
1609      - The array should be free()-ed when you are done with it.
1610 -----------------------------------------------------------------------------*/
1611 
matrix_check_columns(matrix a,double eps)1612 int * matrix_check_columns( matrix a , double eps )  /* 14 Jul 2004 */
1613 {
1614    int i,j,k , rows=a.rows , cols=a.cols ;
1615    int *iar=NULL , nar=0 ;
1616    double sumi,sumj,sumd ;
1617 
1618    if( eps <= 0.0 ) eps = 1.e-5 ;
1619 
1620    for( i=0 ; i < cols ; i++ ){
1621      sumi = 0.0 ;
1622      for( k=0 ; k < rows ; k++ ) sumi += a.elts[k][i] * a.elts[k][i] ;
1623      if( sumi <= 0.0 ){
1624        iar = (int *)realloc( (void *)iar , sizeof(int)*2*(nar+1) ) ;
1625        iar[2*nar] = i ; iar[2*nar+1] = -1 ; nar++ ;
1626        continue ;                           /* skip to next column i */
1627      }
1628      for( j=i+1 ; j < cols ; j++ ){
1629        sumj = sumd = 0.0 ;
1630        for( k=0 ; k < rows ; k++ ){
1631          sumj += a.elts[k][j] * a.elts[k][j] ;
1632          sumd += a.elts[k][j] * a.elts[k][i] ;
1633        }
1634        if( sumj > 0.0 ){
1635          sumd = fabs(sumd) / sqrt(sumi*sumj) ;
1636          if( sumd >= 1.0-eps ){
1637            iar = (int *)realloc( (void *)iar , sizeof(int)*2*(nar+1) ) ;
1638            iar[2*nar] = i ; iar[2*nar+1] = j ; nar++ ;
1639          }
1640        }
1641      }
1642    }
1643 
1644    if( iar != NULL ){
1645      iar = (int *)realloc( (void *)iar , sizeof(int)*2*(nar+1) ) ;
1646      iar[2*nar] = iar[2*nar+1] = -1 ;
1647    }
1648 
1649    return iar ;
1650 }
1651 
1652 /*---------------------------------------------------------------------------*/
1653 /*! Return the sqrt(eigenvalues) of matrix X-transpose X, scaled to diagonal 1.
1654     The output points to a vector of doubles, of length X.cols.  This
1655     should be free()-ed when you are done with it.
1656 -----------------------------------------------------------------------------*/
1657 
matrix_singvals(matrix X)1658 double * matrix_singvals( matrix X )   /* 14 Jul 2004 */
1659 {
1660    int i,j,k , M=X.rows , N=X.cols ;
1661    double *a , *e , sum ;
1662 
1663    a = (double *) malloc( sizeof(double)*N*N ) ;
1664    e = (double *) malloc( sizeof(double)*N   ) ;
1665 
1666    for( i=0 ; i < N ; i++ ){
1667      for( j=0 ; j <= i ; j++ ){
1668        sum = 0.0 ;
1669        for( k=0 ; k < M ; k++ ) sum += X.elts[k][i] * X.elts[k][j] ;
1670        a[j+N*i] = sum ;
1671        if( j < i ) a[i+N*j] = sum ;
1672      }
1673    }
1674 
1675    for( i=0 ; i < N ; i++ ){
1676      if( a[i+N*i] > 0.0 ) e[i] = 1.0 / sqrt(a[i+N*i]) ;
1677      else                 e[i] = 1.0 ;
1678    }
1679    for( i=0 ; i < N ; i++ ){
1680      for( j=0 ; j < N ; j++ ) a[j+N*i] *= e[i]*e[j] ;
1681    }
1682 
1683    symeigval_double( N , a , e ) ;
1684    free( (void *)a ) ;
1685    for( i=0 ; i < N ; i++ ){
1686      e[i] = (e[i] <= 0.0) ? 0.0 : sqrt(e[i]) ;
1687    }
1688 #ifdef ENABLE_FLOPS
1689    flops += (M+N+2.0)*N*N+10.0*N ;
1690 #endif
1691    return e ;
1692 }
1693 
1694 /*---------------------------------------------------------------------------*/
1695 
1696 extern void svd_double( int, int, double *, double *, double *, double * ) ;
1697 
1698 static double pseps = 1.e-16 ;
1699 
matrix_psinv_seteps(double eps)1700 void matrix_psinv_seteps( double eps )
1701 {
1702    pseps = (eps > 0.0) ? eps : 1.e-16 ;
1703 }
1704 
1705 /*---------------------------------------------------------------------------*/
1706 /*! Given MxN matrix X, return the NxN matrix
1707 
1708        [  T  ]-1                       [  T  ]-1 T
1709        [ X X ]     and the NxM matrix  [ X X ]  X
1710 -----------------------------------------------------------------------------*/
1711 
matrix_psinv(matrix X,matrix * XtXinv,matrix * XtXinvXt)1712 void matrix_psinv( matrix X , matrix *XtXinv , matrix *XtXinvXt )
1713 {
1714    int m = X.rows , n = X.cols , ii,jj,kk ;
1715    double *amat , *umat , *vmat , *sval , *xfac , smax,del,sum ;
1716 
1717    if( m < 1 || n < 1 || m < n || (XtXinv == NULL && XtXinvXt == NULL) ) return;
1718 
1719    amat = (double *)calloc( sizeof(double),m*n ) ;  /* input matrix */
1720    umat = (double *)calloc( sizeof(double),m*n ) ;  /* left singular vectors */
1721    vmat = (double *)calloc( sizeof(double),n*n ) ;  /* right singular vectors */
1722    sval = (double *)calloc( sizeof(double),n   ) ;  /* singular values */
1723    xfac = (double *)calloc( sizeof(double),n   ) ;  /* column norms of [a] */
1724 
1725 #undef  A
1726 #undef  U
1727 #undef  V
1728 #define A(i,j) amat[(i)+(j)*m]
1729 #define U(i,j) umat[(i)+(j)*m]
1730 #define V(i,j) vmat[(i)+(j)*n]
1731 
1732    /* copy input matrix into amat */
1733 
1734    for( ii=0 ; ii < m ; ii++ )
1735      for( jj=0 ; jj < n ; jj++ ) A(ii,jj) = X.elts[ii][jj] ;
1736 
1737    /* scale each column to have norm 1 */
1738 
1739    for( jj=0 ; jj < n ; jj++ ){
1740      sum = 0.0 ;
1741      for( ii=0 ; ii < m ; ii++ ) sum += A(ii,jj)*A(ii,jj) ;
1742      if( sum > 0.0 ) sum = 1.0/sqrt(sum) ;
1743      xfac[jj] = sum ;
1744      for( ii=0 ; ii < m ; ii++ ) A(ii,jj) *= sum ;
1745    }
1746 
1747    /* compute SVD of scaled matrix */
1748 
1749    svd_double( m , n , amat , sval , umat , vmat ) ;
1750 
1751    free((void *)amat) ;  /* done with this */
1752 
1753    /* find largest singular value */
1754 
1755    smax = sval[0] ;
1756    for( ii=1 ; ii < n ; ii++ )
1757      if( sval[ii] > smax ) smax = sval[ii] ;
1758 
1759    if( smax <= 0.0 ){                        /* this is bad */
1760      free((void *)xfac); free((void *)sval);
1761      free((void *)vmat); free((void *)umat);
1762      return;
1763    }
1764 
1765    for( ii=0 ; ii < n ; ii++ )
1766      if( sval[ii] < 0.0 ) sval[ii] = 0.0 ;
1767 
1768 #ifdef FLOATIZE
1769 #define PSINV_EPS 1.e-8
1770 #else
1771 #define PSINV_EPS pseps
1772 #endif
1773 
1774    /* "reciprocals" of singular values:  1/s is actually s/(s^2+del) */
1775 
1776    del = PSINV_EPS * smax*smax ;
1777    for( ii=0 ; ii < n ; ii++ )
1778      sval[ii] = sval[ii] / ( sval[ii]*sval[ii] + del ) ;
1779 
1780    /* create pseudo-inverse */
1781 
1782    if( XtXinvXt != NULL ){
1783      matrix_create( n , m , XtXinvXt ) ;
1784      for( ii=0 ; ii < n ; ii++ ){
1785        for( jj=0 ; jj < m ; jj++ ){
1786          sum = 0.0 ;
1787          for( kk=0 ; kk < n ; kk++ )
1788            sum += sval[kk] * V(ii,kk) * U(jj,kk) ;
1789          XtXinvXt->elts[ii][jj] = sum * xfac[ii] ;
1790        }
1791      }
1792    }
1793 
1794    if( XtXinv != NULL ){
1795      matrix_create( n , n , XtXinv ) ;
1796      for( ii=0 ; ii < n ; ii++ ) sval[ii] = sval[ii] * sval[ii] ;
1797      matrix_create( n , n , XtXinv ) ;
1798      for( ii=0 ; ii < n ; ii++ ){
1799        for( jj=0 ; jj < n ; jj++ ){
1800          sum = 0.0 ;
1801          for( kk=0 ; kk < n ; kk++ )
1802            sum += sval[kk] * V(ii,kk) * V(jj,kk) ;
1803          XtXinv->elts[ii][jj] = sum * xfac[ii] * xfac[jj] ;
1804        }
1805      }
1806    }
1807 
1808 #ifdef ENABLE_FLOPS
1809    flops += n*n*(n+2.0*m+2.0) ;
1810 #endif
1811    free((void *)xfac); free((void *)sval);
1812    free((void *)vmat); free((void *)umat);
1813    return;
1814 }
1815 
1816 /*---------------------------------------------------------------------------*/
1817 /*! Check a matrix for column collinearity and adjust it (via the SVD)
1818     if it needs it.  Return value is -1 if there is an error, 0 if no
1819     fix was made, or the number of bad singular values if a fix was made
1820     -- in which case the adjusted matrix is in Xa.
1821 *//*-------------------------------------------------------------------------*/
1822 #if 0
1823 int matrix_collinearity_fixup( matrix X , matrix *Xa )
1824 {
1825    int m = X.rows , n = X.cols , ii,jj,kk , nbad ;
1826    double *amat , *umat , *vmat , *sval , *xfac , smax,del,sum ;
1827 
1828    if( m < 1 || n < 1 || m < n ) return (-1) ;
1829 
1830    amat = (double *)calloc( sizeof(double),m*n ) ; /* input matrix */
1831    umat = (double *)calloc( sizeof(double),m*n ) ; /* left singular vectors */
1832    vmat = (double *)calloc( sizeof(double),n*n ) ; /* right singular vectors */
1833    sval = (double *)calloc( sizeof(double),n   ) ; /* singular values */
1834    xfac = (double *)calloc( sizeof(double),n   ) ; /* column norms of [a] */
1835 
1836 #undef  A
1837 #undef  U
1838 #undef  V
1839 #define A(i,j) amat[(i)+(j)*m]
1840 #define U(i,j) umat[(i)+(j)*m]
1841 #define V(i,j) vmat[(i)+(j)*n]
1842 
1843    /* copy input matrix into amat */
1844 
1845    for( ii=0 ; ii < m ; ii++ )
1846      for( jj=0 ; jj < n ; jj++ ) A(ii,jj) = X.elts[ii][jj] ;
1847 
1848    /* scale each column to have L2 norm 1 */
1849 
1850    for( jj=0 ; jj < n ; jj++ ){
1851      sum = 0.0 ;
1852      for( ii=0 ; ii < m ; ii++ ) sum += A(ii,jj)*A(ii,jj) ;
1853      xfac[jj] = sqrt(sum) ;
1854      if( sum > 0.0 ){
1855        sum = 1.0 / xfac[jj] ;
1856        for( ii=0 ; ii < m ; ii++ ) A(ii,jj) *= sum ;
1857      }
1858    }
1859 
1860    /* compute SVD of scaled matrix */
1861 
1862    svd_double( m , n , amat , sval , umat , vmat ) ;
1863 
1864    free((void *)amat) ;  /* done with this */
1865 
1866    /* find largest singular value */
1867 
1868    if( sval[0] < 0.0 ) sval[0] = 0.0 ;
1869    smax = sval[0] ;
1870    for( ii=1 ; ii < n ; ii++ ){
1871           if( sval[ii] > smax ) smax = sval[ii] ;
1872      else if( sval[ii] < 0.0  ) sval[ii] = 0.0 ;
1873    }
1874 
1875    if( smax == 0.0 ){                        /* this is bad */
1876      free((void *)xfac); free((void *)sval);
1877      free((void *)vmat); free((void *)umat);
1878      return (-1);
1879    }
1880 
1881    /* adjust small singular values upwards */
1882 
1883    del = 1.e-6 * smax ;
1884    for( nbad=ii=0 ; ii < n ; ii++ )
1885      if( sval[ii] < del ){ sval[ii] = del ; nbad++ ; }
1886 
1887    /* if all were OK, then nothing more needs to be done */
1888 
1889    if( nbad == 0 || Xa == NULL ){
1890      free((void *)xfac); free((void *)sval);
1891      free((void *)vmat); free((void *)umat);
1892      return (nbad);
1893    }
1894 
1895    /* create and compute output matrix */
1896 
1897    matrix_create( m , n , Xa ) ;
1898 
1899    for( ii=0 ; ii < m ; ii++ ){
1900      for( jj = 0 ; jj < n ; jj++ ){
1901        sum = 0.0 ;
1902        for( kk=0 ; kk < n ; kk++ )
1903          sum += sval[kk] * U(ii,kk) * V(jj,kk) ;
1904        Xa->elts[ii][jj] = sum * xfac[jj] ;
1905      }
1906    }
1907 
1908    free((void *)xfac); free((void *)sval);
1909    free((void *)vmat); free((void *)umat);
1910    return (nbad);
1911 }
1912 #endif
1913 
1914 /*---------------------------------------------------------------------------*/
1915 
1916 static int allow_desing = 0 ;
matrix_allow_desing(int ii)1917 void matrix_allow_desing( int ii ){ allow_desing = ii ; }
1918 
1919 /*---------------------------------------------------------------------------*/
1920 /*! Given MxN matrix X, compute the NxN upper triangle factor R in X = QR.
1921     Must have M >= N (more rows than columns).
1922     Q is not computed.  If you want Q, then compute it as [Q] = [X] * inv[R].
1923     The return value is the number of diagonal elements that were adjusted
1924     because they were very tiny (should be 0 except in collinear cases).
1925 *//*-------------------------------------------------------------------------*/
1926 
matrix_qrr(matrix X,matrix * R)1927 int matrix_qrr( matrix X , matrix *R )
1928 {
1929    int m=X.rows , n=X.cols , ii,jj,kk , m1=m-1 , nsing=0 ;
1930    double *amat , x1 ;
1931    register double alp, sum , *uvec, *Ak;
1932 
1933    if( m < 2 || n < 1 || m < n || R == NULL || X.elts == NULL ) return (-1) ;
1934 
1935 #undef  A
1936 #define A(i,j) amat[(i)+(j)*m]
1937 
1938    amat = (double *)malloc( sizeof(double)*m*n ) ;  /* copy input matrix */
1939    uvec = (double *)malloc( sizeof(double)*m   ) ;  /* Householder vector */
1940 
1941    /* copy input matrix into amat == A */
1942 
1943    for( ii=0 ; ii < m ; ii++ )
1944      for( jj=0 ; jj < n ; jj++ ) A(ii,jj) = X.elts[ii][jj] ;
1945 
1946    if( allow_desing )
1947      nsing = svd_desingularize( m , n , amat ) ;  /* Ides of March, 2010 */
1948 
1949    /* Householder transform each column of A in turn */
1950 
1951    for( jj=0 ; jj < n ; jj++ ){
1952      if( jj == m1 ) break ;  /* at last column AND have m==n */
1953      x1 = uvec[jj] = A(jj,jj) ;
1954      for( sum=0.0,ii=jj+1 ; ii < m ; ii++ ){
1955        uvec[ii] = alp = A(ii,jj) ; sum += alp*alp ;
1956      }
1957      if( sum == 0.0 ) continue ; /* tail of column is pre-reduced to 0 */
1958      alp = sqrt(sum+x1*x1) ; if( x1 > 0.0 ) alp = -alp ;
1959      x1 = uvec[jj] -= alp ; A(jj,jj) = alp ;
1960      alp = 2.0 / (sum+x1*x1) ;
1961      for( kk=jj+1 ; kk < n ; kk++ ){  /* process trailing columns */
1962        Ak = amat + kk*m ;
1963        for( sum=0.0,ii=jj ; ii < m1 ; ii+=2 )
1964          sum += uvec[ii]*Ak[ii] + uvec[ii+1]*Ak[ii+1] ;
1965        if( ii == m1 ) sum += uvec[m1]*Ak[m1] ;
1966        sum *= alp ;
1967        for( ii=jj ; ii < m1 ; ii+=2 ){
1968          Ak[ii] -= sum*uvec[ii] ; Ak[ii+1] -= sum*uvec[ii+1] ;
1969        }
1970        if( ii == m1 ) Ak[m1] -= sum*uvec[m1] ;
1971      }
1972    }
1973 
1974    /* copy result in A to output
1975       (changing row signs if needed to make R's diagonal non-negative) */
1976 
1977    matrix_create( n , n , R ) ;
1978    for( ii=0 ; ii < n ; ii++ ){
1979      for( jj=0 ; jj < ii ; jj++ ) R->elts[ii][jj] = 0.0 ; /* sub-diagonal */
1980      if( A(ii,ii) >= 0.0 )
1981        for( jj=ii ; jj < n ; jj++ ) R->elts[ii][jj] =  A(ii,jj) ;
1982      else
1983        for( jj=ii ; jj < n ; jj++ ) R->elts[ii][jj] = -A(ii,jj) ;
1984    }
1985 
1986 #if 0
1987 fprintf(stderr,"matrix_qrr diagonal:") ;
1988 for( ii=0 ; ii < n ; ii++ ) fprintf(stderr," %g",R->elts[ii][ii]) ;
1989 fprintf(stderr,"\n") ;
1990 #endif
1991 
1992 #if 0
1993    /* adjust tiny (or zero) diagonal elements, for solution stability */
1994 
1995    for( kk=ii=0 ; ii < n ; ii++ ){
1996      sum = 0.0 ;
1997      for( jj=0    ; jj < ii ; jj++ ) sum += fabs( R->elts[jj][ii] ) ;
1998      for( jj=ii+1 ; jj < n  ; jj++ ) sum += fabs( R->elts[ii][jj] ) ;
1999      sum = (sum==0.0) ? 1.0 : sum/n ;
2000      sum *= 1.e-7 ; alp = R->elts[ii][ii] ;  /* alp >= 0 from above */
2001      if( alp < sum ){
2002        kk++ ; R->elts[ii][ii] = alp + sum*(sum-alp)/(sum+alp) ;
2003      }
2004    }
2005 #endif
2006 
2007 #ifdef ENABLE_FLOPS
2008    flops += n*n*(2*m-0.666*n) ;
2009 #endif
2010 
2011    free((void *)uvec) ; free((void *)amat) ;
2012    return (nsing) ;
2013 }
2014 
2015 /*----------------- below is a test program for matrix_qrr() -----------------*/
2016 
2017 #if 0
2018 #include <stdlib.h>
2019 #include <math.h>
2020 #include <stdio.h>
2021 #include "matrix.h"
2022 int main( int argc , char *argv[] )
2023 {
2024    matrix X , R , Xt , XtX ;
2025    int m , n , ii,jj ;
2026 
2027    matrix_initialize(&X) ; matrix_initialize(&R)  ;
2028    matrix_initialize(&Xt); matrix_initialize(&XtX);
2029 
2030    if( argc < 3 ) exit(0);
2031 
2032    m = (int)strtod(argv[1],NULL) ; if( m < 2 ) exit(0) ;
2033    n = (int)strtod(argv[2],NULL) ; if( n < 1 || n > m ) exit(0) ;
2034 
2035    matrix_create( m , n , &X ) ;
2036    for( jj=0 ; jj < n ; jj++ )
2037      for( ii=0 ; ii < m ; ii++ )
2038        X.elts[ii][jj] = sin(1.0+ii+jj) + cos((1.0+ii*jj)/(1.0+ii+jj)) ;
2039 
2040    matrix_transpose( X , &Xt ) ;
2041    matrix_multiply ( Xt , X , &XtX ) ;
2042    printf("=== X matrix:\n") ; matrix_print(X) ;
2043    printf("=== XtX matrix:\n") ; matrix_print(XtX) ;
2044 
2045    matrix_qrr( X , &R ) ;
2046    printf("=== R matrix:\n") ; matrix_print(R) ;
2047 
2048    matrix_transpose( R , &Xt ) ;
2049    matrix_multiply( Xt , R , &XtX ) ;
2050    printf("=== RtR matrix:\n") ; matrix_print(XtX) ;
2051    exit(0) ;
2052 }
2053 #endif
2054 
2055 /*---------------------------------------------------------------------------*/
2056 /*! Solve [R] [x] = [b] for [x] where R is upper triangular. */
2057 
vector_rr_solve(matrix R,vector b,vector * x)2058 void vector_rr_solve( matrix R , vector b , vector *x )
2059 {
2060    register int n , ii,jj , n1 ;
2061    register double sum , *xp , *bp , *rr ;
2062 
2063    n = R.rows ; n1 = n-1 ;
2064    if( n < 1 || R.cols != n || x == NULL ) return ;
2065 
2066    vector_create_noinit( n , x ) ; xp = x->elts ; bp = b.elts ;
2067 
2068    /* backwards loop, from last element to first */
2069 
2070    for( ii=n-1 ; ii >= 0 ; ii-- ){
2071      rr = R.elts[ii] ; sum = bp[ii] ;
2072      for( jj=ii+1 ; jj < n1 ; jj+=2 )         /* unrolled by 2 */
2073        sum -= rr[jj] * xp[jj] + rr[jj+1] * xp[jj+1] ;
2074      if( jj == n1 ) sum -= rr[jj] * xp[jj] ;  /* fix unroll if odd length */
2075      xp[ii] = sum / rr[ii] ;
2076    }
2077 
2078    return ;
2079 }
2080 
2081 /*---------------------------------------------------------------------------*/
2082 /*! Solve [R]' [x] = [b] for [x] where R is upper triangular. */
2083 
vector_rrtran_solve(matrix R,vector b,vector * x)2084 void vector_rrtran_solve( matrix R , vector b , vector *x )
2085 {
2086    register int n , ii,jj , n1 ;
2087    register double sum , *xp , *bp , *rr ;
2088 
2089    n = R.rows ; n1 = n-1 ;
2090    if( n < 1 || R.cols != n || x == NULL ) return ;
2091 
2092    bp = b.elts ;
2093 #if 0             /* the obvious way, but is slower */
2094    vector_create_noinit( n , x ) ; xp = x->elts ;
2095    for( ii=0 ; ii < n ; ii++ ){
2096      for( sum=bp[ii],jj=0 ; jj < ii ; jj++ )
2097        sum -= R.elts[jj][ii] * xp[jj] ;
2098      xp[ii] = sum / R.elts[ii][ii] ;
2099    }
2100 #else             /* the row ordered way, which is faster in cache */
2101    vector_equate( b , x ) ; xp = x->elts ;
2102    for( ii=0 ; ii < n ; ii++ ){
2103      rr = R.elts[ii] ; sum = xp[ii] = xp[ii] / rr[ii] ;
2104      for( jj=ii+1 ; jj < n1 ; jj+=2 ){     /* unrolled by 2 */
2105        xp[jj] -= rr[jj]*sum ; xp[jj+1] -= rr[jj+1]*sum ;
2106      }
2107      if( jj == n1 ) xp[jj] -= rr[jj]*sum ; /* fix unroll if odd length */
2108    }
2109 #endif
2110 
2111    return ;
2112 }
2113 
2114 /*---------------------------------------------------------------------------*/
2115 /*! Solve [R] [X] = [B] for matrix X, where R is upper triangular. */
2116 
matrix_rr_solve(matrix R,matrix B,matrix * X)2117 void matrix_rr_solve( matrix R , matrix B , matrix *X )
2118 {
2119    int n=R.rows , m=B.cols , i,j ;
2120    vector u, v ;
2121 
2122    if( R.cols != n || B.rows != n || X == NULL ) return ;
2123 
2124    vector_initialize(&u) ; vector_initialize(&v) ;
2125    matrix_create( n , m , X ) ;
2126 
2127    /* extract each column from B, solve, put resulting column into X */
2128 
2129    for( j=0 ; j < m ; j++ ){
2130      column_to_vector( B , j , &u ) ;
2131      vector_rr_solve( R , u , &v ) ;
2132      for( i=0 ; i < n ; i++ ) X->elts[i][j] = v.elts[i] ;
2133    }
2134    vector_destroy(&v) ; vector_destroy(&u) ; return ;
2135 }
2136 
2137 /*---------------------------------------------------------------------------*/
2138 /*! Solve [R'] [X] = [B] for matrix X, where R is upper triangular
2139     (and so R' is lower triangular). */
2140 
matrix_rrtran_solve(matrix R,matrix B,matrix * X)2141 void matrix_rrtran_solve( matrix R , matrix B , matrix *X )
2142 {
2143    int n=R.rows , m=B.cols , i,j ;
2144    vector u, v ;
2145 
2146    if( R.cols != n || B.rows != n || X == NULL ) return ;
2147 
2148    vector_initialize(&u) ; vector_initialize(&v) ;
2149    matrix_create( n , m , X ) ;
2150 
2151    /* extract each column from B, solve, put resulting column into X */
2152 
2153    for( j=0 ; j < m ; j++ ){
2154      column_to_vector( B , j , &u ) ;
2155      vector_rrtran_solve( R , u , &v ) ;
2156      for( i=0 ; i < n ; i++ ) X->elts[i][j] = v.elts[i] ;
2157    }
2158    vector_destroy(&v) ; vector_destroy(&u) ; return ;
2159 }
2160 
2161 /*---------------------------------------------------------------------------*/
2162 /* not current used anywhere */
2163 
2164 #if 0
2165 int matrix_desingularize( matrix X )
2166 {
2167    int m = X.rows , n = X.cols , ii,jj , nfix ;
2168    double *amat , *xfac , sum ;
2169 
2170    if( m < 1 || n < 1 ) return -1 ;
2171 
2172    amat = (double *)calloc( sizeof(double),m*n ) ;  /* input matrix */
2173    xfac = (double *)calloc( sizeof(double),n   ) ;  /* column norms of [a] */
2174 
2175 #undef  A
2176 #define A(i,j) amat[(i)+(j)*m]
2177 
2178    /* copy input matrix into amat */
2179 
2180    for( ii=0 ; ii < m ; ii++ )
2181      for( jj=0 ; jj < n ; jj++ ) A(ii,jj) = X.elts[ii][jj] ;
2182 
2183    /* scale each column to have norm 1 */
2184 
2185    for( jj=0 ; jj < n ; jj++ ){
2186 #if 0
2187      sum = 0.0 ;
2188      for( ii=0 ; ii < m ; ii++ ) sum += A(ii,jj)*A(ii,jj) ;
2189      if( sum > 0.0 ){
2190        xfac[jj] = sqrt(sum) ;
2191        sum      = 1.0 / sum ;
2192        for( ii=0 ; ii < m ; ii++ ) A(ii,jj) *= sum ;
2193      } else {
2194        xfac[jj] = 1.0 ;
2195      }
2196 #else
2197      xfac[jj] = 1.0 ;
2198 #endif
2199    }
2200 
2201    nfix = svd_desingularize( m , n , amat ) ;
2202 
2203    if( nfix > 0 ){ /* put fixed values back in place */
2204      for( ii=0 ; ii < m ; ii++ ){
2205        for( jj=0 ; jj < n ; jj++ ){
2206          X.elts[ii][jj] = A(ii,jj) * xfac[jj] ;
2207        }
2208      }
2209    }
2210 
2211    free(xfac) ; free(amat) ; return nfix ;
2212 }
2213 #endif
2214