1 /* eigen/francis.c
2  *
3  * Copyright (C) 2006, 2007 Patrick Alken
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or (at
8  * your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  */
19 
20 #include <config.h>
21 #include <stdlib.h>
22 #include <math.h>
23 #include <gsl/gsl_eigen.h>
24 #include <gsl/gsl_linalg.h>
25 #include <gsl/gsl_math.h>
26 #include <gsl/gsl_blas.h>
27 #include <gsl/gsl_vector.h>
28 #include <gsl/gsl_vector_complex.h>
29 #include <gsl/gsl_matrix.h>
30 #include <gsl/gsl_complex.h>
31 #include <gsl/gsl_complex_math.h>
32 
33 /*
34  * This module computes the eigenvalues of a real upper hessenberg
35  * matrix, using the classical double shift Francis QR algorithm.
36  * It will also optionally compute the full Schur form and matrix of
37  * Schur vectors.
38  *
39  * See Golub & Van Loan, "Matrix Computations" (3rd ed),
40  * algorithm 7.5.2
41  */
42 
43 /* exceptional shift coefficients - these values are from LAPACK DLAHQR */
44 #define GSL_FRANCIS_COEFF1        (0.75)
45 #define GSL_FRANCIS_COEFF2        (-0.4375)
46 
47 static inline void francis_schur_decomp(gsl_matrix * H,
48                                         gsl_vector_complex * eval,
49                                         gsl_eigen_francis_workspace * w);
50 static inline size_t francis_search_subdiag_small_elements(gsl_matrix * A);
51 static inline int francis_qrstep(gsl_matrix * H,
52                                  gsl_eigen_francis_workspace * w);
53 static inline void francis_schur_standardize(gsl_matrix *A,
54                                              gsl_complex *eval1,
55                                              gsl_complex *eval2,
56                                              gsl_eigen_francis_workspace *w);
57 static inline size_t francis_get_submatrix(gsl_matrix *A, gsl_matrix *B);
58 static void francis_standard_form(gsl_matrix *A, double *cs, double *sn);
59 
60 /*
61 gsl_eigen_francis_alloc()
62 
63 Allocate a workspace for solving the nonsymmetric eigenvalue problem.
64 The size of this workspace is O(1)
65 
66 Inputs: none
67 
68 Return: pointer to workspace
69 */
70 
71 gsl_eigen_francis_workspace *
gsl_eigen_francis_alloc(void)72 gsl_eigen_francis_alloc(void)
73 {
74   gsl_eigen_francis_workspace *w;
75 
76   w = (gsl_eigen_francis_workspace *)
77       calloc (1, sizeof (gsl_eigen_francis_workspace));
78 
79   if (w == 0)
80     {
81       GSL_ERROR_NULL ("failed to allocate space for workspace", GSL_ENOMEM);
82     }
83 
84   /* these are filled in later */
85 
86   w->size = 0;
87   w->max_iterations = 0;
88   w->n_iter = 0;
89   w->n_evals = 0;
90 
91   w->compute_t = 0;
92   w->Z = NULL;
93   w->H = NULL;
94 
95   return (w);
96 } /* gsl_eigen_francis_alloc() */
97 
98 /*
99 gsl_eigen_francis_free()
100   Free francis workspace w
101 */
102 
103 void
gsl_eigen_francis_free(gsl_eigen_francis_workspace * w)104 gsl_eigen_francis_free (gsl_eigen_francis_workspace *w)
105 {
106   RETURN_IF_NULL (w);
107   free(w);
108 } /* gsl_eigen_francis_free() */
109 
110 /*
111 gsl_eigen_francis_T()
112   Called when we want to compute the Schur form T, or no longer
113 compute the Schur form T
114 
115 Inputs: compute_t - 1 to compute T, 0 to not compute T
116         w         - francis workspace
117 */
118 
119 void
gsl_eigen_francis_T(const int compute_t,gsl_eigen_francis_workspace * w)120 gsl_eigen_francis_T (const int compute_t, gsl_eigen_francis_workspace *w)
121 {
122   w->compute_t = compute_t;
123 }
124 
125 /*
126 gsl_eigen_francis()
127 
128 Solve the nonsymmetric eigenvalue problem
129 
130 H x = \lambda x
131 
132 for the eigenvalues \lambda using algorithm 7.5.2 of
133 Golub & Van Loan, "Matrix Computations" (3rd ed)
134 
135 Inputs: H    - upper hessenberg matrix
136         eval - where to store eigenvalues
137         w    - workspace
138 
139 Return: success or error - if error code is returned,
140         then the QR procedure did not converge in the
141         allowed number of iterations. In the event of non-
142         convergence, the number of eigenvalues found will
143         still be stored in the beginning of eval,
144 
145 Notes: On output, the diagonal of H contains 1-by-1 or 2-by-2
146        blocks containing the eigenvalues. If T is desired,
147        H will contain the full Schur form on output.
148 */
149 
150 int
gsl_eigen_francis(gsl_matrix * H,gsl_vector_complex * eval,gsl_eigen_francis_workspace * w)151 gsl_eigen_francis (gsl_matrix * H, gsl_vector_complex * eval,
152                    gsl_eigen_francis_workspace * w)
153 {
154   /* check matrix and vector sizes */
155 
156   if (H->size1 != H->size2)
157     {
158       GSL_ERROR ("matrix must be square to compute eigenvalues", GSL_ENOTSQR);
159     }
160   else if (eval->size != H->size1)
161     {
162       GSL_ERROR ("eigenvalue vector must match matrix size", GSL_EBADLEN);
163     }
164   else
165     {
166       const size_t N = H->size1;
167       int j;
168 
169       /*
170        * Set internal parameters which depend on matrix size.
171        * The Francis solver can be called with any size matrix
172        * since the workspace does not depend on N.
173        * Furthermore, multishift solvers which call the Francis
174        * solver may need to call it with different sized matrices
175        */
176       w->size = N;
177       w->max_iterations = 30 * N;
178 
179       /*
180        * save a pointer to original matrix since francis_schur_decomp
181        * is recursive
182        */
183       w->H = H;
184 
185       w->n_iter = 0;
186       w->n_evals = 0;
187 
188       /*
189        * zero out the first two subdiagonals (below the main subdiagonal)
190        * needed as scratch space by the QR sweep routine
191        */
192       for (j = 0; j < (int) N - 3; ++j)
193         {
194           gsl_matrix_set(H, (size_t) j + 2, (size_t) j, 0.0);
195           gsl_matrix_set(H, (size_t) j + 3, (size_t) j, 0.0);
196         }
197 
198       if (N > 2)
199         gsl_matrix_set(H, N - 1, N - 3, 0.0);
200 
201       /*
202        * compute Schur decomposition of H and store eigenvalues
203        * into eval
204        */
205       francis_schur_decomp(H, eval, w);
206 
207       if (w->n_evals != N)
208         {
209           GSL_ERROR ("maximum iterations reached without finding all eigenvalues", GSL_EMAXITER);
210         }
211 
212       return GSL_SUCCESS;
213     }
214 } /* gsl_eigen_francis() */
215 
216 /*
217 gsl_eigen_francis_Z()
218 
219 Solve the nonsymmetric eigenvalue problem for a Hessenberg
220 matrix
221 
222 H x = \lambda x
223 
224 for the eigenvalues \lambda using the Francis double-shift
225 method.
226 
227 Here we compute the real Schur form
228 
229 T = Q^t H Q
230 
231 with the diagonal blocks of T giving us the eigenvalues.
232 Q is the matrix of Schur vectors.
233 
234 Originally, H was obtained from a general nonsymmetric matrix
235 A via a transformation
236 
237 H = U^t A U
238 
239 so that
240 
241 T = (UQ)^t A (UQ) = Z^t A Z
242 
243 Z is the matrix of Schur vectors computed by this algorithm
244 
245 Inputs: H    - upper hessenberg matrix
246         eval - where to store eigenvalues
247         Z    - where to store Schur vectors
248         w    - workspace
249 
250 Notes: 1) If T is computed, it is stored in H on output. Otherwise,
251           the diagonal of H will contain 1-by-1 and 2-by-2 blocks
252           containing the eigenvalues.
253 
254        2) The matrix Z must be initialized to the Hessenberg
255           similarity matrix U. Or if you want the eigenvalues
256           of H, initialize Z to the identity matrix.
257 */
258 
259 int
gsl_eigen_francis_Z(gsl_matrix * H,gsl_vector_complex * eval,gsl_matrix * Z,gsl_eigen_francis_workspace * w)260 gsl_eigen_francis_Z (gsl_matrix * H, gsl_vector_complex * eval,
261                      gsl_matrix * Z, gsl_eigen_francis_workspace * w)
262 {
263   int s;
264 
265   /* set internal Z pointer so we know to accumulate transformations */
266   w->Z = Z;
267 
268   s = gsl_eigen_francis(H, eval, w);
269 
270   w->Z = NULL;
271 
272   return s;
273 } /* gsl_eigen_francis_Z() */
274 
275 /********************************************
276  *           INTERNAL ROUTINES              *
277  ********************************************/
278 
279 /*
280 francis_schur_decomp()
281   Compute the Schur decomposition of the matrix H
282 
283 Inputs: H     - hessenberg matrix
284         eval  - where to store eigenvalues
285         w     - workspace
286 
287 Return: none
288 */
289 
290 static inline void
francis_schur_decomp(gsl_matrix * H,gsl_vector_complex * eval,gsl_eigen_francis_workspace * w)291 francis_schur_decomp(gsl_matrix * H, gsl_vector_complex * eval,
292                      gsl_eigen_francis_workspace * w)
293 {
294   gsl_matrix_view m;   /* active matrix we are working on */
295   size_t N;            /* size of matrix */
296   size_t q;            /* index of small subdiagonal element */
297   gsl_complex lambda1, /* eigenvalues */
298               lambda2;
299 
300   N = H->size1;
301   m = gsl_matrix_submatrix(H, 0, 0, N, N);
302 
303   while ((N > 2) && ((w->n_iter)++ < w->max_iterations))
304     {
305       q = francis_search_subdiag_small_elements(&m.matrix);
306 
307       if (q == 0)
308         {
309           /*
310            * no small subdiagonal element found - perform a QR
311            * sweep on the active reduced hessenberg matrix
312            */
313           francis_qrstep(&m.matrix, w);
314           continue;
315         }
316 
317       /*
318        * a small subdiagonal element was found - one or two eigenvalues
319        * have converged or the matrix has split into two smaller matrices
320        */
321 
322       if (q == (N - 1))
323         {
324           /*
325            * the last subdiagonal element of the matrix is 0 -
326            * m_{NN} is a real eigenvalue
327            */
328           GSL_SET_COMPLEX(&lambda1,
329                           gsl_matrix_get(&m.matrix, q, q), 0.0);
330           gsl_vector_complex_set(eval, w->n_evals, lambda1);
331           w->n_evals += 1;
332           w->n_iter = 0;
333 
334           --N;
335           m = gsl_matrix_submatrix(&m.matrix, 0, 0, N, N);
336         }
337       else if (q == (N - 2))
338         {
339           gsl_matrix_view v;
340 
341           /*
342            * The bottom right 2-by-2 block of m is an eigenvalue
343            * system
344            */
345 
346           v = gsl_matrix_submatrix(&m.matrix, q, q, 2, 2);
347           francis_schur_standardize(&v.matrix, &lambda1, &lambda2, w);
348 
349           gsl_vector_complex_set(eval, w->n_evals, lambda1);
350           gsl_vector_complex_set(eval, w->n_evals + 1, lambda2);
351           w->n_evals += 2;
352           w->n_iter = 0;
353 
354           N -= 2;
355           m = gsl_matrix_submatrix(&m.matrix, 0, 0, N, N);
356         }
357       else if (q == 1)
358         {
359           /* the first matrix element is an eigenvalue */
360           GSL_SET_COMPLEX(&lambda1,
361                           gsl_matrix_get(&m.matrix, 0, 0), 0.0);
362           gsl_vector_complex_set(eval, w->n_evals, lambda1);
363           w->n_evals += 1;
364           w->n_iter = 0;
365 
366           --N;
367           m = gsl_matrix_submatrix(&m.matrix, 1, 1, N, N);
368         }
369       else if (q == 2)
370         {
371           gsl_matrix_view v;
372 
373           /* the upper left 2-by-2 block is an eigenvalue system */
374 
375           v = gsl_matrix_submatrix(&m.matrix, 0, 0, 2, 2);
376           francis_schur_standardize(&v.matrix, &lambda1, &lambda2, w);
377 
378           gsl_vector_complex_set(eval, w->n_evals, lambda1);
379           gsl_vector_complex_set(eval, w->n_evals + 1, lambda2);
380           w->n_evals += 2;
381           w->n_iter = 0;
382 
383           N -= 2;
384           m = gsl_matrix_submatrix(&m.matrix, 2, 2, N, N);
385         }
386       else
387         {
388           gsl_matrix_view v;
389 
390           /*
391            * There is a zero element on the subdiagonal somewhere
392            * in the middle of the matrix - we can now operate
393            * separately on the two submatrices split by this
394            * element. q is the row index of the zero element.
395            */
396 
397           /* operate on lower right (N - q)-by-(N - q) block first */
398           v = gsl_matrix_submatrix(&m.matrix, q, q, N - q, N - q);
399           francis_schur_decomp(&v.matrix, eval, w);
400 
401           /* operate on upper left q-by-q block */
402           v = gsl_matrix_submatrix(&m.matrix, 0, 0, q, q);
403           francis_schur_decomp(&v.matrix, eval, w);
404 
405           N = 0;
406         }
407     }
408 
409   /* handle special cases of N = 1 or 2 */
410 
411   if (N == 1)
412     {
413       GSL_SET_COMPLEX(&lambda1, gsl_matrix_get(&m.matrix, 0, 0), 0.0);
414       gsl_vector_complex_set(eval, w->n_evals, lambda1);
415       w->n_evals += 1;
416       w->n_iter = 0;
417     }
418   else if (N == 2)
419     {
420       francis_schur_standardize(&m.matrix, &lambda1, &lambda2, w);
421       gsl_vector_complex_set(eval, w->n_evals, lambda1);
422       gsl_vector_complex_set(eval, w->n_evals + 1, lambda2);
423       w->n_evals += 2;
424       w->n_iter = 0;
425     }
426 } /* francis_schur_decomp() */
427 
428 /*
429 francis_qrstep()
430   Perform a Francis QR step.
431 
432 See Golub & Van Loan, "Matrix Computations" (3rd ed),
433 algorithm 7.5.1
434 
435 Inputs: H - upper Hessenberg matrix
436         w - workspace
437 
438 Notes: The matrix H must be "reduced", ie: have no tiny subdiagonal
439        elements. When computing the first householder reflection,
440        we divide by H_{21} so it is necessary that this element
441        is not zero. When a subdiagonal element becomes negligible,
442        the calling function should call this routine with the
443        submatrices split by that element, so that we don't divide
444        by zeros.
445 */
446 
447 static inline int
francis_qrstep(gsl_matrix * H,gsl_eigen_francis_workspace * w)448 francis_qrstep(gsl_matrix * H, gsl_eigen_francis_workspace * w)
449 {
450   const size_t N = H->size1;
451   size_t i;        /* looping */
452   gsl_matrix_view m;
453   double tau_i;    /* householder coefficient */
454   double dat[3];   /* householder vector */
455   double scale;    /* scale factor to avoid overflow */
456   gsl_vector_view v2, v3;
457   size_t q, r;
458   size_t top = 0;  /* location of H in original matrix */
459   double s,
460          disc;
461   double h_nn,     /* H(n,n) */
462          h_nm1nm1, /* H(n-1,n-1) */
463          h_cross,  /* H(n,n-1) * H(n-1,n) */
464          h_tmp1,
465          h_tmp2;
466 
467   v2 = gsl_vector_view_array(dat, 2);
468   v3 = gsl_vector_view_array(dat, 3);
469 
470   if ((w->n_iter % 10) == 0)
471     {
472       /*
473        * exceptional shifts: we have gone 10 iterations
474        * without finding a new eigenvalue, try a new choice of shifts.
475        * See LAPACK routine DLAHQR
476        */
477       s = fabs(gsl_matrix_get(H, N - 1, N - 2)) +
478           fabs(gsl_matrix_get(H, N - 2, N - 3));
479       h_nn = gsl_matrix_get(H, N - 1, N - 1) + GSL_FRANCIS_COEFF1 * s;
480       h_nm1nm1 = h_nn;
481       h_cross = GSL_FRANCIS_COEFF2 * s * s;
482     }
483   else
484     {
485       /*
486        * normal shifts - compute Rayleigh quotient and use
487        * Wilkinson shift if possible
488        */
489 
490       h_nn = gsl_matrix_get(H, N - 1, N - 1);
491       h_nm1nm1 = gsl_matrix_get(H, N - 2, N - 2);
492       h_cross = gsl_matrix_get(H, N - 1, N - 2) *
493                 gsl_matrix_get(H, N - 2, N - 1);
494 
495       disc = 0.5 * (h_nm1nm1 - h_nn);
496       disc = disc * disc + h_cross;
497       if (disc > 0.0)
498         {
499           double ave;
500 
501           /* real roots - use Wilkinson's shift twice */
502           disc = sqrt(disc);
503           ave = 0.5 * (h_nm1nm1 + h_nn);
504           if (fabs(h_nm1nm1) - fabs(h_nn) > 0.0)
505             {
506               h_nm1nm1 = h_nm1nm1 * h_nn - h_cross;
507               h_nn = h_nm1nm1 / (disc * GSL_SIGN(ave) + ave);
508             }
509           else
510             {
511               h_nn = disc * GSL_SIGN(ave) + ave;
512             }
513 
514           h_nm1nm1 = h_nn;
515           h_cross = 0.0;
516         }
517     }
518 
519   h_tmp1 = h_nm1nm1 - gsl_matrix_get(H, 0, 0);
520   h_tmp2 = h_nn - gsl_matrix_get(H, 0, 0);
521 
522   /*
523    * These formulas are equivalent to those in Golub & Van Loan
524    * for the normal shift case - the terms have been rearranged
525    * to reduce possible roundoff error when subdiagonal elements
526    * are small
527    */
528 
529   dat[0] = (h_tmp1*h_tmp2 - h_cross) / gsl_matrix_get(H, 1, 0) +
530            gsl_matrix_get(H, 0, 1);
531   dat[1] = gsl_matrix_get(H, 1, 1) - gsl_matrix_get(H, 0, 0) - h_tmp1 -
532            h_tmp2;
533   dat[2] = gsl_matrix_get(H, 2, 1);
534 
535   scale = fabs(dat[0]) + fabs(dat[1]) + fabs(dat[2]);
536   if (scale != 0.0)
537     {
538       /* scale to prevent overflow or underflow */
539       dat[0] /= scale;
540       dat[1] /= scale;
541       dat[2] /= scale;
542     }
543 
544   if (w->Z || w->compute_t)
545     {
546       /*
547        * get absolute indices of this (sub)matrix relative to the
548        * original Hessenberg matrix
549        */
550       top = francis_get_submatrix(w->H, H);
551     }
552 
553   for (i = 0; i < N - 2; ++i)
554     {
555       tau_i = gsl_linalg_householder_transform(&v3.vector);
556 
557       if (tau_i != 0.0)
558         {
559           /* q = max(1, i - 1) */
560           q = (1 > ((int)i - 1)) ? 0 : (i - 1);
561 
562           /* r = min(i + 3, N - 1) */
563           r = ((i + 3) < (N - 1)) ? (i + 3) : (N - 1);
564 
565           if (w->compute_t)
566             {
567               /*
568                * We are computing the Schur form T, so we
569                * need to transform the whole matrix H
570                *
571                * H -> P_k^t H P_k
572                *
573                * where P_k is the current Householder matrix
574                */
575 
576               /* apply left householder matrix (I - tau_i v v') to H */
577               m = gsl_matrix_submatrix(w->H,
578                                        top + i,
579                                        top + q,
580                                        3,
581                                        w->size - top - q);
582               gsl_linalg_householder_hm(tau_i, &v3.vector, &m.matrix);
583 
584               /* apply right householder matrix (I - tau_i v v') to H */
585               m = gsl_matrix_submatrix(w->H,
586                                        0,
587                                        top + i,
588                                        top + r + 1,
589                                        3);
590               gsl_linalg_householder_mh(tau_i, &v3.vector, &m.matrix);
591             }
592           else
593             {
594               /*
595                * We are not computing the Schur form T, so we
596                * only need to transform the active block
597                */
598 
599               /* apply left householder matrix (I - tau_i v v') to H */
600               m = gsl_matrix_submatrix(H, i, q, 3, N - q);
601               gsl_linalg_householder_hm(tau_i, &v3.vector, &m.matrix);
602 
603               /* apply right householder matrix (I - tau_i v v') to H */
604               m = gsl_matrix_submatrix(H, 0, i, r + 1, 3);
605               gsl_linalg_householder_mh(tau_i, &v3.vector, &m.matrix);
606             }
607 
608           if (w->Z)
609             {
610               /* accumulate the similarity transformation into Z */
611               m = gsl_matrix_submatrix(w->Z, 0, top + i, w->size, 3);
612               gsl_linalg_householder_mh(tau_i, &v3.vector, &m.matrix);
613             }
614         } /* if (tau_i != 0.0) */
615 
616       dat[0] = gsl_matrix_get(H, i + 1, i);
617       dat[1] = gsl_matrix_get(H, i + 2, i);
618       if (i < (N - 3))
619         {
620           dat[2] = gsl_matrix_get(H, i + 3, i);
621         }
622 
623       scale = fabs(dat[0]) + fabs(dat[1]) + fabs(dat[2]);
624       if (scale != 0.0)
625         {
626           /* scale to prevent overflow or underflow */
627           dat[0] /= scale;
628           dat[1] /= scale;
629           dat[2] /= scale;
630         }
631     } /* for (i = 0; i < N - 2; ++i) */
632 
633   scale = fabs(dat[0]) + fabs(dat[1]);
634   if (scale != 0.0)
635     {
636       /* scale to prevent overflow or underflow */
637       dat[0] /= scale;
638       dat[1] /= scale;
639     }
640 
641   tau_i = gsl_linalg_householder_transform(&v2.vector);
642 
643   if (w->compute_t)
644     {
645       m = gsl_matrix_submatrix(w->H,
646                                top + N - 2,
647                                top + N - 3,
648                                2,
649                                w->size - top - N + 3);
650       gsl_linalg_householder_hm(tau_i, &v2.vector, &m.matrix);
651 
652       m = gsl_matrix_submatrix(w->H,
653                                0,
654                                top + N - 2,
655                                top + N,
656                                2);
657       gsl_linalg_householder_mh(tau_i, &v2.vector, &m.matrix);
658     }
659   else
660     {
661       m = gsl_matrix_submatrix(H, N - 2, N - 3, 2, 3);
662       gsl_linalg_householder_hm(tau_i, &v2.vector, &m.matrix);
663 
664       m = gsl_matrix_submatrix(H, 0, N - 2, N, 2);
665       gsl_linalg_householder_mh(tau_i, &v2.vector, &m.matrix);
666     }
667 
668   if (w->Z)
669     {
670       /* accumulate transformation into Z */
671       m = gsl_matrix_submatrix(w->Z, 0, top + N - 2, w->size, 2);
672       gsl_linalg_householder_mh(tau_i, &v2.vector, &m.matrix);
673     }
674 
675   return GSL_SUCCESS;
676 } /* francis_qrstep() */
677 
678 /*
679 francis_search_subdiag_small_elements()
680   Search for a small subdiagonal element starting from the bottom
681 of a matrix A. A small element is one that satisfies:
682 
683 |A_{i,i-1}| <= eps * (|A_{i,i}| + |A_{i-1,i-1}|)
684 
685 Inputs: A - matrix (must be at least 3-by-3)
686 
687 Return: row index of small subdiagonal element or 0 if not found
688 
689 Notes: the first small element that is found (starting from bottom)
690        is set to zero
691 */
692 
693 static inline size_t
francis_search_subdiag_small_elements(gsl_matrix * A)694 francis_search_subdiag_small_elements(gsl_matrix * A)
695 {
696   const size_t N = A->size1;
697   size_t i;
698   double dpel = gsl_matrix_get(A, N - 2, N - 2);
699 
700   for (i = N - 1; i > 0; --i)
701     {
702       double sel = gsl_matrix_get(A, i, i - 1);
703       double del = gsl_matrix_get(A, i, i);
704 
705       if ((sel == 0.0) ||
706           (fabs(sel) < GSL_DBL_EPSILON * (fabs(del) + fabs(dpel))))
707         {
708           gsl_matrix_set(A, i, i - 1, 0.0);
709           return (i);
710         }
711 
712       dpel = del;
713     }
714 
715   return (0);
716 } /* francis_search_subdiag_small_elements() */
717 
718 /*
719 francis_schur_standardize()
720   Convert a 2-by-2 diagonal block in the Schur form to standard form
721 and update the rest of T and Z matrices if required.
722 
723 Inputs: A     - 2-by-2 matrix
724         eval1 - where to store eigenvalue 1
725         eval2 - where to store eigenvalue 2
726         w     - francis workspace
727 */
728 
729 static inline void
francis_schur_standardize(gsl_matrix * A,gsl_complex * eval1,gsl_complex * eval2,gsl_eigen_francis_workspace * w)730 francis_schur_standardize(gsl_matrix *A, gsl_complex *eval1,
731                           gsl_complex *eval2,
732                           gsl_eigen_francis_workspace *w)
733 {
734   const size_t N = w->size;
735   double cs, sn;
736   size_t top;
737 
738   /*
739    * figure out where the submatrix A resides in the
740    * original matrix H
741    */
742   top = francis_get_submatrix(w->H, A);
743 
744   /* convert 2-by-2 block to standard form */
745   francis_standard_form(A, &cs, &sn);
746 
747   /* set eigenvalues */
748 
749   GSL_SET_REAL(eval1, gsl_matrix_get(A, 0, 0));
750   GSL_SET_REAL(eval2, gsl_matrix_get(A, 1, 1));
751   if (gsl_matrix_get(A, 1, 0) == 0.0)
752     {
753       GSL_SET_IMAG(eval1, 0.0);
754       GSL_SET_IMAG(eval2, 0.0);
755     }
756   else
757     {
758       double tmp = sqrt(fabs(gsl_matrix_get(A, 0, 1)) *
759                         fabs(gsl_matrix_get(A, 1, 0)));
760       GSL_SET_IMAG(eval1, tmp);
761       GSL_SET_IMAG(eval2, -tmp);
762     }
763 
764   if (w->compute_t)
765     {
766       gsl_vector_view xv, yv;
767 
768       /*
769        * The above call to francis_standard_form transformed a 2-by-2 block
770        * of T into upper triangular form via the transformation
771        *
772        * U = [ CS -SN ]
773        *     [ SN  CS ]
774        *
775        * The original matrix T was
776        *
777        * T = [ T_{11} | T_{12} | T_{13} ]
778        *     [   0*   |   A    | T_{23} ]
779        *     [   0    |   0*   | T_{33} ]
780        *
781        * where 0* indicates all zeros except for possibly
782        * one subdiagonal element next to A.
783        *
784        * After francis_standard_form, T looks like this:
785        *
786        * T = [ T_{11} | T_{12}  | T_{13} ]
787        *     [   0*   | U^t A U | T_{23} ]
788        *     [   0    |    0*   | T_{33} ]
789        *
790        * since only the 2-by-2 block of A was changed. However,
791        * in order to be able to back transform T at the end,
792        * we need to apply the U transformation to the rest
793        * of the matrix T since there is no way to apply a
794        * similarity transformation to T and change only the
795        * middle 2-by-2 block. In other words, let
796        *
797        * M = [ I 0 0 ]
798        *     [ 0 U 0 ]
799        *     [ 0 0 I ]
800        *
801        * and compute
802        *
803        * M^t T M = [ T_{11} | T_{12} U |   T_{13}   ]
804        *           [ U^t 0* | U^t A U  | U^t T_{23} ]
805        *           [   0    |   0* U   |   T_{33}   ]
806        *
807        * So basically we need to apply the transformation U
808        * to the i x 2 matrix T_{12} and the 2 x (n - i + 2)
809        * matrix T_{23}, where i is the index of the top of A
810        * in T.
811        *
812        * The BLAS routine drot() is suited for this.
813        */
814 
815       if (top < (N - 2))
816         {
817           /* transform the 2 rows of T_{23} */
818 
819           xv = gsl_matrix_subrow(w->H, top, top + 2, N - top - 2);
820           yv = gsl_matrix_subrow(w->H, top + 1, top + 2, N - top - 2);
821           gsl_blas_drot(&xv.vector, &yv.vector, cs, sn);
822         }
823 
824       if (top > 0)
825         {
826           /* transform the 2 columns of T_{12} */
827 
828           xv = gsl_matrix_subcolumn(w->H, top, 0, top);
829           yv = gsl_matrix_subcolumn(w->H, top + 1, 0, top);
830           gsl_blas_drot(&xv.vector, &yv.vector, cs, sn);
831         }
832     } /* if (w->compute_t) */
833 
834   if (w->Z)
835     {
836       gsl_vector_view xv, yv;
837 
838       /*
839        * Accumulate the transformation in Z. Here, Z -> Z * M
840        *
841        * So:
842        *
843        * Z -> [ Z_{11} | Z_{12} U | Z_{13} ]
844        *      [ Z_{21} | Z_{22} U | Z_{23} ]
845        *      [ Z_{31} | Z_{32} U | Z_{33} ]
846        *
847        * So we just need to apply drot() to the 2 columns
848        * starting at index 'top'
849        */
850 
851       xv = gsl_matrix_column(w->Z, top);
852       yv = gsl_matrix_column(w->Z, top + 1);
853 
854       gsl_blas_drot(&xv.vector, &yv.vector, cs, sn);
855     } /* if (w->Z) */
856 } /* francis_schur_standardize() */
857 
858 /*
859 francis_get_submatrix()
860   B is a submatrix of A. The goal of this function is to
861 compute the indices in A of where the matrix B resides
862 */
863 
864 static inline size_t
francis_get_submatrix(gsl_matrix * A,gsl_matrix * B)865 francis_get_submatrix(gsl_matrix *A, gsl_matrix *B)
866 {
867   size_t diff;
868   double ratio;
869   size_t top;
870 
871   diff = (size_t) (B->data - A->data);
872 
873   ratio = (double)diff / ((double) (A->tda + 1));
874 
875   top = (size_t) floor(ratio);
876 
877   return top;
878 } /* francis_get_submatrix() */
879 
880 /*
881 francis_standard_form()
882   Compute the Schur factorization of a real 2-by-2 matrix in
883 standard form:
884 
885 [ A B ] = [ CS -SN ] [ T11 T12 ] [ CS SN ]
886 [ C D ]   [ SN  CS ] [ T21 T22 ] [-SN CS ]
887 
888 where either:
889 1) T21 = 0 so that T11 and T22 are real eigenvalues of the matrix, or
890 2) T11 = T22 and T21*T12 < 0, so that T11 +/- sqrt(|T21*T12|) are
891    complex conjugate eigenvalues
892 
893 Inputs: A  - 2-by-2 matrix
894         cs - where to store cosine parameter of rotation matrix
895         sn - where to store sine parameter of rotation matrix
896 
897 Notes: 1) based on LAPACK routine DLANV2
898        2) On output, A is modified to contain the matrix in standard form
899 */
900 
901 static void
francis_standard_form(gsl_matrix * A,double * cs,double * sn)902 francis_standard_form(gsl_matrix *A, double *cs, double *sn)
903 {
904   double a, b, c, d; /* input matrix values */
905   double tmp;
906   double p, z;
907   double bcmax, bcmis, scale;
908   double tau, sigma;
909   double cs1, sn1;
910   double aa, bb, cc, dd;
911   double sab, sac;
912 
913   a = gsl_matrix_get(A, 0, 0);
914   b = gsl_matrix_get(A, 0, 1);
915   c = gsl_matrix_get(A, 1, 0);
916   d = gsl_matrix_get(A, 1, 1);
917 
918   if (c == 0.0)
919     {
920       /*
921        * matrix is already upper triangular - set rotation matrix
922        * to the identity
923        */
924       *cs = 1.0;
925       *sn = 0.0;
926     }
927   else if (b == 0.0)
928     {
929       /* swap rows and columns to make it upper triangular */
930 
931       *cs = 0.0;
932       *sn = 1.0;
933 
934       tmp = d;
935       d = a;
936       a = tmp;
937       b = -c;
938       c = 0.0;
939     }
940   else if (((a - d) == 0.0) && (GSL_SIGN(b) != GSL_SIGN(c)))
941     {
942       /* the matrix has complex eigenvalues with a == d */
943       *cs = 1.0;
944       *sn = 0.0;
945     }
946   else
947     {
948       tmp = a - d;
949       p = 0.5 * tmp;
950       bcmax = GSL_MAX(fabs(b), fabs(c));
951       bcmis = GSL_MIN(fabs(b), fabs(c)) * GSL_SIGN(b) * GSL_SIGN(c);
952       scale = GSL_MAX(fabs(p), bcmax);
953       z = (p / scale) * p + (bcmax / scale) * bcmis;
954 
955       if (z >= 4.0 * GSL_DBL_EPSILON)
956         {
957           /* real eigenvalues, compute a and d */
958 
959           z = p + GSL_SIGN(p) * fabs(sqrt(scale) * sqrt(z));
960           a = d + z;
961           d -= (bcmax / z) * bcmis;
962 
963           /* compute b and the rotation matrix */
964 
965           tau = gsl_hypot(c, z);
966           *cs = z / tau;
967           *sn = c / tau;
968           b -= c;
969           c = 0.0;
970         }
971       else
972         {
973           /*
974            * complex eigenvalues, or real (almost) equal eigenvalues -
975            * make diagonal elements equal
976            */
977 
978           sigma = b + c;
979           tau = gsl_hypot(sigma, tmp);
980           *cs = sqrt(0.5 * (1.0 + fabs(sigma) / tau));
981           *sn = -(p / (tau * (*cs))) * GSL_SIGN(sigma);
982 
983           /*
984            * Compute [ AA BB ] = [ A B ] [ CS -SN ]
985            *         [ CC DD ]   [ C D ] [ SN  CS ]
986            */
987           aa = a * (*cs) + b * (*sn);
988           bb = -a * (*sn) + b * (*cs);
989           cc = c * (*cs) + d * (*sn);
990           dd = -c * (*sn) + d * (*cs);
991 
992           /*
993            * Compute [ A B ] = [ CS SN ] [ AA BB ]
994            *         [ C D ]   [-SN CS ] [ CC DD ]
995            */
996           a = aa * (*cs) + cc * (*sn);
997           b = bb * (*cs) + dd * (*sn);
998           c = -aa * (*sn) + cc * (*cs);
999           d = -bb * (*sn) + dd * (*cs);
1000 
1001           tmp = 0.5 * (a + d);
1002           a = d = tmp;
1003 
1004           if (c != 0.0)
1005             {
1006               if (b != 0.0)
1007                 {
1008                   if (GSL_SIGN(b) == GSL_SIGN(c))
1009                     {
1010                       /*
1011                        * real eigenvalues: reduce to upper triangular
1012                        * form
1013                        */
1014                       sab = sqrt(fabs(b));
1015                       sac = sqrt(fabs(c));
1016                       p = GSL_SIGN(c) * fabs(sab * sac);
1017                       tau = 1.0 / sqrt(fabs(b + c));
1018                       a = tmp + p;
1019                       d = tmp - p;
1020                       b -= c;
1021                       c = 0.0;
1022 
1023                       cs1 = sab * tau;
1024                       sn1 = sac * tau;
1025                       tmp = (*cs) * cs1 - (*sn) * sn1;
1026                       *sn = (*cs) * sn1 + (*sn) * cs1;
1027                       *cs = tmp;
1028                     }
1029                 }
1030               else
1031                 {
1032                   b = -c;
1033                   c = 0.0;
1034                   tmp = *cs;
1035                   *cs = -(*sn);
1036                   *sn = tmp;
1037                 }
1038             }
1039         }
1040     }
1041 
1042   /* set new matrix elements */
1043 
1044   gsl_matrix_set(A, 0, 0, a);
1045   gsl_matrix_set(A, 0, 1, b);
1046   gsl_matrix_set(A, 1, 0, c);
1047   gsl_matrix_set(A, 1, 1, d);
1048 } /* francis_standard_form() */
1049