1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
// We used the research report "A Divide-And-Conquer Algorithm for the Bidiagonal SVD"
// written by Ming Gu and Stanley C. Eisenstat.
// The variable names in the code correspond to the names used in that report.
8 //
9 // Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
10 // Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
11 // Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
12 // Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
13 // Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
14 // Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>
15 //
16 // Source Code Form is subject to the terms of the Mozilla
17 // Public License v. 2.0. If a copy of the MPL was not distributed
18 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
19 
20 #ifndef EIGEN_BDCSVD_H
21 #define EIGEN_BDCSVD_H
22 // #define EIGEN_BDCSVD_DEBUG_VERBOSE
23 // #define EIGEN_BDCSVD_SANITY_CHECKS
24 
25 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
26 #undef eigen_internal_assert
27 #define eigen_internal_assert(X) assert(X);
28 #endif
29 
30 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
31 #include <iostream>
32 #endif
33 
34 namespace Eigen {
35 
36 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
37 IOFormat bdcsvdfmt(8, 0, ", ", "\n", "  [", "]");
38 #endif
39 
40 template<typename _MatrixType> class BDCSVD;
41 
42 namespace internal {
43 
44 template<typename _MatrixType>
45 struct traits<BDCSVD<_MatrixType> >
46         : traits<_MatrixType>
47 {
48   typedef _MatrixType MatrixType;
49 };
50 
51 } // end namespace internal
52 
53 
54 /** \ingroup SVD_Module
55  *
56  *
57  * \class BDCSVD
58  *
59  * \brief class Bidiagonal Divide and Conquer SVD
60  *
61  * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
62  *
63  * This class first reduces the input matrix to bi-diagonal form using class UpperBidiagonalization,
64  * and then performs a divide-and-conquer diagonalization. Small blocks are diagonalized using class JacobiSVD.
 * You can control the switching size with the setSwitchSize() method; the default is 16.
 * For small matrices (<16), it is thus preferable to directly use JacobiSVD. For larger ones, BDCSVD is highly
 * recommended and can be several orders of magnitude faster.
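 *
 * As a minimal usage sketch (the matrix type, sizes and option flags below are only illustrative):
 * \code
 * Eigen::MatrixXd A = Eigen::MatrixXd::Random(1000, 300);
 * Eigen::BDCSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
 * // A is recovered (up to round-off) as U * S * V^*
 * Eigen::MatrixXd approx = svd.matrixU() * svd.singularValues().asDiagonal() * svd.matrixV().adjoint();
 * \endcode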
68  *
 * \warning This algorithm is unlikely to provide accurate results when compiled with unsafe math optimizations.
70  * For instance, this concerns Intel's compiler (ICC), which performs such optimization by default unless
71  * you compile with the \c -fp-model \c precise option. Likewise, the \c -ffast-math option of GCC or clang will
72  * significantly degrade the accuracy.
73  *
74  * \sa class JacobiSVD
75  */
76 template<typename _MatrixType>
77 class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
78 {
79   typedef SVDBase<BDCSVD> Base;
80 
81 public:
82   using Base::rows;
83   using Base::cols;
84   using Base::computeU;
85   using Base::computeV;
86 
87   typedef _MatrixType MatrixType;
88   typedef typename MatrixType::Scalar Scalar;
89   typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
90   typedef typename NumTraits<RealScalar>::Literal Literal;
91   enum {
92     RowsAtCompileTime = MatrixType::RowsAtCompileTime,
93     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
94     DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime),
95     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
96     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
97     MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime, MaxColsAtCompileTime),
98     MatrixOptions = MatrixType::Options
99   };
100 
101   typedef typename Base::MatrixUType MatrixUType;
102   typedef typename Base::MatrixVType MatrixVType;
103   typedef typename Base::SingularValuesType SingularValuesType;
104 
105   typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> MatrixX;
106   typedef Matrix<RealScalar, Dynamic, Dynamic, ColMajor> MatrixXr;
107   typedef Matrix<RealScalar, Dynamic, 1> VectorType;
108   typedef Array<RealScalar, Dynamic, 1> ArrayXr;
109   typedef Array<Index,1,Dynamic> ArrayXi;
110   typedef Ref<ArrayXr> ArrayRef;
111   typedef Ref<ArrayXi> IndicesRef;
112 
113   /** \brief Default Constructor.
114    *
115    * The default constructor is useful in cases in which the user intends to
116    * perform decompositions via BDCSVD::compute(const MatrixType&).
117    */
118   BDCSVD() : m_algoswap(16), m_isTranspose(false), m_compU(false), m_compV(false), m_numIters(0)
119   {}
120 
121 
122   /** \brief Default Constructor with memory preallocation
123    *
124    * Like the default constructor but with preallocation of the internal data
125    * according to the specified problem size.
126    * \sa BDCSVD()
127    */
128   BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0)
129     : m_algoswap(16), m_numIters(0)
130   {
131     allocate(rows, cols, computationOptions);
132   }
133 
134   /** \brief Constructor performing the decomposition of given matrix.
135    *
136    * \param matrix the matrix to decompose
   * \param computationOptions optional parameter allowing one to specify whether you want full or thin U or V unitaries to be computed.
   *                           By default, none is computed. This is a bit-field; the possible bits are #ComputeFullU, #ComputeThinU,
   *                           #ComputeFullV, #ComputeThinV.
   *
   * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They are also not
   * available with the (non-default) FullPivHouseholderQR preconditioner.
143    */
144   BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
145     : m_algoswap(16), m_numIters(0)
146   {
147     compute(matrix, computationOptions);
148   }
149 
150   ~BDCSVD()
151   {
152   }
153 
154   /** \brief Method performing the decomposition of given matrix using custom options.
155    *
156    * \param matrix the matrix to decompose
   * \param computationOptions optional parameter allowing one to specify whether you want full or thin U or V unitaries to be computed.
   *                           By default, none is computed. This is a bit-field; the possible bits are #ComputeFullU, #ComputeThinU,
   *                           #ComputeFullV, #ComputeThinV.
   *
   * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They are also not
   * available with the (non-default) FullPivHouseholderQR preconditioner.
163    */
164   BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions);
165 
166   /** \brief Method performing the decomposition of given matrix using current options.
167    *
168    * \param matrix the matrix to decompose
169    *
170    * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
171    */
172   BDCSVD& compute(const MatrixType& matrix)
173   {
174     return compute(matrix, this->m_computationOptions);
175   }
176 
177   void setSwitchSize(int s)
178   {
    eigen_assert(s>=3 && "BDCSVD: the algorithm-switch size has to be at least 3.");
180     m_algoswap = s;
181   }
182 
183 private:
184   void allocate(Index rows, Index cols, unsigned int computationOptions);
185   void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
186   void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
187   void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus);
188   void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat);
189   void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V);
190   void deflation43(Index firstCol, Index shift, Index i, Index size);
191   void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
192   void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
193   template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
194   void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);
195   void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
196   static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift);
197 
198 protected:
199   MatrixXr m_naiveU, m_naiveV;
200   MatrixXr m_computed;
201   Index m_nRec;
202   ArrayXr m_workspace;
203   ArrayXi m_workspaceI;
204   int m_algoswap;
205   bool m_isTranspose, m_compU, m_compV;
206 
207   using Base::m_singularValues;
208   using Base::m_diagSize;
209   using Base::m_computeFullU;
210   using Base::m_computeFullV;
211   using Base::m_computeThinU;
212   using Base::m_computeThinV;
213   using Base::m_matrixU;
214   using Base::m_matrixV;
215   using Base::m_info;
216   using Base::m_isInitialized;
217   using Base::m_nonzeroSingularValues;
218 
219 public:
220   int m_numIters;
221 }; //end class BDCSVD
222 
223 
// Method to allocate and initialize matrices and attributes
225 template<typename MatrixType>
226 void BDCSVD<MatrixType>::allocate(Eigen::Index rows, Eigen::Index cols, unsigned int computationOptions)
227 {
228   m_isTranspose = (cols > rows);
229 
230   if (Base::allocate(rows, cols, computationOptions))
231     return;
232 
233   m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize );
234   m_compU = computeV();
235   m_compV = computeU();
236   if (m_isTranspose)
237     std::swap(m_compU, m_compV);
238 
239   if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 );
240   else         m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );
241 
242   if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);
243 
244   m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3);
245   m_workspaceI.resize(3*m_diagSize);
246 }// end allocate
247 
248 template<typename MatrixType>
249 BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions)
250 {
251 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
252   std::cout << "\n\n\n======================================================================================================================\n\n\n";
253 #endif
254   allocate(matrix.rows(), matrix.cols(), computationOptions);
255   using std::abs;
256 
257   const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
258 
  //**** step -1 - If the problem is too small, directly fall back to JacobiSVD and return
260   if(matrix.cols() < m_algoswap)
261   {
262     // FIXME this line involves temporaries
263     JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
264     m_isInitialized = true;
265     m_info = jsvd.info();
266     if (m_info == Success || m_info == NoConvergence) {
267       if(computeU()) m_matrixU = jsvd.matrixU();
268       if(computeV()) m_matrixV = jsvd.matrixV();
269       m_singularValues = jsvd.singularValues();
270       m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
271     }
272     return *this;
273   }
274 
275   //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
276   RealScalar scale = matrix.cwiseAbs().template maxCoeff<PropagateNaN>();
277   if (!(numext::isfinite)(scale)) {
278     m_isInitialized = true;
279     m_info = InvalidInput;
280     return *this;
281   }
282 
283   if(scale==Literal(0)) scale = Literal(1);
284   MatrixX copy;
285   if (m_isTranspose) copy = matrix.adjoint()/scale;
286   else               copy = matrix/scale;
287 
288   //**** step 1 - Bidiagonalization
289   // FIXME this line involves temporaries
290   internal::UpperBidiagonalization<MatrixX> bid(copy);
291 
292   //**** step 2 - Divide & Conquer
293   m_naiveU.setZero();
294   m_naiveV.setZero();
295   // FIXME this line involves a temporary matrix
296   m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
297   m_computed.template bottomRows<1>().setZero();
298   divide(0, m_diagSize - 1, 0, 0, 0);
299   if (m_info != Success && m_info != NoConvergence) {
300     m_isInitialized = true;
301     return *this;
302   }
303 
304   //**** step 3 - Copy singular values and vectors
305   for (int i=0; i<m_diagSize; i++)
306   {
307     RealScalar a = abs(m_computed.coeff(i, i));
308     m_singularValues.coeffRef(i) = a * scale;
309     if (a<considerZero)
310     {
311       m_nonzeroSingularValues = i;
312       m_singularValues.tail(m_diagSize - i - 1).setZero();
313       break;
314     }
315     else if (i == m_diagSize - 1)
316     {
317       m_nonzeroSingularValues = i + 1;
318       break;
319     }
320   }
321 
322 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
323 //   std::cout << "m_naiveU\n" << m_naiveU << "\n\n";
324 //   std::cout << "m_naiveV\n" << m_naiveV << "\n\n";
325 #endif
326   if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU);
327   else              copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV);
328 
329   m_isInitialized = true;
330   return *this;
331 }// end compute
332 
333 
334 template<typename MatrixType>
335 template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
336 void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naiveV)
337 {
338   // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa
339   if (computeU())
340   {
341     Index Ucols = m_computeThinU ? m_diagSize : householderU.cols();
342     m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
343     m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
344     householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer
345   }
346   if (computeV())
347   {
348     Index Vcols = m_computeThinV ? m_diagSize : householderV.cols();
349     m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
350     m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
351     householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer
352   }
353 }
354 
355 /** \internal
356   * Performs A = A * B exploiting the special structure of the matrix A. Splitting A as:
357   *  A = [A1]
358   *      [A2]
359   * such that A1.rows()==n1, then we assume that at least half of the columns of A1 and A2 are zeros.
  * We can thus pack them prior to the matrix product. However, this is only worth the effort if the matrix is large
361   * enough.
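  * For instance, under the assumption above roughly half of the columns of A1 and of A2 are zero after
  * deflation, so each of the two packed products involves only about n/2 columns instead of n.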
362   */
363 template<typename MatrixType>
364 void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1)
365 {
366   Index n = A.rows();
367   if(n>100)
368   {
369     // If the matrices are large enough, let's exploit the sparse structure of A by
370     // splitting it in half (wrt n1), and packing the non-zero columns.
371     Index n2 = n - n1;
372     Map<MatrixXr> A1(m_workspace.data()      , n1, n);
373     Map<MatrixXr> A2(m_workspace.data()+ n1*n, n2, n);
374     Map<MatrixXr> B1(m_workspace.data()+  n*n, n,  n);
375     Map<MatrixXr> B2(m_workspace.data()+2*n*n, n,  n);
376     Index k1=0, k2=0;
377     for(Index j=0; j<n; ++j)
378     {
379       if( (A.col(j).head(n1).array()!=Literal(0)).any() )
380       {
381         A1.col(k1) = A.col(j).head(n1);
382         B1.row(k1) = B.row(j);
383         ++k1;
384       }
385       if( (A.col(j).tail(n2).array()!=Literal(0)).any() )
386       {
387         A2.col(k2) = A.col(j).tail(n2);
388         B2.row(k2) = B.row(j);
389         ++k2;
390       }
391     }
392 
393     A.topRows(n1).noalias()    = A1.leftCols(k1) * B1.topRows(k1);
394     A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);
395   }
396   else
397   {
398     Map<MatrixXr,Aligned> tmp(m_workspace.data(),n,n);
399     tmp.noalias() = A*B;
400     A = tmp;
401   }
402 }
403 
// The divide algorithm is done "in place": we are always working on subsets of the same matrix. The divide method takes as arguments the
// position of the submatrix we are currently working on.

//@param firstCol : The Index of the first column of the submatrix of m_computed and of m_naiveU;
//@param lastCol : The Index of the last column of the submatrix of m_computed and of m_naiveU;
// lastCol + 1 - firstCol is the size of the submatrix.
//@param firstRowW : The Index of the first row of the matrix W that we are to change. (see the reference paper, section 1, for more information on W)
//@param firstColW : Same as firstRowW, but for the column.
//@param shift : Each time one takes the left submatrix, one must add 1 to the shift. Why? Because we actually want the last column of the U submatrix
// to become the first column (*coeff) and to shift all the other columns to the right. There are more details in the reference paper.
414 template<typename MatrixType>
415 void BDCSVD<MatrixType>::divide(Eigen::Index firstCol, Eigen::Index lastCol, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift)
416 {
417   // requires rows = cols + 1;
418   using std::pow;
419   using std::sqrt;
420   using std::abs;
421   const Index n = lastCol - firstCol + 1;
422   const Index k = n/2;
423   const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
424   RealScalar alphaK;
425   RealScalar betaK;
426   RealScalar r0;
427   RealScalar lambda, phi, c0, s0;
428   VectorType l, f;
  // For a small block we fall back to JacobiSVD, which is more efficient for small
  // matrices.
431   if (n < m_algoswap)
432   {
433     // FIXME this line involves temporaries
434     JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
435     m_info = b.info();
436     if (m_info != Success && m_info != NoConvergence) return;
437     if (m_compU)
438       m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
439     else
440     {
441       m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0);
442       m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n);
443     }
444     if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = b.matrixV();
445     m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero();
446     m_computed.diagonal().segment(firstCol + shift, n) = b.singularValues().head(n);
447     return;
448   }
449   // We use the divide and conquer algorithm
450   alphaK =  m_computed(firstCol + k, firstCol + k);
451   betaK = m_computed(firstCol + k + 1, firstCol + k);
  // The divide must be done in that order in order to have good results. divide() changes the data inside the submatrices,
  // and the divide of the right submatrix reads one column of the left submatrix. That's why we need to treat the
  // right submatrix before the left one.
455   divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift);
456   if (m_info != Success && m_info != NoConvergence) return;
457   divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1);
458   if (m_info != Success && m_info != NoConvergence) return;
459 
460   if (m_compU)
461   {
462     lambda = m_naiveU(firstCol + k, firstCol + k);
463     phi = m_naiveU(firstCol + k + 1, lastCol + 1);
464   }
465   else
466   {
467     lambda = m_naiveU(1, firstCol + k);
468     phi = m_naiveU(0, lastCol + 1);
469   }
470   r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda)) + abs(betaK * phi) * abs(betaK * phi));
471   if (m_compU)
472   {
473     l = m_naiveU.row(firstCol + k).segment(firstCol, k);
474     f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1);
475   }
476   else
477   {
478     l = m_naiveU.row(1).segment(firstCol, k);
479     f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
480   }
481   if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1);
482   if (r0<considerZero)
483   {
484     c0 = Literal(1);
485     s0 = Literal(0);
486   }
487   else
488   {
489     c0 = alphaK * lambda / r0;
490     s0 = betaK * phi / r0;
491   }
492 
493 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
494   assert(m_naiveU.allFinite());
495   assert(m_naiveV.allFinite());
496   assert(m_computed.allFinite());
497 #endif
498 
499   if (m_compU)
500   {
501     MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1));
    // we shift Q1 to the right
503     for (Index i = firstCol + k - 1; i >= firstCol; i--)
504       m_naiveU.col(i + 1).segment(firstCol, k + 1) = m_naiveU.col(i).segment(firstCol, k + 1);
    // we copy q1 into the first column, scaled by c0
506     m_naiveU.col(firstCol).segment( firstCol, k + 1) = (q1 * c0);
507     // last column = q1 * - s0
508     m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) = (q1 * ( - s0));
509     // first column = q2 * s0
510     m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) * s0;
511     // q2 *= c0
512     m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0;
513   }
514   else
515   {
516     RealScalar q1 = m_naiveU(0, firstCol + k);
517     // we shift Q1 to the right
518     for (Index i = firstCol + k - 1; i >= firstCol; i--)
519       m_naiveU(0, i + 1) = m_naiveU(0, i);
    // we copy q1 into the first column, scaled by c0
521     m_naiveU(0, firstCol) = (q1 * c0);
522     // last column = q1 * - s0
523     m_naiveU(0, lastCol + 1) = (q1 * ( - s0));
524     // first column = q2 * s0
525     m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) *s0;
526     // q2 *= c0
527     m_naiveU(1, lastCol + 1) *= c0;
528     m_naiveU.row(1).segment(firstCol + 1, k).setZero();
529     m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero();
530   }
531 
532 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
533   assert(m_naiveU.allFinite());
534   assert(m_naiveV.allFinite());
535   assert(m_computed.allFinite());
536 #endif
537 
538   m_computed(firstCol + shift, firstCol + shift) = r0;
539   m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real();
540   m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real();
541 
542 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
543   ArrayXr tmp1 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
544 #endif
545   // Second part: try to deflate singular values in combined matrix
546   deflation(firstCol, lastCol, k, firstRowW, firstColW, shift);
547 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
548   ArrayXr tmp2 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
549   std::cout << "\n\nj1 = " << tmp1.transpose().format(bdcsvdfmt) << "\n";
550   std::cout << "j2 = " << tmp2.transpose().format(bdcsvdfmt) << "\n\n";
551   std::cout << "err:      " << ((tmp1-tmp2).abs()>1e-12*tmp2.abs()).transpose() << "\n";
552   static int count = 0;
553   std::cout << "# " << ++count << "\n\n";
554   assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm());
555 //   assert(count<681);
556 //   assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all());
557 #endif
558 
559   // Third part: compute SVD of combined matrix
560   MatrixXr UofSVD, VofSVD;
561   VectorType singVals;
562   computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD);
563 
564 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
565   assert(UofSVD.allFinite());
566   assert(VofSVD.allFinite());
567 #endif
568 
569   if (m_compU)
570     structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
571   else
572   {
573     Map<Matrix<RealScalar,2,Dynamic>,Aligned> tmp(m_workspace.data(),2,n+1);
574     tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD;
575     m_naiveU.middleCols(firstCol, n + 1) = tmp;
576   }
577 
578   if (m_compV)  structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);
579 
580 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
581   assert(m_naiveU.allFinite());
582   assert(m_naiveV.allFinite());
583   assert(m_computed.allFinite());
584 #endif
585 
586   m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero();
587   m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals;
588 }// end divide
589 
590 // Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in
591 // the first column and on the diagonal and has undergone deflation, so diagonal is in increasing
// order except for possibly the (0,0) entry. The computed SVD is stored in U, singVals and V, except
593 // that if m_compV is false, then V is not computed. Singular values are sorted in decreasing order.
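//
// Schematically, the block being diagonalized here has the form
//   [ z0    0    0   ...   0    ]
//   [ z1    d1   0   ...   0    ]
//   [ z2    0    d2  ...   0    ]
//   [ ...                       ]
//   [ zn-1  0    0   ...  dn-1  ]
// i.e. a diagonal matrix plus a first column z; the (0,0) entry is treated as part of z, which is why
// diag(0) is forced to zero below.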
594 //
595 // TODO Opportunities for optimization: better root finding algo, better stopping criterion, better
596 // handling of round-off errors, be consistent in ordering
597 // For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf
598 template <typename MatrixType>
599 void BDCSVD<MatrixType>::computeSVDofM(Eigen::Index firstCol, Eigen::Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
600 {
601   const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
602   using std::abs;
603   ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
604   m_workspace.head(n) =  m_computed.block(firstCol, firstCol, n, n).diagonal();
605   ArrayRef diag = m_workspace.head(n);
606   diag(0) = Literal(0);
607 
608   // Allocate space for singular values and vectors
609   singVals.resize(n);
610   U.resize(n+1, n+1);
611   if (m_compV) V.resize(n, n);
612 
613 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
614   if (col0.hasNaN() || diag.hasNaN())
615     std::cout << "\n\nHAS NAN\n\n";
616 #endif
617 
618   // Many singular values might have been deflated, the zero ones have been moved to the end,
619   // but others are interleaved and we must ignore them at this stage.
620   // To this end, let's compute a permutation skipping them:
621   Index actual_n = n;
622   while(actual_n>1 && diag(actual_n-1)==Literal(0)) {--actual_n; eigen_internal_assert(col0(actual_n)==Literal(0)); }
623   Index m = 0; // size of the deflated problem
624   for(Index k=0;k<actual_n;++k)
625     if(abs(col0(k))>considerZero)
626       m_workspaceI(m++) = k;
627   Map<ArrayXi> perm(m_workspaceI.data(),m);
628 
629   Map<ArrayXr> shifts(m_workspace.data()+1*n, n);
630   Map<ArrayXr> mus(m_workspace.data()+2*n, n);
631   Map<ArrayXr> zhat(m_workspace.data()+3*n, n);
632 
633 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
634   std::cout << "computeSVDofM using:\n";
635   std::cout << "  z: " << col0.transpose() << "\n";
636   std::cout << "  d: " << diag.transpose() << "\n";
637 #endif
638 
639   // Compute singVals, shifts, and mus
640   computeSingVals(col0, diag, perm, singVals, shifts, mus);
641 
642 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
643   std::cout << "  j:        " << (m_computed.block(firstCol, firstCol, n, n)).jacobiSvd().singularValues().transpose().reverse() << "\n\n";
644   std::cout << "  sing-val: " << singVals.transpose() << "\n";
645   std::cout << "  mu:       " << mus.transpose() << "\n";
646   std::cout << "  shift:    " << shifts.transpose() << "\n";
647 
648   {
649     std::cout << "\n\n    mus:    " << mus.head(actual_n).transpose() << "\n\n";
650     std::cout << "    check1 (expect0) : " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n";
651     assert((((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n) >= 0).all());
652     std::cout << "    check2 (>0)      : " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n";
653     assert((((singVals.array()-diag) / singVals.array()).head(actual_n) >= 0).all());
654   }
655 #endif
656 
657 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
658   assert(singVals.allFinite());
659   assert(mus.allFinite());
660   assert(shifts.allFinite());
661 #endif
662 
663   // Compute zhat
664   perturbCol0(col0, diag, perm, singVals, shifts, mus, zhat);
665 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
666   std::cout << "  zhat: " << zhat.transpose() << "\n";
667 #endif
668 
669 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
670   assert(zhat.allFinite());
671 #endif
672 
673   computeSingVecs(zhat, diag, perm, singVals, shifts, mus, U, V);
674 
675 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
676   std::cout << "U^T U: " << (U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() << "\n";
677   std::cout << "V^T V: " << (V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() << "\n";
678 #endif
679 
680 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
681   assert(m_naiveU.allFinite());
682   assert(m_naiveV.allFinite());
683   assert(m_computed.allFinite());
684   assert(U.allFinite());
685   assert(V.allFinite());
686 //   assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 100*NumTraits<RealScalar>::epsilon() * n);
687 //   assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 100*NumTraits<RealScalar>::epsilon() * n);
688 #endif
689 
690   // Because of deflation, the singular values might not be completely sorted.
  // Fortunately, reordering them is an O(n) problem
692   for(Index i=0; i<actual_n-1; ++i)
693   {
694     if(singVals(i)>singVals(i+1))
695     {
696       using std::swap;
697       swap(singVals(i),singVals(i+1));
698       U.col(i).swap(U.col(i+1));
699       if(m_compV) V.col(i).swap(V.col(i+1));
700     }
701   }
702 
703 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
704   {
705     bool singular_values_sorted = (((singVals.segment(1,actual_n-1)-singVals.head(actual_n-1))).array() >= 0).all();
706     if(!singular_values_sorted)
707       std::cout << "Singular values are not sorted: " << singVals.segment(1,actual_n).transpose() << "\n";
708     assert(singular_values_sorted);
709   }
710 #endif
711 
  // Reverse order so that singular values are in increasing order.
  // Because of deflation, the zero singular values are already at the end.
714   singVals.head(actual_n).reverseInPlace();
715   U.leftCols(actual_n).rowwise().reverseInPlace();
716   if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace();
717 
718 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
719   JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) );
720   std::cout << "  * j:        " << jsvd.singularValues().transpose() << "\n\n";
721   std::cout << "  * sing-val: " << singVals.transpose() << "\n";
722 //   std::cout << "  * err:      " << ((jsvd.singularValues()-singVals)>1e-13*singVals.norm()).transpose() << "\n";
723 #endif
724 }
725 
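// Evaluate the secular equation at sigma = shift + mu:
//   f(sigma) = 1 + sum_{j in perm} col0(j)^2 / ((diag(j) - sigma) * (diag(j) + sigma))
// whose roots are the singular values of the deflated problem. The factor (diag(j) - sigma) is computed
// as diagShifted(j) - mu, i.e. from the pre-shifted diagonal, to avoid cancellation when sigma is close
// to diag(j).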
726 template <typename MatrixType>
727 typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
728 {
729   Index m = perm.size();
730   RealScalar res = Literal(1);
731   for(Index i=0; i<m; ++i)
732   {
733     Index j = perm(i);
734     // The following expression could be rewritten to involve only a single division,
735     // but this would make the expression more sensitive to overflow.
736     res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));
737   }
738   return res;
739 
740 }
741 
742 template <typename MatrixType>
743 void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm,
744                                          VectorType& singVals, ArrayRef shifts, ArrayRef mus)
745 {
746   using std::abs;
747   using std::swap;
748   using std::sqrt;
749 
750   Index n = col0.size();
751   Index actual_n = n;
752   // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above
753   // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.
754   while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;
755 
756   for (Index k = 0; k < n; ++k)
757   {
758     if (col0(k) == Literal(0) || actual_n==1)
759     {
760       // if col0(k) == 0, then entry is deflated, so singular value is on diagonal
761       // if actual_n==1, then the deflated problem is already diagonalized
762       singVals(k) = k==0 ? col0(0) : diag(k);
763       mus(k) = Literal(0);
764       shifts(k) = k==0 ? col0(0) : diag(k);
765       continue;
766     }
767 
768     // otherwise, use secular equation to find singular value
769     RealScalar left = diag(k);
770     RealScalar right; // was: = (k != actual_n-1) ? diag(k+1) : (diag(actual_n-1) + col0.matrix().norm());
771     if(k==actual_n-1)
772       right = (diag(actual_n-1) + col0.matrix().norm());
773     else
774     {
775       // Skip deflated singular values,
776       // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.
777       // This should be equivalent to using perm[]
778       Index l = k+1;
779       while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }
780       right = diag(l);
781     }
782 
783     // first decide whether it's closer to the left end or the right end
784     RealScalar mid = left + (right-left) / Literal(2);
785     RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0));
786 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
787     std::cout << "right-left = " << right-left << "\n";
788 //     std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, ArrayXr(diag-left), left)
789 //                            << " " << secularEq(mid-right, col0, diag, perm, ArrayXr(diag-right), right)   << "\n";
790     std::cout << "     = " << secularEq(left+RealScalar(0.000001)*(right-left), col0, diag, perm, diag, 0)
791               << " "       << secularEq(left+RealScalar(0.1)     *(right-left), col0, diag, perm, diag, 0)
792               << " "       << secularEq(left+RealScalar(0.2)     *(right-left), col0, diag, perm, diag, 0)
793               << " "       << secularEq(left+RealScalar(0.3)     *(right-left), col0, diag, perm, diag, 0)
794               << " "       << secularEq(left+RealScalar(0.4)     *(right-left), col0, diag, perm, diag, 0)
795               << " "       << secularEq(left+RealScalar(0.49)    *(right-left), col0, diag, perm, diag, 0)
796               << " "       << secularEq(left+RealScalar(0.5)     *(right-left), col0, diag, perm, diag, 0)
797               << " "       << secularEq(left+RealScalar(0.51)    *(right-left), col0, diag, perm, diag, 0)
798               << " "       << secularEq(left+RealScalar(0.6)     *(right-left), col0, diag, perm, diag, 0)
799               << " "       << secularEq(left+RealScalar(0.7)     *(right-left), col0, diag, perm, diag, 0)
800               << " "       << secularEq(left+RealScalar(0.8)     *(right-left), col0, diag, perm, diag, 0)
801               << " "       << secularEq(left+RealScalar(0.9)     *(right-left), col0, diag, perm, diag, 0)
802               << " "       << secularEq(left+RealScalar(0.999999)*(right-left), col0, diag, perm, diag, 0) << "\n";
803 #endif
804     RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right;
805 
806     // measure everything relative to shift
807     Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
808     diagShifted = diag - shift;
809 
810     if(k!=actual_n-1)
811     {
812       // check that after the shift, f(mid) is still negative:
813       RealScalar midShifted = (right - left) / RealScalar(2);
814       if(shift==right)
815         midShifted = -midShifted;
816       RealScalar fMidShifted = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
817       if(fMidShifted>0)
818       {
819         // fMid was erroneous, fix it:
820         shift =  fMidShifted > Literal(0) ? left : right;
821         diagShifted = diag - shift;
822       }
823     }
824 
825     // initial guess
826     RealScalar muPrev, muCur;
827     if (shift == left)
828     {
829       muPrev = (right - left) * RealScalar(0.1);
830       if (k == actual_n-1) muCur = right - left;
831       else                 muCur = (right - left) * RealScalar(0.5);
832     }
833     else
834     {
835       muPrev = -(right - left) * RealScalar(0.1);
836       muCur = -(right - left) * RealScalar(0.5);
837     }
838 
839     RealScalar fPrev = secularEq(muPrev, col0, diag, perm, diagShifted, shift);
840     RealScalar fCur = secularEq(muCur, col0, diag, perm, diagShifted, shift);
841     if (abs(fPrev) < abs(fCur))
842     {
843       swap(fPrev, fCur);
844       swap(muPrev, muCur);
845     }
846 
847     // rational interpolation: fit a function of the form a / mu + b through the two previous
848     // iterates and use its zero to compute the next iterate
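    // (near the targeted root the secular equation is dominated by a single pole, so a/mu + b is a much
    // better local model than a straight line, and this typically converges faster than a secant method)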
849     bool useBisection = fPrev*fCur>Literal(0);
850     while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
851     {
852       ++m_numIters;
853 
854       // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.
855       RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev);
856       RealScalar b = fCur - a / muCur;
857       // And find mu such that f(mu)==0:
858       RealScalar muZero = -a/b;
859       RealScalar fZero = secularEq(muZero, col0, diag, perm, diagShifted, shift);
860 
861 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
862       assert((numext::isfinite)(fZero));
863 #endif
864 
865       muPrev = muCur;
866       fPrev = fCur;
867       muCur = muZero;
868       fCur = fZero;
869 
870       if (shift == left  && (muCur < Literal(0) || muCur > right - left)) useBisection = true;
871       if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true;
872       if (abs(fCur)>abs(fPrev)) useBisection = true;
873     }
874 
875     // fall back on bisection method if rational interpolation did not work
876     if (useBisection)
877     {
878 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
879       std::cout << "useBisection for k = " << k << ", actual_n = " << actual_n << "\n";
880 #endif
881       RealScalar leftShifted, rightShifted;
882       if (shift == left)
883       {
        // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max));
        // the factor 2 is to be more conservative.
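        // (secularEq evaluates terms of the form (z(k)/mu)*(z(k)/(d(k)+shift+mu)), so keeping |z(k)|/mu
        // below sqrt(real_max) keeps each factor, and hence their product, representable)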
886         leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
887 
888         // check that we did it right:
889         eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );
890         // I don't understand why the case k==0 would be special there:
891         // if (k == 0) rightShifted = right - left; else
892         rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe
893       }
894       else
895       {
896         leftShifted = -(right - left) * RealScalar(0.51);
897         if(k+1<n)
898           rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
899         else
900           rightShifted = -(std::numeric_limits<RealScalar>::min)();
901       }
902 
903       RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
904       eigen_internal_assert(fLeft<Literal(0));
905 
906 #if defined EIGEN_BDCSVD_DEBUG_VERBOSE || defined EIGEN_BDCSVD_SANITY_CHECKS || defined EIGEN_INTERNAL_DEBUGGING
907       RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);
908 #endif
909 
910 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
911       if(!(numext::isfinite)(fLeft))
912         std::cout << "f(" << leftShifted << ") =" << fLeft << " ; " << left << " " << shift << " " << right << "\n";
913       assert((numext::isfinite)(fLeft));
914 
915       if(!(numext::isfinite)(fRight))
916         std::cout << "f(" << rightShifted << ") =" << fRight << " ; " << left << " " << shift << " " << right << "\n";
917       // assert((numext::isfinite)(fRight));
918 #endif
919 
920 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
921       if(!(fLeft * fRight<0))
922       {
923         std::cout << "f(leftShifted) using  leftShifted=" << leftShifted << " ;  diagShifted(1:10):" << diagShifted.head(10).transpose()  << "\n ; "
924                   << "left==shift=" << bool(left==shift) << " ; left-shift = " << (left-shift) << "\n";
925         std::cout << "k=" << k << ", " <<  fLeft << " * " << fRight << " == " << fLeft * fRight << "  ;  "
926                   << "[" << left << " .. " << right << "] -> [" << leftShifted << " " << rightShifted << "], shift=" << shift
927                   << " ,  f(right)=" << secularEq(0,     col0, diag, perm, diagShifted, shift)
928                            << " == " << secularEq(right, col0, diag, perm, diag, 0) << " == " << fRight << "\n";
929       }
930 #endif
931       eigen_internal_assert(fLeft * fRight < Literal(0));
932 
933       if(fLeft<Literal(0))
934       {
935         while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
936         {
937           RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
938           fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
939           eigen_internal_assert((numext::isfinite)(fMid));
940 
941           if (fLeft * fMid < Literal(0))
942           {
943             rightShifted = midShifted;
944           }
945           else
946           {
947             leftShifted = midShifted;
948             fLeft = fMid;
949           }
950         }
951         muCur = (leftShifted + rightShifted) / Literal(2);
952       }
953       else
954       {
        // We have a problem, as shifting on the left or on the right gives either a positive or a negative value
        // at the middle of [left,right]...
        // Instead of aborting or entering an infinite loop,
        // let's just use the middle as the estimated zero-crossing:
959         muCur = (right - left) * RealScalar(0.5);
960         if(shift == right)
961           muCur = -muCur;
962       }
963     }
964 
965     singVals[k] = shift + muCur;
966     shifts[k] = shift;
967     mus[k] = muCur;
968 
969 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
970     if(k+1<n)
971       std::cout << "found " << singVals[k] << " == " << shift << " + " << muCur << " from " << diag(k) << " .. "  << diag(k+1) << "\n";
972 #endif
973 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
974     assert(k==0 || singVals[k]>=singVals[k-1]);
975     assert(singVals[k]>=diag(k));
976 #endif
977 
    // perturb the singular value slightly if it equals a diagonal entry to avoid a division by zero later
    // (deflation is supposed to prevent this from happening)
    // - this does not seem to be necessary anymore -
981 //     if (singVals[k] == left) singVals[k] *= 1 + NumTraits<RealScalar>::epsilon();
982 //     if (singVals[k] == right) singVals[k] *= 1 - NumTraits<RealScalar>::epsilon();
983   }
984 }
985 
986 
// zhat is a perturbation of col0 for which singular vectors can be computed stably (see Section 3.1)
988 template <typename MatrixType>
989 void BDCSVD<MatrixType>::perturbCol0
990    (const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
991     const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat)
992 {
993   using std::sqrt;
994   Index n = col0.size();
995   Index m = perm.size();
996   if(m==0)
997   {
998     zhat.setZero();
999     return;
1000   }
1001   Index lastIdx = perm(m-1);
  // Going through perm (and its last entry lastIdx) permits us to skip deflated entries while computing zhat
1003   for (Index k = 0; k < n; ++k)
1004   {
1005     if (col0(k) == Literal(0)) // deflated
1006       zhat(k) = Literal(0);
1007     else
1008     {
1009       // see equation (3.6)
1010       RealScalar dk = diag(k);
1011       RealScalar prod = (singVals(lastIdx) + dk) * (mus(lastIdx) + (shifts(lastIdx) - dk));
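      // Note: (singVals(j) + dk) * (mus(j) + (shifts(j) - dk)) evaluates singVals(j)^2 - dk^2 without
      // forming singVals(j) - dk explicitly; since singVals(j) == shifts(j) + mus(j) with shifts(j) a
      // nearby diagonal entry, (shifts(j) - dk) + mus(j) is far less prone to cancellation.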
1012 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
1013       if(prod<0) {
1014         std::cout << "k = " << k << " ;  z(k)=" << col0(k) << ", diag(k)=" << dk << "\n";
1015         std::cout << "prod = " << "(" << singVals(lastIdx) << " + " << dk << ") * (" << mus(lastIdx) << " + (" << shifts(lastIdx) << " - " << dk << "))" << "\n";
1016         std::cout << "     = " << singVals(lastIdx) + dk << " * " << mus(lastIdx) + (shifts(lastIdx) - dk) <<  "\n";
1017       }
1018       assert(prod>=0);
1019 #endif
1020 
1021       for(Index l = 0; l<m; ++l)
1022       {
1023         Index i = perm(l);
1024         if(i!=k)
1025         {
1026 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
1027           if(i>=k && (l==0 || l-1>=m))
1028           {
1029             std::cout << "Error in perturbCol0\n";
1030             std::cout << "  " << k << "/" << n << " "  << l << "/" << m << " " << i << "/" << n << " ; " << col0(k) << " " << diag(k) << " "  <<  "\n";
1031             std::cout << "  " <<diag(i) << "\n";
1032             Index j = (i<k /*|| l==0*/) ? i : perm(l-1);
1033             std::cout << "  " << "j=" << j << "\n";
1034           }
1035 #endif
1036           Index j = i<k ? i : perm(l-1);
1037 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
1038           if(!(dk!=Literal(0) || diag(i)!=Literal(0)))
1039           {
1040             std::cout << "k=" << k << ", i=" << i << ", l=" << l << ", perm.size()=" << perm.size() << "\n";
1041           }
1042           assert(dk!=Literal(0) || diag(i)!=Literal(0));
1043 #endif
1044           prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk)));
1045 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
1046           assert(prod>=0);
1047 #endif
1048 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
1049           if(i!=k && numext::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 )
1050             std::cout << "     " << ((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) << " == (" << (singVals(j)+dk) << " * " << (mus(j)+(shifts(j)-dk))
1051                        << ") / (" << (diag(i)+dk) << " * " << (diag(i)-dk) << ")\n";
1052 #endif
1053         }
1054       }
1055 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
1056       std::cout << "zhat(" << k << ") =  sqrt( " << prod << ")  ;  " << (singVals(lastIdx) + dk) << " * " << mus(lastIdx) + shifts(lastIdx) << " - " << dk << "\n";
1057 #endif
1058       RealScalar tmp = sqrt(prod);
1059 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
1060       assert((numext::isfinite)(tmp));
1061 #endif
1062       zhat(k) = col0(k) > Literal(0) ? RealScalar(tmp) : RealScalar(-tmp);
1063     }
1064   }
1065 }
1066 
1067 // compute singular vectors
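// For a non-deflated index k, the left singular vector is proportional to ( zhat(i) / (d(i)^2 - sigma_k^2) )_i
// (with its last entry set to zero) and the right one to ( d(i)*zhat(i) / (d(i)^2 - sigma_k^2) )_i (with its
// first entry set to -1); the differences d(i) - sigma_k are evaluated as (d(i) - shift_k) - mu_k to avoid
// cancellation, and each vector is normalized at the end.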
1068 template <typename MatrixType>
1069 void BDCSVD<MatrixType>::computeSingVecs
1070    (const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
1071     const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V)
1072 {
1073   Index n = zhat.size();
1074   Index m = perm.size();
1075 
1076   for (Index k = 0; k < n; ++k)
1077   {
1078     if (zhat(k) == Literal(0))
1079     {
1080       U.col(k) = VectorType::Unit(n+1, k);
1081       if (m_compV) V.col(k) = VectorType::Unit(n, k);
1082     }
1083     else
1084     {
1085       U.col(k).setZero();
1086       for(Index l=0;l<m;++l)
1087       {
1088         Index i = perm(l);
1089         U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
1090       }
1091       U(n,k) = Literal(0);
1092       U.col(k).normalize();
1093 
1094       if (m_compV)
1095       {
1096         V.col(k).setZero();
1097         for(Index l=1;l<m;++l)
1098         {
1099           Index i = perm(l);
1100           V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
1101         }
1102         V(0,k) = Literal(-1);
1103         V.col(k).normalize();
1104       }
1105     }
1106   }
1107   U.col(n) = VectorType::Unit(n+1, n);
1108 }
1109 
1110 
// pages 12-13
// i >= 1, d(i) almost zero and z(i) nonzero.
// We use a rotation, applied on the left of M, to zero out z(i).
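// Concretely, with c = M(0,0), s = M(i,0) and r = hypot(c,s), the first-column entries (c, s) become
// (r, 0), the negligible diagonal entry M(i,i) is set to zero, and the inverse rotation is accumulated
// into the corresponding columns of m_naiveU so that the overall product is preserved.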
1114 template <typename MatrixType>
1115 void BDCSVD<MatrixType>::deflation43(Eigen::Index firstCol, Eigen::Index shift, Eigen::Index i, Eigen::Index size)
1116 {
1117   using std::abs;
1118   using std::sqrt;
1119   using std::pow;
1120   Index start = firstCol + shift;
1121   RealScalar c = m_computed(start, start);
1122   RealScalar s = m_computed(start+i, start);
1123   RealScalar r = numext::hypot(c,s);
1124   if (r == Literal(0))
1125   {
1126     m_computed(start+i, start+i) = Literal(0);
1127     return;
1128   }
1129   m_computed(start,start) = r;
1130   m_computed(start+i, start) = Literal(0);
1131   m_computed(start+i, start+i) = Literal(0);
1132 
1133   JacobiRotation<RealScalar> J(c/r,-s/r);
1134   if (m_compU)  m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);
1135   else          m_naiveU.applyOnTheRight(firstCol, firstCol+i, J);
1136 }// end deflation 43
1137 
1138 
1139 // page 13
1140 // i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M)
1141 // We apply two rotations to have zj = 0;
1142 // TODO deflation44 is still broken and not properly tested
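// The idea: since d(i) and d(j) are nearly equal, rotating entries i and j of z maps (z(i), z(j)) to
// (r, 0) while keeping the (nearly common) diagonal value; the rotation is accumulated into the
// corresponding columns of m_naiveU and, when it is computed, of m_naiveV.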
1143 template <typename MatrixType>
1144 void BDCSVD<MatrixType>::deflation44(Eigen::Index firstColu , Eigen::Index firstColm, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index i, Eigen::Index j, Eigen::Index size)
1145 {
1146   using std::abs;
1147   using std::sqrt;
1148   using std::conj;
1149   using std::pow;
1150   RealScalar c = m_computed(firstColm+i, firstColm);
1151   RealScalar s = m_computed(firstColm+j, firstColm);
1152   RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
1153 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
1154   std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; "
1155     << m_computed(firstColm + i-1, firstColm)  << " "
1156     << m_computed(firstColm + i, firstColm)  << " "
1157     << m_computed(firstColm + i+1, firstColm) << " "
1158     << m_computed(firstColm + i+2, firstColm) << "\n";
1159   std::cout << m_computed(firstColm + i-1, firstColm + i-1)  << " "
1160     << m_computed(firstColm + i, firstColm+i)  << " "
1161     << m_computed(firstColm + i+1, firstColm+i+1) << " "
1162     << m_computed(firstColm + i+2, firstColm+i+2) << "\n";
1163 #endif
1164   if (r==Literal(0))
1165   {
1166     m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
1167     return;
1168   }
1169   c/=r;
1170   s/=r;
1171   m_computed(firstColm + i, firstColm) = r;
1172   m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
1173   m_computed(firstColm + j, firstColm) = Literal(0);
1174 
1175   JacobiRotation<RealScalar> J(c,-s);
1176   if (m_compU)  m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);
1177   else          m_naiveU.applyOnTheRight(firstColu+i, firstColu+j, J);
1178   if (m_compV)  m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J);
1179 }// end deflation 44
1180 
1181 
1182 // acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive]
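// Deflation mirrors conditions (4.1)-(4.4) of the reference paper: a tiny leading diagonal entry is
// clamped (4.1), tiny entries of z are zeroed (4.2), a tiny diagonal entry gets its z entry rotated
// away (4.3), and nearly equal diagonal entries are merged by a rotation (4.4); in between, the
// diagonal entries are sorted by merging the two already-sorted halves.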
1183 template <typename MatrixType>
1184 void BDCSVD<MatrixType>::deflation(Eigen::Index firstCol, Eigen::Index lastCol, Eigen::Index k, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift)
1185 {
1186   using std::sqrt;
1187   using std::abs;
1188   const Index length = lastCol + 1 - firstCol;
1189 
1190   Block<MatrixXr,Dynamic,1> col0(m_computed, firstCol+shift, firstCol+shift, length, 1);
1191   Diagonal<MatrixXr> fulldiag(m_computed);
1192   VectorBlock<Diagonal<MatrixXr>,Dynamic> diag(fulldiag, firstCol+shift, length);
1193 
1194   const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
1195   RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
1196   RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);
1197   RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
1198 
1199 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
1200   assert(m_naiveU.allFinite());
1201   assert(m_naiveV.allFinite());
1202   assert(m_computed.allFinite());
1203 #endif
1204 
1205 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
1206   std::cout << "\ndeflate:" << diag.head(k+1).transpose() << "  |  " << diag.segment(k+1,length-k-1).transpose() << "\n";
1207 #endif
1208 
1209   //condition 4.1
1210   if (diag(0) < epsilon_coarse)
1211   {
1212 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
1213     std::cout << "deflation 4.1, because " << diag(0) << " < " << epsilon_coarse << "\n";
1214 #endif
1215     diag(0) = epsilon_coarse;
1216   }
1217 
1218   //condition 4.2
1219   for (Index i=1;i<length;++i)
1220     if (abs(col0(i)) < epsilon_strict)
1221     {
1222 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
1223       std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << "  (diag(" << i << ")=" << diag(i) << ")\n";
1224 #endif
1225       col0(i) = Literal(0);
1226     }
1227 
1228   //condition 4.3
1229   for (Index i=1;i<length; i++)
1230     if (diag(i) < epsilon_coarse)
1231     {
1232 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
1233       std::cout << "deflation 4.3, cancel z(" << i << ")=" << col0(i) << " because diag(" << i << ")=" << diag(i) << " < " << epsilon_coarse << "\n";
1234 #endif
1235       deflation43(firstCol, shift, i, length);
1236     }
1237 
1238 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
1239   assert(m_naiveU.allFinite());
1240   assert(m_naiveV.allFinite());
1241   assert(m_computed.allFinite());
1242 #endif
1243 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
1244   std::cout << "to be sorted: " << diag.transpose() << "\n\n";
1245   std::cout << "            : " << col0.transpose() << "\n\n";
1246 #endif
1247   {
1248     // Check for total deflation
1249     // If we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting.
1250     const bool total_deflation = (col0.tail(length-1).array().abs()<considerZero).all();
1251 
1252     // Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.
1253     // First, compute the respective permutation.
1254     Index *permutation = m_workspaceI.data();
1255     {
1256       permutation[0] = 0;
1257       Index p = 1;
1258 
      // Move deflated diagonal entries to the end.
1260       for(Index i=1; i<length; ++i)
1261         if(abs(diag(i))<considerZero)
1262           permutation[p++] = i;
1263 
1264       Index i=1, j=k+1;
1265       for( ; p < length; ++p)
1266       {
1267              if (i > k)             permutation[p] = j++;
1268         else if (j >= length)       permutation[p] = i++;
1269         else if (diag(i) < diag(j)) permutation[p] = j++;
1270         else                        permutation[p] = i++;
1271       }
1272     }
1273 
1274     // If we have a total deflation, then we have to insert diag(0) at the right place
1275     if(total_deflation)
1276     {
1277       for(Index i=1; i<length; ++i)
1278       {
1279         Index pi = permutation[i];
1280         if(abs(diag(pi))<considerZero || diag(0)<diag(pi))
1281           permutation[i-1] = permutation[i];
1282         else
1283         {
1284           permutation[i-1] = 0;
1285           break;
1286         }
1287       }
1288     }
1289 
1290     // Current index of each col, and current column of each index
1291     Index *realInd = m_workspaceI.data()+length;
1292     Index *realCol = m_workspaceI.data()+2*length;
1293 
1294     for(int pos = 0; pos< length; pos++)
1295     {
1296       realCol[pos] = pos;
1297       realInd[pos] = pos;
1298     }
1299 
1300     for(Index i = total_deflation?0:1; i < length; i++)
1301     {
1302       const Index pi = permutation[length - (total_deflation ? i+1 : i)];
1303       const Index J = realCol[pi];
1304 
1305       using std::swap;
1306       // swap diagonal and first column entries:
1307       swap(diag(i), diag(J));
1308       if(i!=0 && J!=0) swap(col0(i), col0(J));
1309 
1310       // change columns
1311       if (m_compU) m_naiveU.col(firstCol+i).segment(firstCol, length + 1).swap(m_naiveU.col(firstCol+J).segment(firstCol, length + 1));
1312       else         m_naiveU.col(firstCol+i).segment(0, 2)                .swap(m_naiveU.col(firstCol+J).segment(0, 2));
1313       if (m_compV) m_naiveV.col(firstColW + i).segment(firstRowW, length).swap(m_naiveV.col(firstColW + J).segment(firstRowW, length));
1314 
1315       //update real pos
1316       const Index realI = realInd[i];
1317       realCol[realI] = J;
1318       realCol[pi] = i;
1319       realInd[J] = realI;
1320       realInd[i] = pi;
1321     }
1322   }
1323 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
1324   std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n";
1325   std::cout << "      : " << col0.transpose() << "\n\n";
1326 #endif
1327 
1328   //condition 4.4
1329   {
1330     Index i = length-1;
1331     while(i>0 && (abs(diag(i))<considerZero || abs(col0(i))<considerZero)) --i;
1332     for(; i>1;--i)
1333        if( (diag(i) - diag(i-1)) < NumTraits<RealScalar>::epsilon()*maxDiag )
1334       {
1335 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
1336         std::cout << "deflation 4.4 with i = " << i << " because " << diag(i) << " - " << diag(i-1) << " == " << (diag(i) - diag(i-1)) << " < " << NumTraits<RealScalar>::epsilon()*/*diag(i)*/maxDiag << "\n";
1337 #endif
1338         eigen_internal_assert(abs(diag(i) - diag(i-1))<epsilon_coarse && " diagonal entries are not properly sorted");
1339         deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i-1, i, length);
1340       }
1341   }
1342 
1343 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
1344   for(Index j=2;j<length;++j)
1345     assert(diag(j-1)<=diag(j) || abs(diag(j))<considerZero);
1346 #endif
1347 
1348 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
1349   assert(m_naiveU.allFinite());
1350   assert(m_naiveV.allFinite());
1351   assert(m_computed.allFinite());
1352 #endif
1353 }//end deflation
1354 
1355 /** \svd_module
1356   *
  * \return the singular value decomposition of \c *this computed by the Divide & Conquer algorithm
1358   *
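  * For instance, a least-squares solve via the thin SVD might look like this (a minimal sketch, with A
  * and b chosen purely for illustration):
  * \code
  * Eigen::MatrixXd A = Eigen::MatrixXd::Random(100, 40);
  * Eigen::VectorXd b = Eigen::VectorXd::Random(100);
  * Eigen::VectorXd x = A.bdcSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(b);
  * \endcode
  *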
1359   * \sa class BDCSVD
1360   */
1361 template<typename Derived>
1362 BDCSVD<typename MatrixBase<Derived>::PlainObject>
1363 MatrixBase<Derived>::bdcSvd(unsigned int computationOptions) const
1364 {
1365   return BDCSVD<PlainObject>(*this, computationOptions);
1366 }
1367 
1368 } // end namespace Eigen
1369 
1370 #endif
1371