eigenCore/decomposition/RealSchur.hpp

/*
XLiFE++ is an extended library of finite elements written in C++
    Copyright (C) 2014  Lunéville, Eric; Kielbasiewicz, Nicolas; Lafranche, Yvon; Nguyen, Manh-Ha; Chambeyron, Colin

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*!
  \file RealSchur.hpp
  \author Manh Ha NGUYEN
  \since 22 Jan 2013
  \date  7 August 2013

  \brief Definition of the xlifepp::RealSchur class

  Class deals with real Schur decomposition.
*/

// This file is adapted from Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Claire Maurice
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REAL_SCHUR_HPP
#define EIGEN_REAL_SCHUR_HPP

#include "utils.h"
#include "../utils/Jacobi.hpp"
#include "../utils/VectorEigenDense.hpp"
#include "./HessenbergDecomposition.hpp"
#include "./HouseholderQR.hpp"

namespace xlifepp
{

namespace internalEigenSolver
{
// Forward declarations of the diagonal-block reordering helpers. Both are
// defined at the end of this file; they are declared here because
// RealSchur::swapSchur() calls swapRealSchurInPlace() before that point.

// Reorders, in place, the diagonal blocks of matrixT between rows ifst and ilst
// and applies the same transformations to matrixQ.
template<typename MatrixType>
void swapRealSchurInPlace(MatrixType& matrixT, MatrixType& matrixQ, unsigned int ifst, unsigned int ilst);
// Returns the matrix that swapRealSchurInPlace() multiplies T and Q with in order
// to exchange two adjacent diagonal blocks of sizes p and q found in matT.
template<typename MatrixType>
MatrixType directSwapping(MatrixType& matT, unsigned int p, unsigned int q, real_t gamma);
}

/*!
  \class RealSchur

  \brief Performs a real Schur decomposition of a square matrix

  \tparam _MatrixType the type of the matrix of which we are computing the
  real Schur decomposition; this is expected to be an instantiation of the
  Matrix class template.

  Given a real square matrix A, this class computes the real Schur
  decomposition: \f$ A = U T U^T \f$ where U is a real orthogonal matrix and
  T is a real quasi-triangular matrix. An orthogonal matrix is a matrix whose
  inverse is equal to its transpose, \f$ U^{-1} = U^T \f$. A quasi-triangular
  matrix is a block-triangular matrix whose diagonal consists of 1-by-1
  blocks and 2-by-2 blocks with complex eigenvalues. The eigenvalues of the
  blocks on the diagonal of T are the same as the eigenvalues of the matrix
  A, and thus the real Schur decomposition is used in EigenSolver to compute
  the eigendecomposition of a matrix.

  Call the function compute() to compute the real Schur decomposition of a
  given matrix. Alternatively, you can use the RealSchur(const MatrixType&, bool)
  constructor which computes the real Schur decomposition at construction
  time. Once the decomposition is computed, you can use the matrixU() and
  matrixT() functions to retrieve the matrices U and T in the decomposition.

  \note The implementation is adapted from
  <a href="http://math.nist.gov/javanumerics/jama/">JAMA</a> (public domain).
  Their code is based on EISPACK.

*/
template<typename _MatrixType>
class RealSchur
{
  public:
    typedef _MatrixType MatrixType;
    typedef typename MatrixType::type_t Scalar;
    typedef typename NumTraits<Scalar>::ComplexScalar ComplexScalar;

    typedef VectorEigenDense<ComplexScalar> EigenvalueType;
    typedef VectorEigenDense<Scalar> ColumnVectorType;

    /*!
      \brief Size-only constructor; no decomposition is computed.

      \param [in] size  Positive integer, size of the matrix whose Schur decomposition will be computed.

      This constructor is useful in cases in which the user intends to
      perform decompositions via compute().  The \p size parameter is only
      used as a hint. It is not an error to give a wrong \p size, but it may
      impair performance.

      The object is left uninitialized: matrixU(), matrixT() and info() report
      an error until compute() has been called.

      \sa compute() for an example.
    */
    RealSchur(Index size)
      : matT_(size, size),
        matU_(size, size),
        hess_(size),
        maxIterations_(40),
        isInitialized_(false),
        matUisUptodate_(false),
        info_(_noConvergence)
    { }

    /*!
      \brief Constructor; computes real Schur decomposition of given matrix.

      \param[in]  matrix    Square matrix whose Schur decomposition is to be computed.
      \param[in]  computeU  If true, both T and U are computed; if false, only T is computed.

      This constructor calls compute() to compute the Schur decomposition.
    */
    RealSchur(const MatrixType& matrix, bool computeU = true)
      : matT_(matrix.numOfRows(), matrix.numOfCols()),
        matU_(matrix.numOfRows(), matrix.numOfCols()),
        hess_(matrix.numOfRows()),
        maxIterations_(40),
        isInitialized_(false),
        matUisUptodate_(false),
        info_(_noConvergence)
    {
      compute(matrix, computeU);
    }

    /*!
      \brief Returns the orthogonal matrix in the Schur decomposition.

      \returns A const reference to the matrix U.

      \pre Either the constructor RealSchur(const MatrixType&, bool) or the
      member function compute(const MatrixType&, bool) has been called before
      to compute the Schur decomposition of a matrix, and \p computeU was set
      to true (the default value). Otherwise error() is called.

      \sa RealSchur(const MatrixType&, bool) for an example
    */
    const MatrixType& matrixU() const
    {
      if (!isInitialized_) { error("eigensolver_not_initialized", "RealSchur"); }
      if (!matUisUptodate_) { error("eigensolver_matrix_not_computed", "U", "Real"); }
      return matU_;
    }

    /*!
      \brief Returns the quasi-triangular matrix in the Schur decomposition.

      \returns A const reference to the matrix T.

      \pre Either the constructor RealSchur(const MatrixType&, bool) or the
      member function compute(const MatrixType&, bool) has been called before
      to compute the Schur decomposition of a matrix. Otherwise error() is called.
    */
    const MatrixType& matrixT() const
    {
      if (!isInitialized_) { error("eigensolver_not_initialized", "RealSchur"); }
      return matT_;
    }

    /*!
      \brief Computes Schur decomposition of given matrix.

      \param[in]  matrix    Square matrix whose Schur decomposition is to be computed.
      \param[in]  computeU  If true, both T and U are computed; if false, only T is computed.
      \returns    Reference to \c *this

      The Schur decomposition is computed by first reducing the matrix to
      Hessenberg form using the class HessenbergDecomposition. The Hessenberg
      matrix is then reduced to triangular form by performing Francis QR
      iterations with implicit double shift. The cost of computing the Schur
      decomposition depends on the number of iterations; as a rough guide, it
      may be taken to be \f$25n^3\f$ flops if \a computeU is true and
      \f$10n^3\f$ flops if \a computeU is false.
    */
    RealSchur& compute(const MatrixType& matrix, bool computeU = true);

    /*!
      \brief Reports whether previous computation was successful.

      \returns \c _success if the computation was successful, \c _noConvergence otherwise.

      \pre compute() has been called (directly or through the computing
      constructor). Otherwise error() is called.
    */
    ComputationInfo info() const
    {
      if (!isInitialized_) { error("eigensolver_not_initialized", "RealSchur"); }
      return info_;
    }

    /*!
      \brief Reorders the block diagonal of the quasi-triangular matrix T: the
      diagonal block at row \p ifst is moved to row \p ilst.

      \param[in] ifst      row index of the diagonal block to move
      \param[in] ilst      row index of its destination
      \param[in] computeU  not consulted by the current implementation; the
                           stored matrix U is always updated together with T
      */
    void swapSchur(unsigned int ifst, unsigned int ilst, bool computeU);


  private:
    MatrixType matT_;                           //!< quasi-triangular factor T (holds the Hessenberg matrix while compute() iterates)
    MatrixType matU_;                           //!< orthogonal factor U, filled by compute() only when computeU is true
    HessenbergDecomposition<MatrixType> hess_;  //!< Hessenberg reduction used as first step of compute()

    /*!
      \brief Maximum number of iterations.

      Iteration budget per eigenvalue: compute() stops and reports
      \c _noConvergence once the total number of QR iterations exceeds
      maxIterations_ times the matrix size.
      Default value = 40 (taken from LAPACK)
    */
    int maxIterations_;

    bool isInitialized_; //!< true if initialized
    bool matUisUptodate_; //!< true if the last call to compute() was asked to compute U
    ComputationInfo info_; //!< outcome of the last call to compute()


    // Dynamically sized vector type; the QR iteration uses it with exactly 3 entries
    // (shift information and first Householder vector).
    typedef VectorEigenDense<Scalar> Vector3s;

    // Sum of the absolute values of the entries of T on and above the first sub-diagonal.
    Scalar computeNormOfT();
    // Index of the first negligible sub-diagonal entry found when walking up from row iu (0 if none).
    Index findSmallSubdiagEntry(Index iu, Scalar norm);
    // Finalizes the 2-by-2 diagonal block made of rows iu-1 and iu.
    void splitOffTwoRows(Index iu, bool computeU, Scalar exshift);
    // Fills shiftInfo for the next QR step; applies an exceptional shift at iterations 10 and 30.
    void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);
    // Chooses the starting row im of the QR step and computes the first Householder vector.
    void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);
    // Applies one Francis QR step on rows il..iu, starting the bulge at row im.
    void performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector);
};


/*
  Computes the real Schur decomposition of a square matrix.

  The matrix is first brought to upper Hessenberg form, then Francis QR steps
  are applied until every diagonal block has been split off or the iteration
  budget (maxIterations_ times the matrix size) is exhausted. On return info_
  tells which of the two happened, and matUisUptodate_ mirrors computeU.
*/
template<typename MatrixType>
RealSchur<MatrixType>& RealSchur<MatrixType>::compute(const MatrixType& matrix, bool computeU)
{
  trace_p->push("RealSchur::compute");
  if (matrix.numOfRows() != matrix.numOfCols())
  { matrix.nonSquare("Computing real schur", matrix.numOfRows(), matrix.numOfCols()); }

  // Stage 1: reduction to Hessenberg form; U starts as the Hessenberg transformation
  hess_.compute(matrix);
  matT_ = hess_.matrixH();
  if (computeU) { matU_ = hess_.matrixQ(); }

  // Stage 2: reduction of the Hessenberg matrix to real Schur form.
  // matT_ is viewed as three row ranges:
  //   0 .. il-1   : decoupled from the rest, since matT_(il, il-1) is zero
  //   il .. iu    : the active window
  //   iu+1 .. end : already quasi-triangular
  Index iu = matT_.numOfCols() - 1;
  Index iter = 0;       // iterations spent on the current eigenvalue
  Index totalIter = 0;  // iterations spent on the whole matrix
  Scalar exshift(0);    // accumulated exceptional shifts
  const Scalar norm = computeNormOfT();

  // A zero norm means there is nothing to iterate on
  while (norm != 0 && iu >= 0)
  {
    const Index il = findSmallSubdiagEntry(iu, norm);

    if (il == iu)
    {
      // a single real eigenvalue has converged
      matT_.coeffRef(iu, iu) = matT_.coeff(iu, iu) + exshift;
      if (iu > 0) { matT_.coeffRef(iu, iu - 1) = Scalar(0); }
      iu--;
      iter = 0;
      continue;
    }

    if (il == iu - 1)
    {
      // a 2-by-2 block has converged
      splitOffTwoRows(iu, computeU, exshift);
      iu -= 2;
      iter = 0;
      continue;
    }

    // Not converged yet: perform one more Francis QR step.
    // Both vectors are given an initial value to keep GCC quiet (-O1 -Wall -DNDEBUG).
    Vector3s firstHouseholderVector(3, 0.0);
    Vector3s shiftInfo(3, 0.0);
    computeShift(iu, iter, exshift, shiftInfo);
    ++iter;
    ++totalIter;
    if (totalIter > maxIterations_ * matrix.numOfCols()) { break; }
    Index im;
    initFrancisQRStep(il, iu, shiftInfo, im, firstHouseholderVector);
    performFrancisQRStep(il, im, iu, computeU, firstHouseholderVector);
  }

  info_ = (totalIter <= maxIterations_ * matrix.numOfCols()) ? _success : _noConvergence;

  isInitialized_ = true;
  matUisUptodate_ = computeU;

  trace_p->pop();
  return *this;
}

/*! \internal Returns the sum of the absolute values of the entries of T located on or above the first sub-diagonal */
template<typename MatrixType>
typename MatrixType::type_t RealSchur<MatrixType>::computeNormOfT()
{
  const Index n = matT_.numOfCols();
  Scalar total(0);
  for (Index r = 0; r < n; ++r)
  {
    // In row r only the entries from column max(r-1, 0) onwards are accumulated
    const Index firstCol = std::max(r - 1, Index(0));
    total += matT_.blockRow(r, firstCol, n - firstCol).sumAbs();
  }
  return total;
}

/*! \internal Walks up from row iu and returns the first row whose sub-diagonal entry is negligible (0 if there is none) */
template<typename MatrixType>
Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, Scalar norm)
{
  Index k = iu;
  for (; k > 0; --k)
  {
    // The sub-diagonal entry is compared with its two diagonal neighbours,
    // or with the norm of T when both neighbours vanish
    Scalar scale = std::abs(matT_.coeff(k - 1, k - 1)) + std::abs(matT_.coeff(k, k));
    if (scale == 0.0) { scale = norm; }
    const bool negligible = std::abs(matT_.coeff(k, k - 1)) < (NumTraits<Scalar>::epsilon() * scale);
    if (negligible) { break; }
  }
  return k;
}

/*! \internal Finalizes the 2-by-2 diagonal block formed by rows iu-1 and iu, which has decoupled from the rest of T. */
template<typename MatrixType>
inline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, Scalar exshift)
{
  const Index nbCols = matT_.numOfCols();
  const Index nbRows = matT_.numOfRows();
  const Index lo = iu - 1; // first row of the 2-by-2 block

  // For the block [a b; c d]: p = (a - d)/2 and q = p^2 + b*c, a quarter of the
  // discriminant of its characteristic polynomial. Both are taken before the
  // accumulated shift is added back to the diagonal.
  const Scalar p = Scalar(0.5) * (matT_.coeff(lo, lo) - matT_.coeff(iu, iu));
  const Scalar q = p * p + matT_.coeff(iu, lo) * matT_.coeff(lo, iu);
  matT_.coeffRef(iu, iu) += exshift;
  matT_.coeffRef(lo, lo) += exshift;

  if (q >= Scalar(0))
  {
    // Two real eigenvalues: a Givens rotation makes the block upper triangular
    const Scalar z = std::sqrt(std::abs(q));
    JacobiRotation<Scalar> rot;
    rot.makeGivens((p >= Scalar(0)) ? p + z : p - z, matT_.coeff(iu, lo));

    // Rotation from the left, on the columns lo .. end
    MatrixType trailingCols(matT_, 0, lo, nbRows, nbCols - iu + 1);
    trailingCols.applyOnTheLeft(lo, iu, rot.adjoint());
    matT_.replace(trailingCols, 0, lo, nbRows, nbCols - iu + 1);

    // Rotation from the right, on the rows 0 .. iu
    MatrixType leadingRows(matT_, 0, 0, iu + 1, nbCols);
    leadingRows.applyOnTheRight(lo, iu, rot);
    matT_.replace(leadingRows, 0, 0, iu + 1, nbCols);

    matT_.coeffRef(iu, lo) = Scalar(0);

    if (computeU) { matU_.applyOnTheRight(lo, iu, rot); }
  }

  // The block is decoupled from what precedes it
  if (iu > 1) { matT_.coeffRef(lo, lo - 1) = Scalar(0); }
}

/*! \internal Fills shiftInfo for the next QR step; at iterations 10 and 30 an exceptional shift is applied to T and added to exshift. */
template<typename MatrixType>
inline void RealSchur<MatrixType>::computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo)
{
  // Regular shift, read from the trailing 2-by-2 block of the active window
  shiftInfo.coeffRef(0) = matT_.coeff(iu, iu);
  shiftInfo.coeffRef(1) = matT_.coeff(iu - 1, iu - 1);
  shiftInfo.coeffRef(2) = matT_.coeff(iu, iu - 1) * matT_.coeff(iu - 1, iu);

  if (iter == 10)
  {
    // Wilkinson's original ad hoc shift
    const Scalar shift = shiftInfo.coeff(0);
    exshift += shift;
    for (Index d = 0; d <= iu; ++d) { matT_.coeffRef(d, d) -= shift; }

    const Scalar s = std::abs(matT_.coeff(iu, iu - 1)) + std::abs(matT_.coeff(iu - 1, iu - 2));
    const Scalar scaled = Scalar(0.75) * s;
    shiftInfo.coeffRef(0) = scaled;
    shiftInfo.coeffRef(1) = scaled;
    shiftInfo.coeffRef(2) = Scalar(-0.4375) * s * s;
  }
  else if (iter == 30)
  {
    // MATLAB's new ad hoc shift
    const Scalar halfGap = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
    const Scalar disc = halfGap * halfGap + shiftInfo.coeff(2);
    if (disc > Scalar(0))
    {
      Scalar root = std::sqrt(disc);
      if (shiftInfo.coeff(1) < shiftInfo.coeff(0)) { root = -root; }
      root = root + halfGap;
      const Scalar shift = shiftInfo.coeff(0) - shiftInfo.coeff(2) / root;
      exshift += shift;
      for (Index d = 0; d <= iu; ++d) { matT_.coeffRef(d, d) -= shift; }
      shiftInfo.set(Scalar(0.964));
    }
  }
}

/*! \internal Determines the row im at which the Francis QR step starts, together with the first Householder vector. */
template<typename MatrixType>
inline void RealSchur<MatrixType>::initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector)
{
  // Candidates are scanned from iu-2 upwards; the scan stops at il at the latest
  im = iu - 2;
  while (im >= il)
  {
    const Scalar diag = matT_.coeff(im, im);
    const Scalar r = shiftInfo.coeff(0) - diag;
    const Scalar s = shiftInfo.coeff(1) - diag;
    firstHouseholderVector.coeffRef(0) = (r * s - shiftInfo.coeff(2)) / matT_.coeff(im + 1, im) + matT_.coeff(im, im + 1);
    firstHouseholderVector.coeffRef(1) = matT_.coeff(im + 1, im + 1) - diag - r - s;
    firstHouseholderVector.coeffRef(2) = matT_.coeff(im + 2, im + 1);

    if (im == il) { break; }

    // Start here when the sub-diagonal entry to the left of row im is
    // negligible with respect to the candidate vector
    const Scalar lhs = matT_.coeff(im, im - 1) * (std::abs(firstHouseholderVector.coeff(1)) + std::abs(firstHouseholderVector.coeff(2)));
    const Scalar rhs = firstHouseholderVector.coeff(0) * (std::abs(matT_.coeff(im - 1, im - 1)) + std::abs(diag) + std::abs(matT_.coeff(im + 1, im + 1)));
    if (std::abs(lhs) < (NumTraits<Scalar>::epsilon() * rhs)) { break; }

    --im;
  }
}

/*! \internal Performs one Francis QR step on rows il:iu, the bulge being created at row im and chased down to row iu. */
template<typename MatrixType>
inline void RealSchur<MatrixType>::performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector)
{
  // im is expected to lie in [il, iu-2]
  if (im < il) { error("is_lesser", im, il); }
  if (im > iu-2) { error("is_greater", im, iu-2); }
  const Index nbCols = matT_.numOfCols();

  // Three-row reflectors, one per row k = im .. iu-2
  for (Index k = im; k <= iu - 2; ++k)
  {
    const bool isFirst = (k == im);

    // The first reflector comes from initFrancisQRStep, the next ones from the bulge in column k-1
    Vector3s hv(3);
    if (isFirst) { hv = firstHouseholderVector; }
    else { hv = matT_.blockCol(k, k - 1, 3); }

    Scalar tau, beta;
    ColumnVectorType essential(2);
    hv.makeHouseHolder(essential, tau, beta);

    if (beta == Scalar(0)) { continue; } // zero vector: nothing to reflect

    if (!isFirst) { matT_.coeffRef(k, k - 1) = beta; }
    else if (k > il) { matT_.coeffRef(k, k - 1) = -matT_.coeff(k, k - 1); }

    // These Householder transformations form the O(n^3) part of the algorithm.
    // Each one is applied to a copy of the affected block, which is then written back.
    MatrixType leftBlock(matT_, k, k, 3, nbCols - k);
    leftBlock.applyHouseholderOnTheLeft(essential, tau);
    matT_.replace(leftBlock, k, k, 3, nbCols - k);

    const Index nbTouchedRows = std::min(iu, k + 3) + 1;
    MatrixType rightBlock(matT_, 0, k, nbTouchedRows, 3);
    rightBlock.applyHouseholderOnTheRight(essential, tau);
    matT_.replace(rightBlock, 0, k, nbTouchedRows, 3);

    if (computeU)
    {
      MatrixType uBlock(matU_, 0, k, nbCols, 3);
      uBlock.applyHouseholderOnTheRight(essential, tau);
      matU_.replace(uBlock, 0, k, nbCols, 3);
    }
  }

  // Last reflector: only the two rows iu-1 and iu are left
  ColumnVectorType tail = matT_.blockCol(iu - 1, iu - 2, 2);
  Scalar tau, beta;
  ColumnVectorType essential(1);
  tail.makeHouseHolder(essential, tau, beta);

  if (beta != Scalar(0)) // the vector is not zero
  {
    matT_.coeffRef(iu - 1, iu - 2) = beta;

    MatrixType leftBlock(matT_, iu - 1, iu - 1, 2, nbCols - iu + 1);
    leftBlock.applyHouseholderOnTheLeft(essential, tau);
    matT_.replace(leftBlock, iu - 1, iu - 1, 2, nbCols - iu + 1);

    MatrixType rightBlock(matT_, 0, iu - 1, iu + 1, 2);
    rightBlock.applyHouseholderOnTheRight(essential, tau);
    matT_.replace(rightBlock, 0, iu - 1, iu + 1, 2);

    if (computeU)
    {
      MatrixType uBlock(matU_, 0, iu - 1, nbCols, 2);
      uBlock.applyHouseholderOnTheRight(essential, tau);
      matU_.replace(uBlock, 0, iu - 1, nbCols, 2);
    }
  }

  // Round-off pollution below the first sub-diagonal is cleared explicitly
  for (Index i = im + 2; i <= iu; ++i)
  {
    matT_.coeffRef(i, i - 2) = Scalar(0);
    if (i > im + 2) { matT_.coeffRef(i, i - 3) = Scalar(0); }
  }
}

/*
  Reorders the computed decomposition: the diagonal block of T located at row
  ifst is moved to row ilst, and U is updated with the same transformations.

  ifst      row index of the diagonal block to move
  ilst      row index of its destination
  computeU  kept for interface compatibility; it is not consulted, U is
            always updated together with T

  error() is called when no decomposition has been computed yet.
*/
template<typename MatrixType>
void RealSchur<MatrixType>::swapSchur(unsigned int ifst, unsigned int ilst, bool computeU)
{
    // Same precondition as matrixT() and info(): reordering matrices that were
    // never computed would silently work on meaningless data.
    if (!isInitialized_) { error("eigensolver_not_initialized", "RealSchur"); }

    (void)computeU; // intentionally unused, see above

    internalEigenSolver::swapRealSchurInPlace(matT_, matU_, ifst, ilst);
}

namespace internalEigenSolver {
/*!
 * \brief Given the quasi-triangular matrix T and orthogonal matrix Q obtained from
 *  the real Schur decomposition, this function reorders the eigenvalues appearing on the (block) diagonal of matrix T
 *  The diagonal element of block of T with row index ifst is moved to row ilst
 *
 *  This is implemented from "On Swapping Diagonal Blocks in Real Schur Form"
 *  \param[in,out] matrixT quasi-triangular matrix of real Schur decomposition
 *  \param[in,out] matrixQ orthogonal of real Schur decomposition
 *  \param[in] ifst index of row needs moving
 *  \param[in] ilst index of destination row
 */
template<typename MatrixType>
void swapRealSchurInPlace(MatrixType& matrixT, MatrixType& matrixQ, unsigned int ifst, unsigned int ilst)
{
    if (ifst != ilst) {
        bool moveDown = (ifst < ilst);
        unsigned int size = matrixT.numOfRows();
        unsigned int idxf = std::min(ifst,ilst);
        unsigned int idxl = std::max(ifst,ilst);//moveDown ? (ilst-ifst+1):(ifst-ilst+1);
        unsigned int idx = idxf;

        std::vector<unsigned int> blockSize;
        while (idx <= idxl) {
            if ((idx != (size-1)) && (NumTraits<real_t>::prec() < std::abs(matrixT.coeff(idx+1,idx)))) {
                blockSize.push_back(2);
                idx += 2;
            } else {
                blockSize.push_back(1);
                ++idx;
            }
        }

        int blockNum = blockSize.size();


        // Very naive value, just to make sure gamma <= 1
        real_t gamma = (std::abs(matrixT.coeff(idxf+1,idxf+1)+matrixT.coeff(idxf,idxf)))/(std::abs(matrixT.coeff(idxf+1,idxf+1)+std::abs(matrixT.coeff(idxf,idxf))));

        if (moveDown) {
            idx = idxf;
            for (int i = 0; i < blockNum-1; ++i) {
                JacobiRotation<real_t> rot;
                if ( 2  == (blockSize[i]+blockSize[i+1])) {
                    if (NumTraits<real_t>::epsilon() < (std::abs(matrixT.coeff(idx+1,idx+1)- matrixT.coeff(idx,idx)))) {
                        rot.makeGivens(matrixT.coeff(idx,idx+1), (matrixT.coeff(idx+1,idx+1) - matrixT.coeff(idx,idx)));
                        matrixT.applyOnTheRight(idx,idx+1,rot);
                        matrixT.applyOnTheLeft(idx,idx+1,rot.adjoint());
                        matrixQ.applyOnTheRight(idx,idx+1,rot);
                    }
                    matrixT.coeffRef(idx+1,idx) = real_t(0);

                    ++idx;
                }
                else {
                    MatrixType T(matrixT,idx,idx,blockSize[i]+blockSize[i+1],blockSize[i]+blockSize[i+1]);
                    MatrixType Q = directSwapping(T, blockSize[i], blockSize[i+1], gamma);
                    MatrixType tempTcol(matrixT,0,idx,size,blockSize[i]+blockSize[i+1]);
                    MatrixType tmp(tempTcol);
                    multMatMat(tempTcol,Q,tmp);
                    matrixT.replace(tmp,0,idx,size,blockSize[i]+blockSize[i+1]);

                    MatrixType tempQ(matrixQ,0,idx,size,blockSize[i]+blockSize[i+1]);
                    multMatMat(tempQ,Q,tmp);
                    matrixQ.replace(tmp,0,idx,size,blockSize[i]+blockSize[i+1]);

                    MatrixType tempTrow(matrixT,idx,0,blockSize[i]+blockSize[i+1],size);
                    tmp.reshape(blockSize[i]+blockSize[i+1],size);
                    multMatMat(transpose(Q),tempTrow,tmp);
                    matrixT.replace(tmp,idx,0,blockSize[i]+blockSize[i+1],size);

                    if (1 == blockSize[i+1]) {
                        matrixT.coeffRef(idx+1,idx) = real_t(0);
                        matrixT.coeffRef(idx+2,idx) = real_t(0);
                    } else {
                        matrixT.coeffRef(idx+2,idx) = real_t(0);
                        matrixT.coeffRef(idx+2,idx+1) = real_t(0);
                        if (2 == blockSize[i]) {
                            matrixT.coeffRef(idx+3,idx) = real_t(0);
                            matrixT.coeffRef(idx+3,idx+1) = real_t(0);
                        }
                    }
                    idx += 2;

                }
                std::swap(blockSize[i],blockSize[i+1]);
            }
        }
        else {
            idx = idxl;
            for (int i = blockNum-2; i >=0 ; --i) {
                JacobiRotation<real_t> rot;
                if ( 2  == (blockSize[i]+blockSize[i+1])) {
                    --idx;
                    if (NumTraits<real_t>::epsilon() < (std::abs(matrixT.coeff(idx+1,idx+1)- matrixT.coeff(idx,idx)))) {
                        rot.makeGivens(matrixT.coeff(idx,idx+1), (matrixT.coeff(idx+1,idx+1) - matrixT.coeff(idx,idx)));
                        matrixT.applyOnTheRight(idx,idx+1,rot);
                        matrixT.applyOnTheLeft(idx,idx+1,rot.adjoint());
                        matrixQ.applyOnTheRight(idx,idx+1,rot);
                    }
                    matrixT.coeffRef(idx+1,idx) = real_t(0);
                }
                else {
                    if (2 == blockSize[i]) idx -= 2;
                    else --idx;
                    MatrixType T(matrixT,idx,idx,blockSize[i]+blockSize[i+1],blockSize[i]+blockSize[i+1]);
                    MatrixType Q = directSwapping(T, blockSize[i], blockSize[i+1], gamma);
                    MatrixType tempTcol(matrixT,0,idx,size,blockSize[i]+blockSize[i+1]);
                    MatrixType tmp(tempTcol);
                    multMatMat(tempTcol,Q,tmp);
                    matrixT.replace(tmp,0,idx,size,blockSize[i]+blockSize[i+1]);

                    MatrixType tempQ(matrixQ,0,idx,size,blockSize[i]+blockSize[i+1]);
                    multMatMat(tempQ,Q,tmp);
                    matrixQ.replace(tmp,0,idx,size,blockSize[i]+blockSize[i+1]);

                    MatrixType tempTrow(matrixT,idx,0,blockSize[i]+blockSize[i+1],size);
                    tmp.reshape(blockSize[i]+blockSize[i+1],size);
                    multMatMat(transpose(Q),tempTrow,tmp);
                    matrixT.replace(tmp,idx,0,blockSize[i]+blockSize[i+1],size);
                    if (1 == blockSize[i+1]) {
                        matrixT.coeffRef(idx+1,idx) = real_t(0);
                        matrixT.coeffRef(idx+2,idx) = real_t(0);
                    } else {
                        matrixT.coeffRef(idx+2,idx) = real_t(0);
                        matrixT.coeffRef(idx+2,idx+1) = real_t(0);
                        if (2 == blockSize[i]) {
                            matrixT.coeffRef(idx+3,idx) = real_t(0);
                            matrixT.coeffRef(idx+3,idx+1) = real_t(0);
                        }
                    }
                }
                std::swap(blockSize[i],blockSize[i+1]);
            }
        }
    }
}

template<typename MatrixType>
MatrixType directSwapping(MatrixType& matT, unsigned int p, unsigned int q, real_t gamma)
{
    MatrixType eyeP(p), eyeQ(q);
    eyeMatrix(eyeP); eyeMatrix(eyeQ);
    MatrixType A11(matT,0,0,p,p);
    MatrixType A12(matT,0,p,p,q);
    MatrixType A22(matT,p,p,q,q);
    A22 = transpose(A22);
    MatrixType Kron = kroneckerProduct(eyeQ,A11);
    MatrixType Kron2 = kroneckerProduct(A22,eyeP);
    Kron -= Kron2;

    std::vector<typename MatrixType::Scalar> X(p*q);
    for (unsigned int j = 0; j < q; ++j) {
        for (unsigned int i = 0; i < p; ++i) {
            X[j*p+i] = gamma*A12.coeff(i,j);
        }
    }

    typename MatrixType::Scalar piv;
    number_t row;
    gaussSolver(Kron,X,piv,row);
    unsigned int qrSize = p+q;
    MatrixType QR(qrSize,q);
    for (unsigned int j = 0; j < q; ++j) {
        for (unsigned int i = 0; i < p; ++i) {
            QR.coeffRef(i,j) = -X[j*p+i];
        }
        QR.coeffRef(p+j,j) = gamma;
    }

    HouseholderQR<MatrixType> qr(QR);
    MatrixType matQ = qr.matrixQ();

    return matQ;
}

} // end namespace internalEigenSolver

} // end namespace xlifepp

#endif // EIGEN_REAL_SCHUR_HPP