/*************************************************************************
ALGLIB 3.18.0 (source code generated 2021-10-25)
Copyright (c) Sergey Bochkanov (ALGLIB project).

>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses
>>> END OF LICENSE >>>
*************************************************************************/
20 #ifdef _MSC_VER
21 #define _CRT_SECURE_NO_WARNINGS
22 #endif
23 #include "stdafx.h"
24 #include "linalg.h"
25 
26 // disable some irrelevant warnings
27 #if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
28 #pragma warning(disable:4100)
29 #pragma warning(disable:4127)
30 #pragma warning(disable:4611)
31 #pragma warning(disable:4702)
32 #pragma warning(disable:4996)
33 #endif
34 
35 /////////////////////////////////////////////////////////////////////////
36 //
37 // THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
38 //
39 /////////////////////////////////////////////////////////////////////////
40 namespace alglib
41 {
42 
43 #if defined(AE_COMPILE_ABLAS) || !defined(AE_PARTIAL_BUILD)
44 
45 #endif
46 
47 #if defined(AE_COMPILE_ORTFAC) || !defined(AE_PARTIAL_BUILD)
48 
49 #endif
50 
51 #if defined(AE_COMPILE_MATGEN) || !defined(AE_PARTIAL_BUILD)
52 
53 #endif
54 
55 #if defined(AE_COMPILE_SPARSE) || !defined(AE_PARTIAL_BUILD)
56 
57 #endif
58 
59 #if defined(AE_COMPILE_HSSCHUR) || !defined(AE_PARTIAL_BUILD)
60 
61 #endif
62 
63 #if defined(AE_COMPILE_EVD) || !defined(AE_PARTIAL_BUILD)
64 
65 #endif
66 
67 #if defined(AE_COMPILE_DLU) || !defined(AE_PARTIAL_BUILD)
68 
69 #endif
70 
71 #if defined(AE_COMPILE_SPTRF) || !defined(AE_PARTIAL_BUILD)
72 
73 #endif
74 
75 #if defined(AE_COMPILE_AMDORDERING) || !defined(AE_PARTIAL_BUILD)
76 
77 #endif
78 
79 #if defined(AE_COMPILE_SPCHOL) || !defined(AE_PARTIAL_BUILD)
80 
81 #endif
82 
83 #if defined(AE_COMPILE_TRFAC) || !defined(AE_PARTIAL_BUILD)
84 
85 #endif
86 
87 #if defined(AE_COMPILE_BDSVD) || !defined(AE_PARTIAL_BUILD)
88 
89 #endif
90 
91 #if defined(AE_COMPILE_SVD) || !defined(AE_PARTIAL_BUILD)
92 
93 #endif
94 
95 #if defined(AE_COMPILE_RCOND) || !defined(AE_PARTIAL_BUILD)
96 
97 #endif
98 
99 #if defined(AE_COMPILE_FBLS) || !defined(AE_PARTIAL_BUILD)
100 
101 #endif
102 
103 #if defined(AE_COMPILE_NORMESTIMATOR) || !defined(AE_PARTIAL_BUILD)
104 
105 #endif
106 
107 #if defined(AE_COMPILE_MATINV) || !defined(AE_PARTIAL_BUILD)
108 
109 #endif
110 
111 #if defined(AE_COMPILE_INVERSEUPDATE) || !defined(AE_PARTIAL_BUILD)
112 
113 #endif
114 
115 #if defined(AE_COMPILE_SCHUR) || !defined(AE_PARTIAL_BUILD)
116 
117 #endif
118 
119 #if defined(AE_COMPILE_SPDGEVD) || !defined(AE_PARTIAL_BUILD)
120 
121 #endif
122 
123 #if defined(AE_COMPILE_MATDET) || !defined(AE_PARTIAL_BUILD)
124 
125 #endif
126 
127 #if defined(AE_COMPILE_ABLAS) || !defined(AE_PARTIAL_BUILD)
128 /*************************************************************************
129 Cache-oblivous complex "copy-and-transpose"
130 
131 Input parameters:
132     M   -   number of rows
133     N   -   number of columns
134     A   -   source matrix, MxN submatrix is copied and transposed
135     IA  -   submatrix offset (row index)
136     JA  -   submatrix offset (column index)
137     B   -   destination matrix, must be large enough to store result
138     IB  -   submatrix offset (row index)
139     JB  -   submatrix offset (column index)
140 *************************************************************************/
cmatrixtranspose(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,complex_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)141 void cmatrixtranspose(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
142 {
143     jmp_buf _break_jump;
144     alglib_impl::ae_state _alglib_env_state;
145     alglib_impl::ae_state_init(&_alglib_env_state);
146     if( setjmp(_break_jump) )
147     {
148 #if !defined(AE_NO_EXCEPTIONS)
149         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
150 #else
151         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
152         return;
153 #endif
154     }
155     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
156     if( _xparams.flags!=0x0 )
157         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
158     alglib_impl::cmatrixtranspose(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
159     alglib_impl::ae_state_clear(&_alglib_env_state);
160     return;
161 }
162 
163 /*************************************************************************
164 Cache-oblivous real "copy-and-transpose"
165 
166 Input parameters:
167     M   -   number of rows
168     N   -   number of columns
169     A   -   source matrix, MxN submatrix is copied and transposed
170     IA  -   submatrix offset (row index)
171     JA  -   submatrix offset (column index)
172     B   -   destination matrix, must be large enough to store result
173     IB  -   submatrix offset (row index)
174     JB  -   submatrix offset (column index)
175 *************************************************************************/
rmatrixtranspose(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const real_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)176 void rmatrixtranspose(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
177 {
178     jmp_buf _break_jump;
179     alglib_impl::ae_state _alglib_env_state;
180     alglib_impl::ae_state_init(&_alglib_env_state);
181     if( setjmp(_break_jump) )
182     {
183 #if !defined(AE_NO_EXCEPTIONS)
184         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
185 #else
186         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
187         return;
188 #endif
189     }
190     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
191     if( _xparams.flags!=0x0 )
192         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
193     alglib_impl::rmatrixtranspose(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
194     alglib_impl::ae_state_clear(&_alglib_env_state);
195     return;
196 }
197 
198 /*************************************************************************
199 This code enforces symmetricy of the matrix by copying Upper part to lower
200 one (or vice versa).
201 
202 INPUT PARAMETERS:
203     A   -   matrix
204     N   -   number of rows/columns
205     IsUpper - whether we want to copy upper triangle to lower one (True)
206             or vice versa (False).
207 *************************************************************************/
rmatrixenforcesymmetricity(const real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)208 void rmatrixenforcesymmetricity(const real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
209 {
210     jmp_buf _break_jump;
211     alglib_impl::ae_state _alglib_env_state;
212     alglib_impl::ae_state_init(&_alglib_env_state);
213     if( setjmp(_break_jump) )
214     {
215 #if !defined(AE_NO_EXCEPTIONS)
216         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
217 #else
218         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
219         return;
220 #endif
221     }
222     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
223     if( _xparams.flags!=0x0 )
224         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
225     alglib_impl::rmatrixenforcesymmetricity(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
226     alglib_impl::ae_state_clear(&_alglib_env_state);
227     return;
228 }
229 
230 /*************************************************************************
231 Copy
232 
233 Input parameters:
234     M   -   number of rows
235     N   -   number of columns
236     A   -   source matrix, MxN submatrix is copied and transposed
237     IA  -   submatrix offset (row index)
238     JA  -   submatrix offset (column index)
239     B   -   destination matrix, must be large enough to store result
240     IB  -   submatrix offset (row index)
241     JB  -   submatrix offset (column index)
242 *************************************************************************/
cmatrixcopy(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,complex_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)243 void cmatrixcopy(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
244 {
245     jmp_buf _break_jump;
246     alglib_impl::ae_state _alglib_env_state;
247     alglib_impl::ae_state_init(&_alglib_env_state);
248     if( setjmp(_break_jump) )
249     {
250 #if !defined(AE_NO_EXCEPTIONS)
251         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
252 #else
253         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
254         return;
255 #endif
256     }
257     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
258     if( _xparams.flags!=0x0 )
259         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
260     alglib_impl::cmatrixcopy(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
261     alglib_impl::ae_state_clear(&_alglib_env_state);
262     return;
263 }
264 
265 /*************************************************************************
266 Copy
267 
268 Input parameters:
269     N   -   subvector size
270     A   -   source vector, N elements are copied
271     IA  -   source offset (first element index)
272     B   -   destination vector, must be large enough to store result
273     IB  -   destination offset (first element index)
274 *************************************************************************/
rvectorcopy(const ae_int_t n,const real_1d_array & a,const ae_int_t ia,const real_1d_array & b,const ae_int_t ib,const xparams _xparams)275 void rvectorcopy(const ae_int_t n, const real_1d_array &a, const ae_int_t ia, const real_1d_array &b, const ae_int_t ib, const xparams _xparams)
276 {
277     jmp_buf _break_jump;
278     alglib_impl::ae_state _alglib_env_state;
279     alglib_impl::ae_state_init(&_alglib_env_state);
280     if( setjmp(_break_jump) )
281     {
282 #if !defined(AE_NO_EXCEPTIONS)
283         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
284 #else
285         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
286         return;
287 #endif
288     }
289     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
290     if( _xparams.flags!=0x0 )
291         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
292     alglib_impl::rvectorcopy(n, const_cast<alglib_impl::ae_vector*>(a.c_ptr()), ia, const_cast<alglib_impl::ae_vector*>(b.c_ptr()), ib, &_alglib_env_state);
293     alglib_impl::ae_state_clear(&_alglib_env_state);
294     return;
295 }
296 
297 /*************************************************************************
298 Copy
299 
300 Input parameters:
301     M   -   number of rows
302     N   -   number of columns
303     A   -   source matrix, MxN submatrix is copied and transposed
304     IA  -   submatrix offset (row index)
305     JA  -   submatrix offset (column index)
306     B   -   destination matrix, must be large enough to store result
307     IB  -   submatrix offset (row index)
308     JB  -   submatrix offset (column index)
309 *************************************************************************/
rmatrixcopy(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const real_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)310 void rmatrixcopy(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
311 {
312     jmp_buf _break_jump;
313     alglib_impl::ae_state _alglib_env_state;
314     alglib_impl::ae_state_init(&_alglib_env_state);
315     if( setjmp(_break_jump) )
316     {
317 #if !defined(AE_NO_EXCEPTIONS)
318         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
319 #else
320         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
321         return;
322 #endif
323     }
324     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
325     if( _xparams.flags!=0x0 )
326         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
327     alglib_impl::rmatrixcopy(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
328     alglib_impl::ae_state_clear(&_alglib_env_state);
329     return;
330 }
331 
332 /*************************************************************************
333 Performs generalized copy: B := Beta*B + Alpha*A.
334 
335 If Beta=0, then previous contents of B is simply ignored. If Alpha=0, then
336 A is ignored and not referenced. If both Alpha and Beta  are  zero,  B  is
337 filled by zeros.
338 
339 Input parameters:
340     M   -   number of rows
341     N   -   number of columns
342     Alpha-  coefficient
343     A   -   source matrix, MxN submatrix is copied and transposed
344     IA  -   submatrix offset (row index)
345     JA  -   submatrix offset (column index)
346     Beta-   coefficient
347     B   -   destination matrix, must be large enough to store result
348     IB  -   submatrix offset (row index)
349     JB  -   submatrix offset (column index)
350 *************************************************************************/
rmatrixgencopy(const ae_int_t m,const ae_int_t n,const double alpha,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const double beta,const real_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)351 void rmatrixgencopy(const ae_int_t m, const ae_int_t n, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const double beta, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
352 {
353     jmp_buf _break_jump;
354     alglib_impl::ae_state _alglib_env_state;
355     alglib_impl::ae_state_init(&_alglib_env_state);
356     if( setjmp(_break_jump) )
357     {
358 #if !defined(AE_NO_EXCEPTIONS)
359         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
360 #else
361         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
362         return;
363 #endif
364     }
365     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
366     if( _xparams.flags!=0x0 )
367         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
368     alglib_impl::rmatrixgencopy(m, n, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, beta, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
369     alglib_impl::ae_state_clear(&_alglib_env_state);
370     return;
371 }
372 
373 /*************************************************************************
374 Rank-1 correction: A := A + alpha*u*v'
375 
376 NOTE: this  function  expects  A  to  be  large enough to store result. No
377       automatic preallocation happens for  smaller  arrays.  No  integrity
378       checks is performed for sizes of A, u, v.
379 
380 INPUT PARAMETERS:
381     M   -   number of rows
382     N   -   number of columns
383     A   -   target matrix, MxN submatrix is updated
384     IA  -   submatrix offset (row index)
385     JA  -   submatrix offset (column index)
386     Alpha-  coefficient
387     U   -   vector #1
388     IU  -   subvector offset
389     V   -   vector #2
390     IV  -   subvector offset
391 
392 
393   -- ALGLIB routine --
394 
395      16.10.2017
396      Bochkanov Sergey
397 *************************************************************************/
rmatrixger(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const double alpha,const real_1d_array & u,const ae_int_t iu,const real_1d_array & v,const ae_int_t iv,const xparams _xparams)398 void rmatrixger(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const double alpha, const real_1d_array &u, const ae_int_t iu, const real_1d_array &v, const ae_int_t iv, const xparams _xparams)
399 {
400     jmp_buf _break_jump;
401     alglib_impl::ae_state _alglib_env_state;
402     alglib_impl::ae_state_init(&_alglib_env_state);
403     if( setjmp(_break_jump) )
404     {
405 #if !defined(AE_NO_EXCEPTIONS)
406         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
407 #else
408         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
409         return;
410 #endif
411     }
412     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
413     if( _xparams.flags!=0x0 )
414         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
415     alglib_impl::rmatrixger(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, alpha, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state);
416     alglib_impl::ae_state_clear(&_alglib_env_state);
417     return;
418 }
419 
420 /*************************************************************************
421 Rank-1 correction: A := A + u*v'
422 
423 INPUT PARAMETERS:
424     M   -   number of rows
425     N   -   number of columns
426     A   -   target matrix, MxN submatrix is updated
427     IA  -   submatrix offset (row index)
428     JA  -   submatrix offset (column index)
429     U   -   vector #1
430     IU  -   subvector offset
431     V   -   vector #2
432     IV  -   subvector offset
433 *************************************************************************/
cmatrixrank1(const ae_int_t m,const ae_int_t n,complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,complex_1d_array & u,const ae_int_t iu,complex_1d_array & v,const ae_int_t iv,const xparams _xparams)434 void cmatrixrank1(const ae_int_t m, const ae_int_t n, complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_1d_array &u, const ae_int_t iu, complex_1d_array &v, const ae_int_t iv, const xparams _xparams)
435 {
436     jmp_buf _break_jump;
437     alglib_impl::ae_state _alglib_env_state;
438     alglib_impl::ae_state_init(&_alglib_env_state);
439     if( setjmp(_break_jump) )
440     {
441 #if !defined(AE_NO_EXCEPTIONS)
442         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
443 #else
444         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
445         return;
446 #endif
447     }
448     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
449     if( _xparams.flags!=0x0 )
450         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
451     alglib_impl::cmatrixrank1(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state);
452     alglib_impl::ae_state_clear(&_alglib_env_state);
453     return;
454 }
455 
456 /*************************************************************************
457 IMPORTANT: this function is deprecated since ALGLIB 3.13. Use RMatrixGER()
458            which is more generic version of this function.
459 
460 Rank-1 correction: A := A + u*v'
461 
462 INPUT PARAMETERS:
463     M   -   number of rows
464     N   -   number of columns
465     A   -   target matrix, MxN submatrix is updated
466     IA  -   submatrix offset (row index)
467     JA  -   submatrix offset (column index)
468     U   -   vector #1
469     IU  -   subvector offset
470     V   -   vector #2
471     IV  -   subvector offset
472 *************************************************************************/
rmatrixrank1(const ae_int_t m,const ae_int_t n,real_2d_array & a,const ae_int_t ia,const ae_int_t ja,real_1d_array & u,const ae_int_t iu,real_1d_array & v,const ae_int_t iv,const xparams _xparams)473 void rmatrixrank1(const ae_int_t m, const ae_int_t n, real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_1d_array &u, const ae_int_t iu, real_1d_array &v, const ae_int_t iv, const xparams _xparams)
474 {
475     jmp_buf _break_jump;
476     alglib_impl::ae_state _alglib_env_state;
477     alglib_impl::ae_state_init(&_alglib_env_state);
478     if( setjmp(_break_jump) )
479     {
480 #if !defined(AE_NO_EXCEPTIONS)
481         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
482 #else
483         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
484         return;
485 #endif
486     }
487     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
488     if( _xparams.flags!=0x0 )
489         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
490     alglib_impl::rmatrixrank1(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state);
491     alglib_impl::ae_state_clear(&_alglib_env_state);
492     return;
493 }
494 
495 /*************************************************************************
496 
497 *************************************************************************/
rmatrixgemv(const ae_int_t m,const ae_int_t n,const double alpha,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t opa,const real_1d_array & x,const ae_int_t ix,const double beta,const real_1d_array & y,const ae_int_t iy,const xparams _xparams)498 void rmatrixgemv(const ae_int_t m, const ae_int_t n, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const real_1d_array &x, const ae_int_t ix, const double beta, const real_1d_array &y, const ae_int_t iy, const xparams _xparams)
499 {
500     jmp_buf _break_jump;
501     alglib_impl::ae_state _alglib_env_state;
502     alglib_impl::ae_state_init(&_alglib_env_state);
503     if( setjmp(_break_jump) )
504     {
505 #if !defined(AE_NO_EXCEPTIONS)
506         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
507 #else
508         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
509         return;
510 #endif
511     }
512     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
513     if( _xparams.flags!=0x0 )
514         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
515     alglib_impl::rmatrixgemv(m, n, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, beta, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
516     alglib_impl::ae_state_clear(&_alglib_env_state);
517     return;
518 }
519 
520 /*************************************************************************
521 Matrix-vector product: y := op(A)*x
522 
523 INPUT PARAMETERS:
524     M   -   number of rows of op(A)
525             M>=0
526     N   -   number of columns of op(A)
527             N>=0
528     A   -   target matrix
529     IA  -   submatrix offset (row index)
530     JA  -   submatrix offset (column index)
531     OpA -   operation type:
532             * OpA=0     =>  op(A) = A
533             * OpA=1     =>  op(A) = A^T
534             * OpA=2     =>  op(A) = A^H
535     X   -   input vector
536     IX  -   subvector offset
537     IY  -   subvector offset
538     Y   -   preallocated matrix, must be large enough to store result
539 
540 OUTPUT PARAMETERS:
541     Y   -   vector which stores result
542 
543 if M=0, then subroutine does nothing.
544 if N=0, Y is filled by zeros.
545 
546 
547   -- ALGLIB routine --
548 
549      28.01.2010
550      Bochkanov Sergey
551 *************************************************************************/
cmatrixmv(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t opa,const complex_1d_array & x,const ae_int_t ix,complex_1d_array & y,const ae_int_t iy,const xparams _xparams)552 void cmatrixmv(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const complex_1d_array &x, const ae_int_t ix, complex_1d_array &y, const ae_int_t iy, const xparams _xparams)
553 {
554     jmp_buf _break_jump;
555     alglib_impl::ae_state _alglib_env_state;
556     alglib_impl::ae_state_init(&_alglib_env_state);
557     if( setjmp(_break_jump) )
558     {
559 #if !defined(AE_NO_EXCEPTIONS)
560         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
561 #else
562         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
563         return;
564 #endif
565     }
566     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
567     if( _xparams.flags!=0x0 )
568         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
569     alglib_impl::cmatrixmv(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
570     alglib_impl::ae_state_clear(&_alglib_env_state);
571     return;
572 }
573 
574 /*************************************************************************
575 IMPORTANT: this function is deprecated since ALGLIB 3.13. Use RMatrixGEMV()
576            which is more generic version of this function.
577 
578 Matrix-vector product: y := op(A)*x
579 
580 INPUT PARAMETERS:
581     M   -   number of rows of op(A)
582     N   -   number of columns of op(A)
583     A   -   target matrix
584     IA  -   submatrix offset (row index)
585     JA  -   submatrix offset (column index)
586     OpA -   operation type:
587             * OpA=0     =>  op(A) = A
588             * OpA=1     =>  op(A) = A^T
589     X   -   input vector
590     IX  -   subvector offset
591     IY  -   subvector offset
592     Y   -   preallocated matrix, must be large enough to store result
593 
594 OUTPUT PARAMETERS:
595     Y   -   vector which stores result
596 
597 if M=0, then subroutine does nothing.
598 if N=0, Y is filled by zeros.
599 
600 
601   -- ALGLIB routine --
602 
603      28.01.2010
604      Bochkanov Sergey
605 *************************************************************************/
rmatrixmv(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t opa,const real_1d_array & x,const ae_int_t ix,const real_1d_array & y,const ae_int_t iy,const xparams _xparams)606 void rmatrixmv(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const real_1d_array &x, const ae_int_t ix, const real_1d_array &y, const ae_int_t iy, const xparams _xparams)
607 {
608     jmp_buf _break_jump;
609     alglib_impl::ae_state _alglib_env_state;
610     alglib_impl::ae_state_init(&_alglib_env_state);
611     if( setjmp(_break_jump) )
612     {
613 #if !defined(AE_NO_EXCEPTIONS)
614         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
615 #else
616         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
617         return;
618 #endif
619     }
620     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
621     if( _xparams.flags!=0x0 )
622         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
623     alglib_impl::rmatrixmv(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
624     alglib_impl::ae_state_clear(&_alglib_env_state);
625     return;
626 }
627 
628 /*************************************************************************
629 
630 *************************************************************************/
rmatrixsymv(const ae_int_t n,const double alpha,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const bool isupper,const real_1d_array & x,const ae_int_t ix,const double beta,const real_1d_array & y,const ae_int_t iy,const xparams _xparams)631 void rmatrixsymv(const ae_int_t n, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const bool isupper, const real_1d_array &x, const ae_int_t ix, const double beta, const real_1d_array &y, const ae_int_t iy, const xparams _xparams)
632 {
633     jmp_buf _break_jump;
634     alglib_impl::ae_state _alglib_env_state;
635     alglib_impl::ae_state_init(&_alglib_env_state);
636     if( setjmp(_break_jump) )
637     {
638 #if !defined(AE_NO_EXCEPTIONS)
639         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
640 #else
641         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
642         return;
643 #endif
644     }
645     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
646     if( _xparams.flags!=0x0 )
647         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
648     alglib_impl::rmatrixsymv(n, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, isupper, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, beta, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
649     alglib_impl::ae_state_clear(&_alglib_env_state);
650     return;
651 }
652 
653 /*************************************************************************
654 
655 *************************************************************************/
rmatrixsyvmv(const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const bool isupper,const real_1d_array & x,const ae_int_t ix,const real_1d_array & tmp,const xparams _xparams)656 double rmatrixsyvmv(const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const bool isupper, const real_1d_array &x, const ae_int_t ix, const real_1d_array &tmp, const xparams _xparams)
657 {
658     jmp_buf _break_jump;
659     alglib_impl::ae_state _alglib_env_state;
660     alglib_impl::ae_state_init(&_alglib_env_state);
661     if( setjmp(_break_jump) )
662     {
663 #if !defined(AE_NO_EXCEPTIONS)
664         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
665 #else
666         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
667         return 0;
668 #endif
669     }
670     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
671     if( _xparams.flags!=0x0 )
672         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
673     double result = alglib_impl::rmatrixsyvmv(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, isupper, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(tmp.c_ptr()), &_alglib_env_state);
674     alglib_impl::ae_state_clear(&_alglib_env_state);
675     return *(reinterpret_cast<double*>(&result));
676 }
677 
678 /*************************************************************************
679 This subroutine solves linear system op(A)*x=b where:
680 * A is NxN upper/lower triangular/unitriangular matrix
681 * X and B are Nx1 vectors
682 * "op" may be identity transformation or transposition
683 
684 Solution replaces X.
685 
IMPORTANT: * no overflow/underflow/degeneracy tests are performed.
687            * no integrity checks for operand sizes, out-of-bounds accesses
688              and so on is performed
689 
690 INPUT PARAMETERS
691     N   -   matrix size, N>=0
692     A       -   matrix, actial matrix is stored in A[IA:IA+N-1,JA:JA+N-1]
693     IA      -   submatrix offset
694     JA      -   submatrix offset
695     IsUpper -   whether matrix is upper triangular
696     IsUnit  -   whether matrix is unitriangular
697     OpType  -   transformation type:
698                 * 0 - no transformation
699                 * 1 - transposition
700     X       -   right part, actual vector is stored in X[IX:IX+N-1]
701     IX      -   offset
702 
703 OUTPUT PARAMETERS
704     X       -   solution replaces elements X[IX:IX+N-1]
705 
706   -- ALGLIB routine / remastering of LAPACK's DTRSV --
707      (c) 2017 Bochkanov Sergey - converted to ALGLIB
708      (c) 2016 Reference BLAS level1 routine (LAPACK version 3.7.0)
709      Reference BLAS is a software package provided by Univ. of Tennessee,
710      Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.
711 *************************************************************************/
rmatrixtrsv(const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const bool isupper,const bool isunit,const ae_int_t optype,const real_1d_array & x,const ae_int_t ix,const xparams _xparams)712 void rmatrixtrsv(const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const bool isupper, const bool isunit, const ae_int_t optype, const real_1d_array &x, const ae_int_t ix, const xparams _xparams)
713 {
714     jmp_buf _break_jump;
715     alglib_impl::ae_state _alglib_env_state;
716     alglib_impl::ae_state_init(&_alglib_env_state);
717     if( setjmp(_break_jump) )
718     {
719 #if !defined(AE_NO_EXCEPTIONS)
720         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
721 #else
722         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
723         return;
724 #endif
725     }
726     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
727     if( _xparams.flags!=0x0 )
728         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
729     alglib_impl::rmatrixtrsv(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, isupper, isunit, optype, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, &_alglib_env_state);
730     alglib_impl::ae_state_clear(&_alglib_env_state);
731     return;
732 }
733 
734 /*************************************************************************
735 This subroutine calculates X*op(A^-1) where:
736 * X is MxN general matrix
737 * A is NxN upper/lower triangular/unitriangular matrix
738 * "op" may be identity transformation, transposition, conjugate transposition
739 Multiplication result replaces X.
740 
741 INPUT PARAMETERS
742     N   -   matrix size, N>=0
743     M   -   matrix size, N>=0
744     A       -   matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
745     I1      -   submatrix offset
746     J1      -   submatrix offset
747     IsUpper -   whether matrix is upper triangular
748     IsUnit  -   whether matrix is unitriangular
749     OpType  -   transformation type:
750                 * 0 - no transformation
751                 * 1 - transposition
752                 * 2 - conjugate transposition
753     X   -   matrix, actial matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
754     I2  -   submatrix offset
755     J2  -   submatrix offset
756 
757   ! FREE EDITION OF ALGLIB:
758   !
759   ! Free Edition of ALGLIB supports following important features for  this
760   ! function:
761   ! * C++ version: x64 SIMD support using C++ intrinsics
762   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
763   !
764   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
765   ! Reference Manual in order  to  find  out  how to activate SIMD support
766   ! in ALGLIB.
767 
768   ! COMMERCIAL EDITION OF ALGLIB:
769   !
770   ! Commercial Edition of ALGLIB includes following important improvements
771   ! of this function:
772   ! * high-performance native backend with same C# interface (C# version)
773   ! * multithreading support (C++ and C# versions)
774   ! * hardware vendor (Intel) implementations of linear algebra primitives
775   !   (C++ and C# versions, x86/x64 platform)
776   !
777   ! We recommend you to read 'Working with commercial version' section  of
778   ! ALGLIB Reference Manual in order to find out how to  use  performance-
779   ! related features provided by commercial edition of ALGLIB.
780 
781   -- ALGLIB routine --
782      20.01.2018
783      Bochkanov Sergey
784 *************************************************************************/
cmatrixrighttrsm(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t i1,const ae_int_t j1,const bool isupper,const bool isunit,const ae_int_t optype,const complex_2d_array & x,const ae_int_t i2,const ae_int_t j2,const xparams _xparams)785 void cmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, const complex_2d_array &x, const ae_int_t i2, const ae_int_t j2, const xparams _xparams)
786 {
787     jmp_buf _break_jump;
788     alglib_impl::ae_state _alglib_env_state;
789     alglib_impl::ae_state_init(&_alglib_env_state);
790     if( setjmp(_break_jump) )
791     {
792 #if !defined(AE_NO_EXCEPTIONS)
793         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
794 #else
795         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
796         return;
797 #endif
798     }
799     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
800     if( _xparams.flags!=0x0 )
801         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
802     alglib_impl::cmatrixrighttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
803     alglib_impl::ae_state_clear(&_alglib_env_state);
804     return;
805 }
806 
807 /*************************************************************************
808 This subroutine calculates op(A^-1)*X where:
809 * X is MxN general matrix
810 * A is MxM upper/lower triangular/unitriangular matrix
811 * "op" may be identity transformation, transposition, conjugate transposition
812 Multiplication result replaces X.
813 
814 INPUT PARAMETERS
815     N   -   matrix size, N>=0
816     M   -   matrix size, N>=0
817     A       -   matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
818     I1      -   submatrix offset
819     J1      -   submatrix offset
820     IsUpper -   whether matrix is upper triangular
821     IsUnit  -   whether matrix is unitriangular
822     OpType  -   transformation type:
823                 * 0 - no transformation
824                 * 1 - transposition
825                 * 2 - conjugate transposition
826     X   -   matrix, actial matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
827     I2  -   submatrix offset
828     J2  -   submatrix offset
829 
830   ! FREE EDITION OF ALGLIB:
831   !
832   ! Free Edition of ALGLIB supports following important features for  this
833   ! function:
834   ! * C++ version: x64 SIMD support using C++ intrinsics
835   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
836   !
837   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
838   ! Reference Manual in order  to  find  out  how to activate SIMD support
839   ! in ALGLIB.
840 
841   ! COMMERCIAL EDITION OF ALGLIB:
842   !
843   ! Commercial Edition of ALGLIB includes following important improvements
844   ! of this function:
845   ! * high-performance native backend with same C# interface (C# version)
846   ! * multithreading support (C++ and C# versions)
847   ! * hardware vendor (Intel) implementations of linear algebra primitives
848   !   (C++ and C# versions, x86/x64 platform)
849   !
850   ! We recommend you to read 'Working with commercial version' section  of
851   ! ALGLIB Reference Manual in order to find out how to  use  performance-
852   ! related features provided by commercial edition of ALGLIB.
853 
854   -- ALGLIB routine --
855      15.12.2009-22.01.2018
856      Bochkanov Sergey
857 *************************************************************************/
cmatrixlefttrsm(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t i1,const ae_int_t j1,const bool isupper,const bool isunit,const ae_int_t optype,const complex_2d_array & x,const ae_int_t i2,const ae_int_t j2,const xparams _xparams)858 void cmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, const complex_2d_array &x, const ae_int_t i2, const ae_int_t j2, const xparams _xparams)
859 {
860     jmp_buf _break_jump;
861     alglib_impl::ae_state _alglib_env_state;
862     alglib_impl::ae_state_init(&_alglib_env_state);
863     if( setjmp(_break_jump) )
864     {
865 #if !defined(AE_NO_EXCEPTIONS)
866         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
867 #else
868         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
869         return;
870 #endif
871     }
872     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
873     if( _xparams.flags!=0x0 )
874         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
875     alglib_impl::cmatrixlefttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
876     alglib_impl::ae_state_clear(&_alglib_env_state);
877     return;
878 }
879 
880 /*************************************************************************
881 This subroutine calculates X*op(A^-1) where:
882 * X is MxN general matrix
883 * A is NxN upper/lower triangular/unitriangular matrix
884 * "op" may be identity transformation, transposition
885 Multiplication result replaces X.
886 
887 INPUT PARAMETERS
888     N   -   matrix size, N>=0
889     M   -   matrix size, N>=0
890     A       -   matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
891     I1      -   submatrix offset
892     J1      -   submatrix offset
893     IsUpper -   whether matrix is upper triangular
894     IsUnit  -   whether matrix is unitriangular
895     OpType  -   transformation type:
896                 * 0 - no transformation
897                 * 1 - transposition
898     X   -   matrix, actial matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
899     I2  -   submatrix offset
900     J2  -   submatrix offset
901 
902   ! FREE EDITION OF ALGLIB:
903   !
904   ! Free Edition of ALGLIB supports following important features for  this
905   ! function:
906   ! * C++ version: x64 SIMD support using C++ intrinsics
907   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
908   !
909   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
910   ! Reference Manual in order  to  find  out  how to activate SIMD support
911   ! in ALGLIB.
912 
913   ! COMMERCIAL EDITION OF ALGLIB:
914   !
915   ! Commercial Edition of ALGLIB includes following important improvements
916   ! of this function:
917   ! * high-performance native backend with same C# interface (C# version)
918   ! * multithreading support (C++ and C# versions)
919   ! * hardware vendor (Intel) implementations of linear algebra primitives
920   !   (C++ and C# versions, x86/x64 platform)
921   !
922   ! We recommend you to read 'Working with commercial version' section  of
923   ! ALGLIB Reference Manual in order to find out how to  use  performance-
924   ! related features provided by commercial edition of ALGLIB.
925 
926   -- ALGLIB routine --
927      15.12.2009-22.01.2018
928      Bochkanov Sergey
929 *************************************************************************/
rmatrixrighttrsm(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t i1,const ae_int_t j1,const bool isupper,const bool isunit,const ae_int_t optype,const real_2d_array & x,const ae_int_t i2,const ae_int_t j2,const xparams _xparams)930 void rmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, const real_2d_array &x, const ae_int_t i2, const ae_int_t j2, const xparams _xparams)
931 {
932     jmp_buf _break_jump;
933     alglib_impl::ae_state _alglib_env_state;
934     alglib_impl::ae_state_init(&_alglib_env_state);
935     if( setjmp(_break_jump) )
936     {
937 #if !defined(AE_NO_EXCEPTIONS)
938         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
939 #else
940         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
941         return;
942 #endif
943     }
944     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
945     if( _xparams.flags!=0x0 )
946         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
947     alglib_impl::rmatrixrighttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
948     alglib_impl::ae_state_clear(&_alglib_env_state);
949     return;
950 }
951 
952 /*************************************************************************
953 This subroutine calculates op(A^-1)*X where:
954 * X is MxN general matrix
955 * A is MxM upper/lower triangular/unitriangular matrix
956 * "op" may be identity transformation, transposition
957 Multiplication result replaces X.
958 
959 INPUT PARAMETERS
960     N   -   matrix size, N>=0
961     M   -   matrix size, N>=0
962     A       -   matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
963     I1      -   submatrix offset
964     J1      -   submatrix offset
965     IsUpper -   whether matrix is upper triangular
966     IsUnit  -   whether matrix is unitriangular
967     OpType  -   transformation type:
968                 * 0 - no transformation
969                 * 1 - transposition
970     X   -   matrix, actial matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
971     I2  -   submatrix offset
972     J2  -   submatrix offset
973 
974   ! FREE EDITION OF ALGLIB:
975   !
976   ! Free Edition of ALGLIB supports following important features for  this
977   ! function:
978   ! * C++ version: x64 SIMD support using C++ intrinsics
979   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
980   !
981   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
982   ! Reference Manual in order  to  find  out  how to activate SIMD support
983   ! in ALGLIB.
984 
985   ! COMMERCIAL EDITION OF ALGLIB:
986   !
987   ! Commercial Edition of ALGLIB includes following important improvements
988   ! of this function:
989   ! * high-performance native backend with same C# interface (C# version)
990   ! * multithreading support (C++ and C# versions)
991   ! * hardware vendor (Intel) implementations of linear algebra primitives
992   !   (C++ and C# versions, x86/x64 platform)
993   !
994   ! We recommend you to read 'Working with commercial version' section  of
995   ! ALGLIB Reference Manual in order to find out how to  use  performance-
996   ! related features provided by commercial edition of ALGLIB.
997 
998   -- ALGLIB routine --
999      15.12.2009-22.01.2018
1000      Bochkanov Sergey
1001 *************************************************************************/
rmatrixlefttrsm(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t i1,const ae_int_t j1,const bool isupper,const bool isunit,const ae_int_t optype,const real_2d_array & x,const ae_int_t i2,const ae_int_t j2,const xparams _xparams)1002 void rmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, const real_2d_array &x, const ae_int_t i2, const ae_int_t j2, const xparams _xparams)
1003 {
1004     jmp_buf _break_jump;
1005     alglib_impl::ae_state _alglib_env_state;
1006     alglib_impl::ae_state_init(&_alglib_env_state);
1007     if( setjmp(_break_jump) )
1008     {
1009 #if !defined(AE_NO_EXCEPTIONS)
1010         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1011 #else
1012         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1013         return;
1014 #endif
1015     }
1016     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1017     if( _xparams.flags!=0x0 )
1018         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1019     alglib_impl::rmatrixlefttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
1020     alglib_impl::ae_state_clear(&_alglib_env_state);
1021     return;
1022 }
1023 
1024 /*************************************************************************
1025 This subroutine calculates  C=alpha*A*A^H+beta*C  or  C=alpha*A^H*A+beta*C
1026 where:
1027 * C is NxN Hermitian matrix given by its upper/lower triangle
1028 * A is NxK matrix when A*A^H is calculated, KxN matrix otherwise
1029 
1030 Additional info:
1031 * multiplication result replaces C. If Beta=0, C elements are not used in
1032   calculations (not multiplied by zero - just not referenced)
1033 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
1034 * if both Beta and Alpha are zero, C is filled by zeros.
1035 
1036 INPUT PARAMETERS
1037     N       -   matrix size, N>=0
1038     K       -   matrix size, K>=0
1039     Alpha   -   coefficient
1040     A       -   matrix
1041     IA      -   submatrix offset (row index)
1042     JA      -   submatrix offset (column index)
1043     OpTypeA -   multiplication type:
1044                 * 0 - A*A^H is calculated
1045                 * 2 - A^H*A is calculated
1046     Beta    -   coefficient
1047     C       -   preallocated input/output matrix
1048     IC      -   submatrix offset (row index)
1049     JC      -   submatrix offset (column index)
1050     IsUpper -   whether upper or lower triangle of C is updated;
1051                 this function updates only one half of C, leaving
1052                 other half unchanged (not referenced at all).
1053 
1054   ! FREE EDITION OF ALGLIB:
1055   !
1056   ! Free Edition of ALGLIB supports following important features for  this
1057   ! function:
1058   ! * C++ version: x64 SIMD support using C++ intrinsics
1059   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1060   !
1061   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1062   ! Reference Manual in order  to  find  out  how to activate SIMD support
1063   ! in ALGLIB.
1064 
1065   ! COMMERCIAL EDITION OF ALGLIB:
1066   !
1067   ! Commercial Edition of ALGLIB includes following important improvements
1068   ! of this function:
1069   ! * high-performance native backend with same C# interface (C# version)
1070   ! * multithreading support (C++ and C# versions)
1071   ! * hardware vendor (Intel) implementations of linear algebra primitives
1072   !   (C++ and C# versions, x86/x64 platform)
1073   !
1074   ! We recommend you to read 'Working with commercial version' section  of
1075   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1076   ! related features provided by commercial edition of ALGLIB.
1077 
1078   -- ALGLIB routine --
1079      16.12.2009-22.01.2018
1080      Bochkanov Sergey
1081 *************************************************************************/
cmatrixherk(const ae_int_t n,const ae_int_t k,const double alpha,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t optypea,const double beta,const complex_2d_array & c,const ae_int_t ic,const ae_int_t jc,const bool isupper,const xparams _xparams)1082 void cmatrixherk(const ae_int_t n, const ae_int_t k, const double alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, const complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper, const xparams _xparams)
1083 {
1084     jmp_buf _break_jump;
1085     alglib_impl::ae_state _alglib_env_state;
1086     alglib_impl::ae_state_init(&_alglib_env_state);
1087     if( setjmp(_break_jump) )
1088     {
1089 #if !defined(AE_NO_EXCEPTIONS)
1090         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1091 #else
1092         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1093         return;
1094 #endif
1095     }
1096     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1097     if( _xparams.flags!=0x0 )
1098         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1099     alglib_impl::cmatrixherk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state);
1100     alglib_impl::ae_state_clear(&_alglib_env_state);
1101     return;
1102 }
1103 
1104 /*************************************************************************
1105 This subroutine calculates  C=alpha*A*A^T+beta*C  or  C=alpha*A^T*A+beta*C
1106 where:
1107 * C is NxN symmetric matrix given by its upper/lower triangle
1108 * A is NxK matrix when A*A^T is calculated, KxN matrix otherwise
1109 
1110 Additional info:
1111 * multiplication result replaces C. If Beta=0, C elements are not used in
1112   calculations (not multiplied by zero - just not referenced)
1113 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
1114 * if both Beta and Alpha are zero, C is filled by zeros.
1115 
1116 INPUT PARAMETERS
1117     N       -   matrix size, N>=0
1118     K       -   matrix size, K>=0
1119     Alpha   -   coefficient
1120     A       -   matrix
1121     IA      -   submatrix offset (row index)
1122     JA      -   submatrix offset (column index)
1123     OpTypeA -   multiplication type:
1124                 * 0 - A*A^T is calculated
1125                 * 2 - A^T*A is calculated
1126     Beta    -   coefficient
1127     C       -   preallocated input/output matrix
1128     IC      -   submatrix offset (row index)
1129     JC      -   submatrix offset (column index)
1130     IsUpper -   whether C is upper triangular or lower triangular
1131 
1132   ! FREE EDITION OF ALGLIB:
1133   !
1134   ! Free Edition of ALGLIB supports following important features for  this
1135   ! function:
1136   ! * C++ version: x64 SIMD support using C++ intrinsics
1137   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1138   !
1139   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1140   ! Reference Manual in order  to  find  out  how to activate SIMD support
1141   ! in ALGLIB.
1142 
1143   ! COMMERCIAL EDITION OF ALGLIB:
1144   !
1145   ! Commercial Edition of ALGLIB includes following important improvements
1146   ! of this function:
1147   ! * high-performance native backend with same C# interface (C# version)
1148   ! * multithreading support (C++ and C# versions)
1149   ! * hardware vendor (Intel) implementations of linear algebra primitives
1150   !   (C++ and C# versions, x86/x64 platform)
1151   !
1152   ! We recommend you to read 'Working with commercial version' section  of
1153   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1154   ! related features provided by commercial edition of ALGLIB.
1155 
1156   -- ALGLIB routine --
1157      16.12.2009-22.01.2018
1158      Bochkanov Sergey
1159 *************************************************************************/
rmatrixsyrk(const ae_int_t n,const ae_int_t k,const double alpha,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t optypea,const double beta,const real_2d_array & c,const ae_int_t ic,const ae_int_t jc,const bool isupper,const xparams _xparams)1160 void rmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, const real_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper, const xparams _xparams)
1161 {
1162     jmp_buf _break_jump;
1163     alglib_impl::ae_state _alglib_env_state;
1164     alglib_impl::ae_state_init(&_alglib_env_state);
1165     if( setjmp(_break_jump) )
1166     {
1167 #if !defined(AE_NO_EXCEPTIONS)
1168         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1169 #else
1170         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1171         return;
1172 #endif
1173     }
1174     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1175     if( _xparams.flags!=0x0 )
1176         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1177     alglib_impl::rmatrixsyrk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state);
1178     alglib_impl::ae_state_clear(&_alglib_env_state);
1179     return;
1180 }
1181 
1182 /*************************************************************************
1183 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
1184 * C is MxN general matrix
1185 * op1(A) is MxK matrix
1186 * op2(B) is KxN matrix
1187 * "op" may be identity transformation, transposition, conjugate transposition
1188 
1189 Additional info:
1190 * cache-oblivious algorithm is used.
1191 * multiplication result replaces C. If Beta=0, C elements are not used in
1192   calculations (not multiplied by zero - just not referenced)
1193 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
1194 * if both Beta and Alpha are zero, C is filled by zeros.
1195 
1196 IMPORTANT:
1197 
1198 This function does NOT preallocate output matrix C, it MUST be preallocated
1199 by caller prior to calling this function. In case C does not have  enough
1200 space to store result, exception will be generated.
1201 
1202 INPUT PARAMETERS
1203     M       -   matrix size, M>0
1204     N       -   matrix size, N>0
1205     K       -   matrix size, K>0
1206     Alpha   -   coefficient
1207     A       -   matrix
1208     IA      -   submatrix offset
1209     JA      -   submatrix offset
1210     OpTypeA -   transformation type:
1211                 * 0 - no transformation
1212                 * 1 - transposition
1213                 * 2 - conjugate transposition
1214     B       -   matrix
1215     IB      -   submatrix offset
1216     JB      -   submatrix offset
1217     OpTypeB -   transformation type:
1218                 * 0 - no transformation
1219                 * 1 - transposition
1220                 * 2 - conjugate transposition
1221     Beta    -   coefficient
1222     C       -   matrix (PREALLOCATED, large enough to store result)
1223     IC      -   submatrix offset
1224     JC      -   submatrix offset
1225 
1226   ! FREE EDITION OF ALGLIB:
1227   !
1228   ! Free Edition of ALGLIB supports following important features for  this
1229   ! function:
1230   ! * C++ version: x64 SIMD support using C++ intrinsics
1231   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1232   !
1233   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1234   ! Reference Manual in order  to  find  out  how to activate SIMD support
1235   ! in ALGLIB.
1236 
1237   ! COMMERCIAL EDITION OF ALGLIB:
1238   !
1239   ! Commercial Edition of ALGLIB includes following important improvements
1240   ! of this function:
1241   ! * high-performance native backend with same C# interface (C# version)
1242   ! * multithreading support (C++ and C# versions)
1243   ! * hardware vendor (Intel) implementations of linear algebra primitives
1244   !   (C++ and C# versions, x86/x64 platform)
1245   !
1246   ! We recommend you to read 'Working with commercial version' section  of
1247   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1248   ! related features provided by commercial edition of ALGLIB.
1249 
1250   -- ALGLIB routine --
1251      2009-2019
1252      Bochkanov Sergey
1253 *************************************************************************/
cmatrixgemm(const ae_int_t m,const ae_int_t n,const ae_int_t k,const alglib::complex alpha,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t optypea,const complex_2d_array & b,const ae_int_t ib,const ae_int_t jb,const ae_int_t optypeb,const alglib::complex beta,const complex_2d_array & c,const ae_int_t ic,const ae_int_t jc,const xparams _xparams)1254 void cmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const alglib::complex alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const alglib::complex beta, const complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const xparams _xparams)
1255 {
1256     jmp_buf _break_jump;
1257     alglib_impl::ae_state _alglib_env_state;
1258     alglib_impl::ae_state_init(&_alglib_env_state);
1259     if( setjmp(_break_jump) )
1260     {
1261 #if !defined(AE_NO_EXCEPTIONS)
1262         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1263 #else
1264         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1265         return;
1266 #endif
1267     }
1268     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1269     if( _xparams.flags!=0x0 )
1270         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1271     alglib_impl::cmatrixgemm(m, n, k, *alpha.c_ptr(), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, optypeb, *beta.c_ptr(), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, &_alglib_env_state);
1272     alglib_impl::ae_state_clear(&_alglib_env_state);
1273     return;
1274 }
1275 
1276 /*************************************************************************
1277 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
1278 * C is MxN general matrix
1279 * op1(A) is MxK matrix
1280 * op2(B) is KxN matrix
1281 * "op" may be identity transformation, transposition
1282 
1283 Additional info:
1284 * cache-oblivious algorithm is used.
1285 * multiplication result replaces C. If Beta=0, C elements are not used in
1286   calculations (not multiplied by zero - just not referenced)
1287 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
1288 * if both Beta and Alpha are zero, C is filled by zeros.
1289 
1290 IMPORTANT:
1291 
1292 This function does NOT preallocate output matrix C, it MUST be preallocated
1293 by caller prior to calling this function. In case C does not have  enough
1294 space to store result, exception will be generated.
1295 
1296 INPUT PARAMETERS
1297     M       -   matrix size, M>0
1298     N       -   matrix size, N>0
1299     K       -   matrix size, K>0
1300     Alpha   -   coefficient
1301     A       -   matrix
1302     IA      -   submatrix offset
1303     JA      -   submatrix offset
1304     OpTypeA -   transformation type:
1305                 * 0 - no transformation
1306                 * 1 - transposition
1307     B       -   matrix
1308     IB      -   submatrix offset
1309     JB      -   submatrix offset
1310     OpTypeB -   transformation type:
1311                 * 0 - no transformation
1312                 * 1 - transposition
1313     Beta    -   coefficient
1314     C       -   PREALLOCATED output matrix, large enough to store result
1315     IC      -   submatrix offset
1316     JC      -   submatrix offset
1317 
1318   ! FREE EDITION OF ALGLIB:
1319   !
1320   ! Free Edition of ALGLIB supports following important features for  this
1321   ! function:
1322   ! * C++ version: x64 SIMD support using C++ intrinsics
1323   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1324   !
1325   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1326   ! Reference Manual in order  to  find  out  how to activate SIMD support
1327   ! in ALGLIB.
1328 
1329   ! COMMERCIAL EDITION OF ALGLIB:
1330   !
1331   ! Commercial Edition of ALGLIB includes following important improvements
1332   ! of this function:
1333   ! * high-performance native backend with same C# interface (C# version)
1334   ! * multithreading support (C++ and C# versions)
1335   ! * hardware vendor (Intel) implementations of linear algebra primitives
1336   !   (C++ and C# versions, x86/x64 platform)
1337   !
1338   ! We recommend you to read 'Working with commercial version' section  of
1339   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1340   ! related features provided by commercial edition of ALGLIB.
1341 
1342   -- ALGLIB routine --
1343      2009-2019
1344      Bochkanov Sergey
1345 *************************************************************************/
void rmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const double beta, const real_2d_array &c, const ae_int_t ic, const ae_int_t jc, const xparams _xparams)
{
    // Thin C++ facade over the computational core: create an ALGLIB
    // environment, arm the longjmp-based error trap (translated into a C++
    // exception, or an error flag when exceptions are disabled), forward
    // the GEMM request, then tear the environment down.
    jmp_buf trap;
    alglib_impl::ae_state env;
    alglib_impl::ae_state_init(&env);
    if (setjmp(trap))
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(env.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(env.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&env, &trap);
    if (_xparams.flags != 0x0)
        ae_state_set_flags(&env, _xparams.flags);
    alglib_impl::rmatrixgemm(m, n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, optypeb, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, &env);
    alglib_impl::ae_state_clear(&env);
}
1367 
1368 /*************************************************************************
1369 This subroutine is an older version of CMatrixHERK(), one with wrong  name
1370 (it is HErmitian update, not SYmmetric). It  is  left  here  for  backward
1371 compatibility.
1372 
1373   -- ALGLIB routine --
1374      16.12.2009
1375      Bochkanov Sergey
1376 *************************************************************************/
cmatrixsyrk(const ae_int_t n,const ae_int_t k,const double alpha,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t optypea,const double beta,const complex_2d_array & c,const ae_int_t ic,const ae_int_t jc,const bool isupper,const xparams _xparams)1377 void cmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, const complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper, const xparams _xparams)
1378 {
1379     jmp_buf _break_jump;
1380     alglib_impl::ae_state _alglib_env_state;
1381     alglib_impl::ae_state_init(&_alglib_env_state);
1382     if( setjmp(_break_jump) )
1383     {
1384 #if !defined(AE_NO_EXCEPTIONS)
1385         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1386 #else
1387         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1388         return;
1389 #endif
1390     }
1391     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1392     if( _xparams.flags!=0x0 )
1393         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1394     alglib_impl::cmatrixsyrk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state);
1395     alglib_impl::ae_state_clear(&_alglib_env_state);
1396     return;
1397 }
1398 #endif
1399 
1400 #if defined(AE_COMPILE_ORTFAC) || !defined(AE_PARTIAL_BUILD)
1401 /*************************************************************************
1402 QR decomposition of a rectangular matrix of size MxN
1403 
1404 Input parameters:
1405     A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
1406     M   -   number of rows in matrix A.
1407     N   -   number of columns in matrix A.
1408 
1409 Output parameters:
1410     A   -   matrices Q and R in compact form (see below).
1411     Tau -   array of scalar factors which are used to form
1412             matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)].
1413 
1414 Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
1415 MxM, R - upper triangular (or upper trapezoid) matrix of size M x N.
1416 
1417 The elements of matrix R are located on and above the main diagonal of
1418 matrix A. The elements which are located in Tau array and below the main
1419 diagonal of matrix A are used to form matrix Q as follows:
1420 
1421 Matrix Q is represented as a product of elementary reflections
1422 
1423 Q = H(0)*H(1)*...*H(k-1),
1424 
1425 where k = min(m,n), and each H(i) is in the form
1426 
1427 H(i) = 1 - tau * v * (v^T)
1428 
1429 where tau is a scalar stored in Tau[I]; v - real vector,
1430 so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).
1431 
1432   ! FREE EDITION OF ALGLIB:
1433   !
1434   ! Free Edition of ALGLIB supports following important features for  this
1435   ! function:
1436   ! * C++ version: x64 SIMD support using C++ intrinsics
1437   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1438   !
1439   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1440   ! Reference Manual in order  to  find  out  how to activate SIMD support
1441   ! in ALGLIB.
1442 
1443   ! COMMERCIAL EDITION OF ALGLIB:
1444   !
1445   ! Commercial Edition of ALGLIB includes following important improvements
1446   ! of this function:
1447   ! * high-performance native backend with same C# interface (C# version)
1448   ! * multithreading support (C++ and C# versions)
1449   ! * hardware vendor (Intel) implementations of linear algebra primitives
1450   !   (C++ and C# versions, x86/x64 platform)
1451   !
1452   ! We recommend you to read 'Working with commercial version' section  of
1453   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1454   ! related features provided by commercial edition of ALGLIB.
1455 
1456   -- ALGLIB routine --
1457      17.02.2010
1458      Bochkanov Sergey
1459 *************************************************************************/
rmatrixqr(real_2d_array & a,const ae_int_t m,const ae_int_t n,real_1d_array & tau,const xparams _xparams)1460 void rmatrixqr(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau, const xparams _xparams)
1461 {
1462     jmp_buf _break_jump;
1463     alglib_impl::ae_state _alglib_env_state;
1464     alglib_impl::ae_state_init(&_alglib_env_state);
1465     if( setjmp(_break_jump) )
1466     {
1467 #if !defined(AE_NO_EXCEPTIONS)
1468         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1469 #else
1470         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1471         return;
1472 #endif
1473     }
1474     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1475     if( _xparams.flags!=0x0 )
1476         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1477     alglib_impl::rmatrixqr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
1478     alglib_impl::ae_state_clear(&_alglib_env_state);
1479     return;
1480 }
1481 
1482 /*************************************************************************
1483 LQ decomposition of a rectangular matrix of size MxN
1484 
1485 Input parameters:
1486     A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
1487     M   -   number of rows in matrix A.
1488     N   -   number of columns in matrix A.
1489 
1490 Output parameters:
1491     A   -   matrices L and Q in compact form (see below)
1492     Tau -   array of scalar factors which are used to form
1493             matrix Q. Array whose index ranges within [0..Min(M,N)-1].
1494 
1495 Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
1496 MxM, L - lower triangular (or lower trapezoid) matrix of size M x N.
1497 
1498 The elements of matrix L are located on and below  the  main  diagonal  of
1499 matrix A. The elements which are located in Tau array and above  the  main
1500 diagonal of matrix A are used to form matrix Q as follows:
1501 
1502 Matrix Q is represented as a product of elementary reflections
1503 
1504 Q = H(k-1)*H(k-2)*...*H(1)*H(0),
1505 
1506 where k = min(m,n), and each H(i) is of the form
1507 
1508 H(i) = 1 - tau * v * (v^T)
1509 
1510 where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0,
1511 v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1).
1512 
1513   ! FREE EDITION OF ALGLIB:
1514   !
1515   ! Free Edition of ALGLIB supports following important features for  this
1516   ! function:
1517   ! * C++ version: x64 SIMD support using C++ intrinsics
1518   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1519   !
1520   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1521   ! Reference Manual in order  to  find  out  how to activate SIMD support
1522   ! in ALGLIB.
1523 
1524   ! COMMERCIAL EDITION OF ALGLIB:
1525   !
1526   ! Commercial Edition of ALGLIB includes following important improvements
1527   ! of this function:
1528   ! * high-performance native backend with same C# interface (C# version)
1529   ! * multithreading support (C++ and C# versions)
1530   ! * hardware vendor (Intel) implementations of linear algebra primitives
1531   !   (C++ and C# versions, x86/x64 platform)
1532   !
1533   ! We recommend you to read 'Working with commercial version' section  of
1534   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1535   ! related features provided by commercial edition of ALGLIB.
1536 
1537   -- ALGLIB routine --
1538      17.02.2010
1539      Bochkanov Sergey
1540 *************************************************************************/
rmatrixlq(real_2d_array & a,const ae_int_t m,const ae_int_t n,real_1d_array & tau,const xparams _xparams)1541 void rmatrixlq(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau, const xparams _xparams)
1542 {
1543     jmp_buf _break_jump;
1544     alglib_impl::ae_state _alglib_env_state;
1545     alglib_impl::ae_state_init(&_alglib_env_state);
1546     if( setjmp(_break_jump) )
1547     {
1548 #if !defined(AE_NO_EXCEPTIONS)
1549         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1550 #else
1551         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1552         return;
1553 #endif
1554     }
1555     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1556     if( _xparams.flags!=0x0 )
1557         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1558     alglib_impl::rmatrixlq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
1559     alglib_impl::ae_state_clear(&_alglib_env_state);
1560     return;
1561 }
1562 
1563 /*************************************************************************
1564 QR decomposition of a rectangular complex matrix of size MxN
1565 
1566 Input parameters:
1567     A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
1568     M   -   number of rows in matrix A.
1569     N   -   number of columns in matrix A.
1570 
1571 Output parameters:
1572     A   -   matrices Q and R in compact form
1573     Tau -   array of scalar factors which are used to form matrix Q. Array
1574             whose indexes range within [0.. Min(M,N)-1]
1575 
1576 Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
1577 MxM, R - upper triangular (or upper trapezoid) matrix of size MxN.
1578 
1579   ! FREE EDITION OF ALGLIB:
1580   !
1581   ! Free Edition of ALGLIB supports following important features for  this
1582   ! function:
1583   ! * C++ version: x64 SIMD support using C++ intrinsics
1584   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1585   !
1586   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1587   ! Reference Manual in order  to  find  out  how to activate SIMD support
1588   ! in ALGLIB.
1589 
1590   ! COMMERCIAL EDITION OF ALGLIB:
1591   !
1592   ! Commercial Edition of ALGLIB includes following important improvements
1593   ! of this function:
1594   ! * high-performance native backend with same C# interface (C# version)
1595   ! * multithreading support (C++ and C# versions)
1596   ! * hardware vendor (Intel) implementations of linear algebra primitives
1597   !   (C++ and C# versions, x86/x64 platform)
1598   !
1599   ! We recommend you to read 'Working with commercial version' section  of
1600   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1601   ! related features provided by commercial edition of ALGLIB.
1602 
1603   -- LAPACK routine (version 3.0) --
1604      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1605      Courant Institute, Argonne National Lab, and Rice University
1606      September 30, 1994
1607 *************************************************************************/
cmatrixqr(complex_2d_array & a,const ae_int_t m,const ae_int_t n,complex_1d_array & tau,const xparams _xparams)1608 void cmatrixqr(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau, const xparams _xparams)
1609 {
1610     jmp_buf _break_jump;
1611     alglib_impl::ae_state _alglib_env_state;
1612     alglib_impl::ae_state_init(&_alglib_env_state);
1613     if( setjmp(_break_jump) )
1614     {
1615 #if !defined(AE_NO_EXCEPTIONS)
1616         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1617 #else
1618         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1619         return;
1620 #endif
1621     }
1622     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1623     if( _xparams.flags!=0x0 )
1624         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1625     alglib_impl::cmatrixqr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
1626     alglib_impl::ae_state_clear(&_alglib_env_state);
1627     return;
1628 }
1629 
1630 /*************************************************************************
1631 LQ decomposition of a rectangular complex matrix of size MxN
1632 
1633 Input parameters:
1634     A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
1635     M   -   number of rows in matrix A.
1636     N   -   number of columns in matrix A.
1637 
1638 Output parameters:
1639     A   -   matrices Q and L in compact form
1640     Tau -   array of scalar factors which are used to form matrix Q. Array
1641             whose indexes range within [0.. Min(M,N)-1]
1642 
1643 Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
1644 MxM, L - lower triangular (or lower trapezoid) matrix of size MxN.
1645 
1646   ! FREE EDITION OF ALGLIB:
1647   !
1648   ! Free Edition of ALGLIB supports following important features for  this
1649   ! function:
1650   ! * C++ version: x64 SIMD support using C++ intrinsics
1651   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1652   !
1653   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1654   ! Reference Manual in order  to  find  out  how to activate SIMD support
1655   ! in ALGLIB.
1656 
1657   ! COMMERCIAL EDITION OF ALGLIB:
1658   !
1659   ! Commercial Edition of ALGLIB includes following important improvements
1660   ! of this function:
1661   ! * high-performance native backend with same C# interface (C# version)
1662   ! * multithreading support (C++ and C# versions)
1663   ! * hardware vendor (Intel) implementations of linear algebra primitives
1664   !   (C++ and C# versions, x86/x64 platform)
1665   !
1666   ! We recommend you to read 'Working with commercial version' section  of
1667   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1668   ! related features provided by commercial edition of ALGLIB.
1669 
1670   -- LAPACK routine (version 3.0) --
1671      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1672      Courant Institute, Argonne National Lab, and Rice University
1673      September 30, 1994
1674 *************************************************************************/
cmatrixlq(complex_2d_array & a,const ae_int_t m,const ae_int_t n,complex_1d_array & tau,const xparams _xparams)1675 void cmatrixlq(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau, const xparams _xparams)
1676 {
1677     jmp_buf _break_jump;
1678     alglib_impl::ae_state _alglib_env_state;
1679     alglib_impl::ae_state_init(&_alglib_env_state);
1680     if( setjmp(_break_jump) )
1681     {
1682 #if !defined(AE_NO_EXCEPTIONS)
1683         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1684 #else
1685         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1686         return;
1687 #endif
1688     }
1689     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1690     if( _xparams.flags!=0x0 )
1691         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1692     alglib_impl::cmatrixlq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
1693     alglib_impl::ae_state_clear(&_alglib_env_state);
1694     return;
1695 }
1696 
1697 /*************************************************************************
1698 Partial unpacking of matrix Q from the QR decomposition of a matrix A
1699 
1700 Input parameters:
1701     A       -   matrices Q and R in compact form.
1702                 Output of RMatrixQR subroutine.
1703     M       -   number of rows in given matrix A. M>=0.
1704     N       -   number of columns in given matrix A. N>=0.
1705     Tau     -   scalar factors which are used to form Q.
1706                 Output of the RMatrixQR subroutine.
1707     QColumns -  required number of columns of matrix Q. M>=QColumns>=0.
1708 
1709 Output parameters:
1710     Q       -   first QColumns columns of matrix Q.
1711                 Array whose indexes range within [0..M-1, 0..QColumns-1].
1712                 If QColumns=0, the array remains unchanged.
1713 
1714   ! FREE EDITION OF ALGLIB:
1715   !
1716   ! Free Edition of ALGLIB supports following important features for  this
1717   ! function:
1718   ! * C++ version: x64 SIMD support using C++ intrinsics
1719   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1720   !
1721   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1722   ! Reference Manual in order  to  find  out  how to activate SIMD support
1723   ! in ALGLIB.
1724 
1725   ! COMMERCIAL EDITION OF ALGLIB:
1726   !
1727   ! Commercial Edition of ALGLIB includes following important improvements
1728   ! of this function:
1729   ! * high-performance native backend with same C# interface (C# version)
1730   ! * multithreading support (C++ and C# versions)
1731   ! * hardware vendor (Intel) implementations of linear algebra primitives
1732   !   (C++ and C# versions, x86/x64 platform)
1733   !
1734   ! We recommend you to read 'Working with commercial version' section  of
1735   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1736   ! related features provided by commercial edition of ALGLIB.
1737 
1738   -- ALGLIB routine --
1739      17.02.2010
1740      Bochkanov Sergey
1741 *************************************************************************/
rmatrixqrunpackq(const real_2d_array & a,const ae_int_t m,const ae_int_t n,const real_1d_array & tau,const ae_int_t qcolumns,real_2d_array & q,const xparams _xparams)1742 void rmatrixqrunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qcolumns, real_2d_array &q, const xparams _xparams)
1743 {
1744     jmp_buf _break_jump;
1745     alglib_impl::ae_state _alglib_env_state;
1746     alglib_impl::ae_state_init(&_alglib_env_state);
1747     if( setjmp(_break_jump) )
1748     {
1749 #if !defined(AE_NO_EXCEPTIONS)
1750         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1751 #else
1752         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1753         return;
1754 #endif
1755     }
1756     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1757     if( _xparams.flags!=0x0 )
1758         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1759     alglib_impl::rmatrixqrunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
1760     alglib_impl::ae_state_clear(&_alglib_env_state);
1761     return;
1762 }
1763 
1764 /*************************************************************************
1765 Unpacking of matrix R from the QR decomposition of a matrix A
1766 
1767 Input parameters:
1768     A       -   matrices Q and R in compact form.
1769                 Output of RMatrixQR subroutine.
1770     M       -   number of rows in given matrix A. M>=0.
1771     N       -   number of columns in given matrix A. N>=0.
1772 
1773 Output parameters:
1774     R       -   matrix R, array[0..M-1, 0..N-1].
1775 
1776   -- ALGLIB routine --
1777      17.02.2010
1778      Bochkanov Sergey
1779 *************************************************************************/
rmatrixqrunpackr(const real_2d_array & a,const ae_int_t m,const ae_int_t n,real_2d_array & r,const xparams _xparams)1780 void rmatrixqrunpackr(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &r, const xparams _xparams)
1781 {
1782     jmp_buf _break_jump;
1783     alglib_impl::ae_state _alglib_env_state;
1784     alglib_impl::ae_state_init(&_alglib_env_state);
1785     if( setjmp(_break_jump) )
1786     {
1787 #if !defined(AE_NO_EXCEPTIONS)
1788         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1789 #else
1790         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1791         return;
1792 #endif
1793     }
1794     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1795     if( _xparams.flags!=0x0 )
1796         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1797     alglib_impl::rmatrixqrunpackr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &_alglib_env_state);
1798     alglib_impl::ae_state_clear(&_alglib_env_state);
1799     return;
1800 }
1801 
1802 /*************************************************************************
1803 Partial unpacking of matrix Q from the LQ decomposition of a matrix A
1804 
1805 Input parameters:
1806     A       -   matrices L and Q in compact form.
1807                 Output of RMatrixLQ subroutine.
1808     M       -   number of rows in given matrix A. M>=0.
1809     N       -   number of columns in given matrix A. N>=0.
1810     Tau     -   scalar factors which are used to form Q.
1811                 Output of the RMatrixLQ subroutine.
1812     QRows   -   required number of rows in matrix Q. N>=QRows>=0.
1813 
1814 Output parameters:
1815     Q       -   first QRows rows of matrix Q. Array whose indexes range
1816                 within [0..QRows-1, 0..N-1]. If QRows=0, the array remains
1817                 unchanged.
1818 
1819   ! FREE EDITION OF ALGLIB:
1820   !
1821   ! Free Edition of ALGLIB supports following important features for  this
1822   ! function:
1823   ! * C++ version: x64 SIMD support using C++ intrinsics
1824   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1825   !
1826   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1827   ! Reference Manual in order  to  find  out  how to activate SIMD support
1828   ! in ALGLIB.
1829 
1830   ! COMMERCIAL EDITION OF ALGLIB:
1831   !
1832   ! Commercial Edition of ALGLIB includes following important improvements
1833   ! of this function:
1834   ! * high-performance native backend with same C# interface (C# version)
1835   ! * multithreading support (C++ and C# versions)
1836   ! * hardware vendor (Intel) implementations of linear algebra primitives
1837   !   (C++ and C# versions, x86/x64 platform)
1838   !
1839   ! We recommend you to read 'Working with commercial version' section  of
1840   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1841   ! related features provided by commercial edition of ALGLIB.
1842 
1843   -- ALGLIB routine --
1844      17.02.2010
1845      Bochkanov Sergey
1846 *************************************************************************/
rmatrixlqunpackq(const real_2d_array & a,const ae_int_t m,const ae_int_t n,const real_1d_array & tau,const ae_int_t qrows,real_2d_array & q,const xparams _xparams)1847 void rmatrixlqunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qrows, real_2d_array &q, const xparams _xparams)
1848 {
1849     jmp_buf _break_jump;
1850     alglib_impl::ae_state _alglib_env_state;
1851     alglib_impl::ae_state_init(&_alglib_env_state);
1852     if( setjmp(_break_jump) )
1853     {
1854 #if !defined(AE_NO_EXCEPTIONS)
1855         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1856 #else
1857         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1858         return;
1859 #endif
1860     }
1861     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1862     if( _xparams.flags!=0x0 )
1863         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1864     alglib_impl::rmatrixlqunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qrows, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
1865     alglib_impl::ae_state_clear(&_alglib_env_state);
1866     return;
1867 }
1868 
1869 /*************************************************************************
1870 Unpacking of matrix L from the LQ decomposition of a matrix A
1871 
1872 Input parameters:
1873     A       -   matrices Q and L in compact form.
1874                 Output of RMatrixLQ subroutine.
1875     M       -   number of rows in given matrix A. M>=0.
1876     N       -   number of columns in given matrix A. N>=0.
1877 
1878 Output parameters:
1879     L       -   matrix L, array[0..M-1, 0..N-1].
1880 
1881   -- ALGLIB routine --
1882      17.02.2010
1883      Bochkanov Sergey
1884 *************************************************************************/
rmatrixlqunpackl(const real_2d_array & a,const ae_int_t m,const ae_int_t n,real_2d_array & l,const xparams _xparams)1885 void rmatrixlqunpackl(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &l, const xparams _xparams)
1886 {
1887     jmp_buf _break_jump;
1888     alglib_impl::ae_state _alglib_env_state;
1889     alglib_impl::ae_state_init(&_alglib_env_state);
1890     if( setjmp(_break_jump) )
1891     {
1892 #if !defined(AE_NO_EXCEPTIONS)
1893         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1894 #else
1895         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1896         return;
1897 #endif
1898     }
1899     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1900     if( _xparams.flags!=0x0 )
1901         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1902     alglib_impl::rmatrixlqunpackl(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(l.c_ptr()), &_alglib_env_state);
1903     alglib_impl::ae_state_clear(&_alglib_env_state);
1904     return;
1905 }
1906 
1907 /*************************************************************************
1908 Partial unpacking of matrix Q from QR decomposition of a complex matrix A.
1909 
1910 Input parameters:
1911     A           -   matrices Q and R in compact form.
1912                     Output of CMatrixQR subroutine .
1913     M           -   number of rows in matrix A. M>=0.
1914     N           -   number of columns in matrix A. N>=0.
1915     Tau         -   scalar factors which are used to form Q.
1916                     Output of CMatrixQR subroutine .
1917     QColumns    -   required number of columns in matrix Q. M>=QColumns>=0.
1918 
1919 Output parameters:
1920     Q           -   first QColumns columns of matrix Q.
1921                     Array whose index ranges within [0..M-1, 0..QColumns-1].
1922                     If QColumns=0, array isn't changed.
1923 
1924   ! FREE EDITION OF ALGLIB:
1925   !
1926   ! Free Edition of ALGLIB supports following important features for  this
1927   ! function:
1928   ! * C++ version: x64 SIMD support using C++ intrinsics
1929   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1930   !
1931   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1932   ! Reference Manual in order  to  find  out  how to activate SIMD support
1933   ! in ALGLIB.
1934 
1935   ! COMMERCIAL EDITION OF ALGLIB:
1936   !
1937   ! Commercial Edition of ALGLIB includes following important improvements
1938   ! of this function:
1939   ! * high-performance native backend with same C# interface (C# version)
1940   ! * multithreading support (C++ and C# versions)
1941   ! * hardware vendor (Intel) implementations of linear algebra primitives
1942   !   (C++ and C# versions, x86/x64 platform)
1943   !
1944   ! We recommend you to read 'Working with commercial version' section  of
1945   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1946   ! related features provided by commercial edition of ALGLIB.
1947 
1948   -- ALGLIB routine --
1949      17.02.2010
1950      Bochkanov Sergey
1951 *************************************************************************/
cmatrixqrunpackq(const complex_2d_array & a,const ae_int_t m,const ae_int_t n,const complex_1d_array & tau,const ae_int_t qcolumns,complex_2d_array & q,const xparams _xparams)1952 void cmatrixqrunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qcolumns, complex_2d_array &q, const xparams _xparams)
1953 {
1954     jmp_buf _break_jump;
1955     alglib_impl::ae_state _alglib_env_state;
1956     alglib_impl::ae_state_init(&_alglib_env_state);
1957     if( setjmp(_break_jump) )
1958     {
1959 #if !defined(AE_NO_EXCEPTIONS)
1960         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1961 #else
1962         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1963         return;
1964 #endif
1965     }
1966     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1967     if( _xparams.flags!=0x0 )
1968         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1969     alglib_impl::cmatrixqrunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
1970     alglib_impl::ae_state_clear(&_alglib_env_state);
1971     return;
1972 }
1973 
1974 /*************************************************************************
1975 Unpacking of matrix R from the QR decomposition of a matrix A
1976 
1977 Input parameters:
1978     A       -   matrices Q and R in compact form.
1979                 Output of CMatrixQR subroutine.
1980     M       -   number of rows in given matrix A. M>=0.
1981     N       -   number of columns in given matrix A. N>=0.
1982 
1983 Output parameters:
1984     R       -   matrix R, array[0..M-1, 0..N-1].
1985 
1986   -- ALGLIB routine --
1987      17.02.2010
1988      Bochkanov Sergey
1989 *************************************************************************/
cmatrixqrunpackr(const complex_2d_array & a,const ae_int_t m,const ae_int_t n,complex_2d_array & r,const xparams _xparams)1990 void cmatrixqrunpackr(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &r, const xparams _xparams)
1991 {
1992     jmp_buf _break_jump;
1993     alglib_impl::ae_state _alglib_env_state;
1994     alglib_impl::ae_state_init(&_alglib_env_state);
1995     if( setjmp(_break_jump) )
1996     {
1997 #if !defined(AE_NO_EXCEPTIONS)
1998         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1999 #else
2000         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2001         return;
2002 #endif
2003     }
2004     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2005     if( _xparams.flags!=0x0 )
2006         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2007     alglib_impl::cmatrixqrunpackr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &_alglib_env_state);
2008     alglib_impl::ae_state_clear(&_alglib_env_state);
2009     return;
2010 }
2011 
2012 /*************************************************************************
2013 Partial unpacking of matrix Q from LQ decomposition of a complex matrix A.
2014 
2015 Input parameters:
2016     A           -   matrices Q and L in compact form.
2017                     Output of CMatrixLQ subroutine.
2018     M           -   number of rows in matrix A. M>=0.
2019     N           -   number of columns in matrix A. N>=0.
2020     Tau         -   scalar factors which are used to form Q.
2021                     Output of CMatrixLQ subroutine.
2022     QRows       -   required number of rows in matrix Q. N>=QRows>=0.
2023 
2024 Output parameters:
2025     Q           -   first QRows rows of matrix Q.
2026                     Array whose index ranges within [0..QRows-1, 0..N-1].
2027                     If QRows=0, array isn't changed.
2028 
2029   ! FREE EDITION OF ALGLIB:
2030   !
2031   ! Free Edition of ALGLIB supports following important features for  this
2032   ! function:
2033   ! * C++ version: x64 SIMD support using C++ intrinsics
2034   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
2035   !
2036   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
2037   ! Reference Manual in order  to  find  out  how to activate SIMD support
2038   ! in ALGLIB.
2039 
2040   ! COMMERCIAL EDITION OF ALGLIB:
2041   !
2042   ! Commercial Edition of ALGLIB includes following important improvements
2043   ! of this function:
2044   ! * high-performance native backend with same C# interface (C# version)
2045   ! * multithreading support (C++ and C# versions)
2046   ! * hardware vendor (Intel) implementations of linear algebra primitives
2047   !   (C++ and C# versions, x86/x64 platform)
2048   !
2049   ! We recommend you to read 'Working with commercial version' section  of
2050   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2051   ! related features provided by commercial edition of ALGLIB.
2052 
2053   -- ALGLIB routine --
2054      17.02.2010
2055      Bochkanov Sergey
2056 *************************************************************************/
cmatrixlqunpackq(const complex_2d_array & a,const ae_int_t m,const ae_int_t n,const complex_1d_array & tau,const ae_int_t qrows,complex_2d_array & q,const xparams _xparams)2057 void cmatrixlqunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qrows, complex_2d_array &q, const xparams _xparams)
2058 {
2059     jmp_buf _break_jump;
2060     alglib_impl::ae_state _alglib_env_state;
2061     alglib_impl::ae_state_init(&_alglib_env_state);
2062     if( setjmp(_break_jump) )
2063     {
2064 #if !defined(AE_NO_EXCEPTIONS)
2065         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2066 #else
2067         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2068         return;
2069 #endif
2070     }
2071     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2072     if( _xparams.flags!=0x0 )
2073         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2074     alglib_impl::cmatrixlqunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qrows, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2075     alglib_impl::ae_state_clear(&_alglib_env_state);
2076     return;
2077 }
2078 
2079 /*************************************************************************
2080 Unpacking of matrix L from the LQ decomposition of a matrix A
2081 
2082 Input parameters:
2083     A       -   matrices Q and L in compact form.
2084                 Output of CMatrixLQ subroutine.
2085     M       -   number of rows in given matrix A. M>=0.
2086     N       -   number of columns in given matrix A. N>=0.
2087 
2088 Output parameters:
2089     L       -   matrix L, array[0..M-1, 0..N-1].
2090 
2091   -- ALGLIB routine --
2092      17.02.2010
2093      Bochkanov Sergey
2094 *************************************************************************/
cmatrixlqunpackl(const complex_2d_array & a,const ae_int_t m,const ae_int_t n,complex_2d_array & l,const xparams _xparams)2095 void cmatrixlqunpackl(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &l, const xparams _xparams)
2096 {
2097     jmp_buf _break_jump;
2098     alglib_impl::ae_state _alglib_env_state;
2099     alglib_impl::ae_state_init(&_alglib_env_state);
2100     if( setjmp(_break_jump) )
2101     {
2102 #if !defined(AE_NO_EXCEPTIONS)
2103         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2104 #else
2105         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2106         return;
2107 #endif
2108     }
2109     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2110     if( _xparams.flags!=0x0 )
2111         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2112     alglib_impl::cmatrixlqunpackl(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(l.c_ptr()), &_alglib_env_state);
2113     alglib_impl::ae_state_clear(&_alglib_env_state);
2114     return;
2115 }
2116 
2117 /*************************************************************************
2118 Reduction of a rectangular matrix to  bidiagonal form
2119 
2120 The algorithm reduces the rectangular matrix A to  bidiagonal form by
2121 orthogonal transformations P and Q: A = Q*B*(P^T).
2122 
2123   ! COMMERCIAL EDITION OF ALGLIB:
2124   !
2125   ! Commercial Edition of ALGLIB includes following important improvements
2126   ! of this function:
2127   ! * high-performance native backend with same C# interface (C# version)
2128   ! * hardware vendor (Intel) implementations of linear algebra primitives
2129   !   (C++ and C# versions, x86/x64 platform)
2130   !
2131   ! We recommend you to read 'Working with commercial version' section  of
2132   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2133   ! related features provided by commercial edition of ALGLIB.
2134 
2135 Input parameters:
2136     A       -   source matrix. array[0..M-1, 0..N-1]
2137     M       -   number of rows in matrix A.
2138     N       -   number of columns in matrix A.
2139 
2140 Output parameters:
2141     A       -   matrices Q, B, P in compact form (see below).
2142     TauQ    -   scalar factors which are used to form matrix Q.
2143     TauP    -   scalar factors which are used to form matrix P.
2144 
2145 The main diagonal and one of the  secondary  diagonals  of  matrix  A  are
2146 replaced with bidiagonal  matrix  B.  Other  elements  contain  elementary
2147 reflections which form MxM matrix Q and NxN matrix P, respectively.
2148 
2149 If M>=N, B is the upper  bidiagonal  MxN  matrix  and  is  stored  in  the
2150 corresponding  elements  of  matrix  A.  Matrix  Q  is  represented  as  a
2151 product   of   elementary   reflections   Q = H(0)*H(1)*...*H(n-1),  where
2152 H(i) = 1-tau*v*v'. Here tau is a scalar which is stored  in  TauQ[i],  and
2153 vector v has the following  structure:  v(0:i-1)=0, v(i)=1, v(i+1:m-1)  is
2154 stored   in   elements   A(i+1:m-1,i).   Matrix   P  is  as  follows:  P =
2155 G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i],
2156 u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).
2157 
2158 If M<N, B is the  lower  bidiagonal  MxN  matrix  and  is  stored  in  the
2159 corresponding   elements  of  matrix  A.  Q = H(0)*H(1)*...*H(m-2),  where
2160 H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
2161 is    stored    in   elements   A(i+2:m-1,i).    P = G(0)*G(1)*...*G(m-1),
2162 G(i) = 1-tau*u*u', tau is stored in  TauP,  u(0:i-1)=0, u(i)=1, u(i+1:n-1)
2163 is stored in A(i,i+1:n-1).
2164 
2165 EXAMPLE:
2166 
2167 m=6, n=5 (m > n):               m=5, n=6 (m < n):
2168 
2169 (  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
2170 (  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
2171 (  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
2172 (  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
2173 (  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
2174 (  v1  v2  v3  v4  v5 )
2175 
2176 Here vi and ui are vectors which form H(i) and G(i), and d and e -
2177 are the diagonal and off-diagonal elements of matrix B.
2178 
2179   -- LAPACK routine (version 3.0) --
2180      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2181      Courant Institute, Argonne National Lab, and Rice University
2182      September 30, 1994.
2183      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
2184      pseudocode, 2007-2010.
2185 *************************************************************************/
rmatrixbd(real_2d_array & a,const ae_int_t m,const ae_int_t n,real_1d_array & tauq,real_1d_array & taup,const xparams _xparams)2186 void rmatrixbd(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tauq, real_1d_array &taup, const xparams _xparams)
2187 {
2188     jmp_buf _break_jump;
2189     alglib_impl::ae_state _alglib_env_state;
2190     alglib_impl::ae_state_init(&_alglib_env_state);
2191     if( setjmp(_break_jump) )
2192     {
2193 #if !defined(AE_NO_EXCEPTIONS)
2194         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2195 #else
2196         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2197         return;
2198 #endif
2199     }
2200     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2201     if( _xparams.flags!=0x0 )
2202         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2203     alglib_impl::rmatrixbd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), &_alglib_env_state);
2204     alglib_impl::ae_state_clear(&_alglib_env_state);
2205     return;
2206 }
2207 
2208 /*************************************************************************
2209 Unpacking matrix Q which reduces a matrix to bidiagonal form.
2210 
2211   ! COMMERCIAL EDITION OF ALGLIB:
2212   !
2213   ! Commercial Edition of ALGLIB includes following important improvements
2214   ! of this function:
2215   ! * high-performance native backend with same C# interface (C# version)
2216   ! * hardware vendor (Intel) implementations of linear algebra primitives
2217   !   (C++ and C# versions, x86/x64 platform)
2218   !
2219   ! We recommend you to read 'Working with commercial version' section  of
2220   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2221   ! related features provided by commercial edition of ALGLIB.
2222 
2223 Input parameters:
2224     QP          -   matrices Q and P in compact form.
2225                     Output of ToBidiagonal subroutine.
2226     M           -   number of rows in matrix A.
2227     N           -   number of columns in matrix A.
2228     TAUQ        -   scalar factors which are used to form Q.
2229                     Output of ToBidiagonal subroutine.
2230     QColumns    -   required number of columns in matrix Q.
2231                     M>=QColumns>=0.
2232 
2233 Output parameters:
2234     Q           -   first QColumns columns of matrix Q.
2235                     Array[0..M-1, 0..QColumns-1]
2236                     If QColumns=0, the array is not modified.
2237 
2238   -- ALGLIB --
2239      2005-2010
2240      Bochkanov Sergey
2241 *************************************************************************/
rmatrixbdunpackq(const real_2d_array & qp,const ae_int_t m,const ae_int_t n,const real_1d_array & tauq,const ae_int_t qcolumns,real_2d_array & q,const xparams _xparams)2242 void rmatrixbdunpackq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, const ae_int_t qcolumns, real_2d_array &q, const xparams _xparams)
2243 {
2244     jmp_buf _break_jump;
2245     alglib_impl::ae_state _alglib_env_state;
2246     alglib_impl::ae_state_init(&_alglib_env_state);
2247     if( setjmp(_break_jump) )
2248     {
2249 #if !defined(AE_NO_EXCEPTIONS)
2250         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2251 #else
2252         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2253         return;
2254 #endif
2255     }
2256     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2257     if( _xparams.flags!=0x0 )
2258         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2259     alglib_impl::rmatrixbdunpackq(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2260     alglib_impl::ae_state_clear(&_alglib_env_state);
2261     return;
2262 }
2263 
2264 /*************************************************************************
2265 Multiplication by matrix Q which reduces matrix A to  bidiagonal form.
2266 
2267 The algorithm allows pre- or post-multiply by Q or Q'.
2268 
2269   ! COMMERCIAL EDITION OF ALGLIB:
2270   !
2271   ! Commercial Edition of ALGLIB includes following important improvements
2272   ! of this function:
2273   ! * high-performance native backend with same C# interface (C# version)
2274   ! * hardware vendor (Intel) implementations of linear algebra primitives
2275   !   (C++ and C# versions, x86/x64 platform)
2276   !
2277   ! We recommend you to read 'Working with commercial version' section  of
2278   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2279   ! related features provided by commercial edition of ALGLIB.
2280 
2281 Input parameters:
2282     QP          -   matrices Q and P in compact form.
2283                     Output of ToBidiagonal subroutine.
2284     M           -   number of rows in matrix A.
2285     N           -   number of columns in matrix A.
2286     TAUQ        -   scalar factors which are used to form Q.
2287                     Output of ToBidiagonal subroutine.
2288     Z           -   multiplied matrix.
2289                     array[0..ZRows-1,0..ZColumns-1]
2290     ZRows       -   number of rows in matrix Z. If FromTheRight=False,
2291                     ZRows=M, otherwise ZRows can be arbitrary.
2292     ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
2293                     ZColumns=M, otherwise ZColumns can be arbitrary.
2294     FromTheRight -  pre- or post-multiply.
2295     DoTranspose -   multiply by Q or Q'.
2296 
2297 Output parameters:
2298     Z           -   product of Z and Q.
2299                     Array[0..ZRows-1,0..ZColumns-1]
2300                     If ZRows=0 or ZColumns=0, the array is not modified.
2301 
2302   -- ALGLIB --
2303      2005-2010
2304      Bochkanov Sergey
2305 *************************************************************************/
rmatrixbdmultiplybyq(const real_2d_array & qp,const ae_int_t m,const ae_int_t n,const real_1d_array & tauq,real_2d_array & z,const ae_int_t zrows,const ae_int_t zcolumns,const bool fromtheright,const bool dotranspose,const xparams _xparams)2306 void rmatrixbdmultiplybyq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose, const xparams _xparams)
2307 {
2308     jmp_buf _break_jump;
2309     alglib_impl::ae_state _alglib_env_state;
2310     alglib_impl::ae_state_init(&_alglib_env_state);
2311     if( setjmp(_break_jump) )
2312     {
2313 #if !defined(AE_NO_EXCEPTIONS)
2314         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2315 #else
2316         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2317         return;
2318 #endif
2319     }
2320     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2321     if( _xparams.flags!=0x0 )
2322         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2323     alglib_impl::rmatrixbdmultiplybyq(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), zrows, zcolumns, fromtheright, dotranspose, &_alglib_env_state);
2324     alglib_impl::ae_state_clear(&_alglib_env_state);
2325     return;
2326 }
2327 
2328 /*************************************************************************
2329 Unpacking matrix P which reduces matrix A to bidiagonal form.
2330 The subroutine returns transposed matrix P.
2331 
2332 Input parameters:
2333     QP      -   matrices Q and P in compact form.
2334                 Output of ToBidiagonal subroutine.
2335     M       -   number of rows in matrix A.
2336     N       -   number of columns in matrix A.
2337     TAUP    -   scalar factors which are used to form P.
2338                 Output of ToBidiagonal subroutine.
2339     PTRows  -   required number of rows of matrix P^T. N >= PTRows >= 0.
2340 
2341 Output parameters:
2342     PT      -   first PTRows rows of matrix P^T
2343                 Array[0..PTRows-1, 0..N-1]
2344                 If PTRows=0, the array is not modified.
2345 
2346   -- ALGLIB --
2347      2005-2010
2348      Bochkanov Sergey
2349 *************************************************************************/
rmatrixbdunpackpt(const real_2d_array & qp,const ae_int_t m,const ae_int_t n,const real_1d_array & taup,const ae_int_t ptrows,real_2d_array & pt,const xparams _xparams)2350 void rmatrixbdunpackpt(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, const ae_int_t ptrows, real_2d_array &pt, const xparams _xparams)
2351 {
2352     jmp_buf _break_jump;
2353     alglib_impl::ae_state _alglib_env_state;
2354     alglib_impl::ae_state_init(&_alglib_env_state);
2355     if( setjmp(_break_jump) )
2356     {
2357 #if !defined(AE_NO_EXCEPTIONS)
2358         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2359 #else
2360         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2361         return;
2362 #endif
2363     }
2364     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2365     if( _xparams.flags!=0x0 )
2366         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2367     alglib_impl::rmatrixbdunpackpt(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), ptrows, const_cast<alglib_impl::ae_matrix*>(pt.c_ptr()), &_alglib_env_state);
2368     alglib_impl::ae_state_clear(&_alglib_env_state);
2369     return;
2370 }
2371 
2372 /*************************************************************************
2373 Multiplication by matrix P which reduces matrix A to  bidiagonal form.
2374 
2375 The algorithm allows pre- or post-multiply by P or P'.
2376 
2377 Input parameters:
2378     QP          -   matrices Q and P in compact form.
2379                     Output of RMatrixBD subroutine.
2380     M           -   number of rows in matrix A.
2381     N           -   number of columns in matrix A.
2382     TAUP        -   scalar factors which are used to form P.
2383                     Output of RMatrixBD subroutine.
2384     Z           -   multiplied matrix.
2385                     Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
2386     ZRows       -   number of rows in matrix Z. If FromTheRight=False,
2387                     ZRows=N, otherwise ZRows can be arbitrary.
2388     ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
2389                     ZColumns=N, otherwise ZColumns can be arbitrary.
2390     FromTheRight -  pre- or post-multiply.
2391     DoTranspose -   multiply by P or P'.
2392 
2393 Output parameters:
2394     Z - product of Z and P.
2395                 Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
2396                 If ZRows=0 or ZColumns=0, the array is not modified.
2397 
2398   -- ALGLIB --
2399      2005-2010
2400      Bochkanov Sergey
2401 *************************************************************************/
rmatrixbdmultiplybyp(const real_2d_array & qp,const ae_int_t m,const ae_int_t n,const real_1d_array & taup,real_2d_array & z,const ae_int_t zrows,const ae_int_t zcolumns,const bool fromtheright,const bool dotranspose,const xparams _xparams)2402 void rmatrixbdmultiplybyp(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose, const xparams _xparams)
2403 {
2404     jmp_buf _break_jump;
2405     alglib_impl::ae_state _alglib_env_state;
2406     alglib_impl::ae_state_init(&_alglib_env_state);
2407     if( setjmp(_break_jump) )
2408     {
2409 #if !defined(AE_NO_EXCEPTIONS)
2410         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2411 #else
2412         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2413         return;
2414 #endif
2415     }
2416     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2417     if( _xparams.flags!=0x0 )
2418         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2419     alglib_impl::rmatrixbdmultiplybyp(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), zrows, zcolumns, fromtheright, dotranspose, &_alglib_env_state);
2420     alglib_impl::ae_state_clear(&_alglib_env_state);
2421     return;
2422 }
2423 
2424 /*************************************************************************
2425 Unpacking of the main and secondary diagonals of bidiagonal decomposition
2426 of matrix A.
2427 
2428 Input parameters:
2429     B   -   output of RMatrixBD subroutine.
2430     M   -   number of rows in matrix B.
2431     N   -   number of columns in matrix B.
2432 
2433 Output parameters:
2434     IsUpper -   True, if the matrix is upper bidiagonal.
2435                 otherwise IsUpper is False.
2436     D       -   the main diagonal.
2437                 Array whose index ranges within [0..Min(M,N)-1].
2438     E       -   the secondary diagonal (upper or lower, depending on
2439                 the value of IsUpper).
2440                 Array index ranges within [0..Min(M,N)-1], the last
2441                 element is not used.
2442 
2443   -- ALGLIB --
2444      2005-2010
2445      Bochkanov Sergey
2446 *************************************************************************/
rmatrixbdunpackdiagonals(const real_2d_array & b,const ae_int_t m,const ae_int_t n,bool & isupper,real_1d_array & d,real_1d_array & e,const xparams _xparams)2447 void rmatrixbdunpackdiagonals(const real_2d_array &b, const ae_int_t m, const ae_int_t n, bool &isupper, real_1d_array &d, real_1d_array &e, const xparams _xparams)
2448 {
2449     jmp_buf _break_jump;
2450     alglib_impl::ae_state _alglib_env_state;
2451     alglib_impl::ae_state_init(&_alglib_env_state);
2452     if( setjmp(_break_jump) )
2453     {
2454 #if !defined(AE_NO_EXCEPTIONS)
2455         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2456 #else
2457         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2458         return;
2459 #endif
2460     }
2461     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2462     if( _xparams.flags!=0x0 )
2463         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2464     alglib_impl::rmatrixbdunpackdiagonals(const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), m, n, &isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state);
2465     alglib_impl::ae_state_clear(&_alglib_env_state);
2466     return;
2467 }
2468 
2469 /*************************************************************************
2470 Reduction of a square matrix to  upper Hessenberg form: Q'*A*Q = H,
2471 where Q is an orthogonal matrix, H - Hessenberg matrix.
2472 
2473   ! COMMERCIAL EDITION OF ALGLIB:
2474   !
2475   ! Commercial Edition of ALGLIB includes following important improvements
2476   ! of this function:
2477   ! * high-performance native backend with same C# interface (C# version)
2478   ! * hardware vendor (Intel) implementations of linear algebra primitives
2479   !   (C++ and C# versions, x86/x64 platform)
2480   !
2481   ! We recommend you to read 'Working with commercial version' section  of
2482   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2483   ! related features provided by commercial edition of ALGLIB.
2484 
2485 Input parameters:
2486     A       -   matrix A with elements [0..N-1, 0..N-1]
2487     N       -   size of matrix A.
2488 
2489 Output parameters:
2490     A       -   matrices Q and P in  compact form (see below).
2491     Tau     -   array of scalar factors which are used to form matrix Q.
2492                 Array whose index ranges within [0..N-2]
2493 
2494 Matrix H is located on the main diagonal, on the lower secondary  diagonal
2495 and above the main diagonal of matrix A. The elements which are used to
2496 form matrix Q are situated in array Tau and below the lower secondary
2497 diagonal of matrix A as follows:
2498 
2499 Matrix Q is represented as a product of elementary reflections
2500 
2501 Q = H(0)*H(1)*...*H(n-2),
2502 
2503 where each H(i) is given by
2504 
2505 H(i) = 1 - tau * v * (v^T)
2506 
2507 where tau is a scalar stored in Tau[I]; v - is a real vector,
2508 so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i).
2509 
2510   -- LAPACK routine (version 3.0) --
2511      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2512      Courant Institute, Argonne National Lab, and Rice University
2513      October 31, 1992
2514 *************************************************************************/
rmatrixhessenberg(real_2d_array & a,const ae_int_t n,real_1d_array & tau,const xparams _xparams)2515 void rmatrixhessenberg(real_2d_array &a, const ae_int_t n, real_1d_array &tau, const xparams _xparams)
2516 {
2517     jmp_buf _break_jump;
2518     alglib_impl::ae_state _alglib_env_state;
2519     alglib_impl::ae_state_init(&_alglib_env_state);
2520     if( setjmp(_break_jump) )
2521     {
2522 #if !defined(AE_NO_EXCEPTIONS)
2523         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2524 #else
2525         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2526         return;
2527 #endif
2528     }
2529     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2530     if( _xparams.flags!=0x0 )
2531         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2532     alglib_impl::rmatrixhessenberg(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
2533     alglib_impl::ae_state_clear(&_alglib_env_state);
2534     return;
2535 }
2536 
2537 /*************************************************************************
2538 Unpacking matrix Q which reduces matrix A to upper Hessenberg form
2539 
2540   ! COMMERCIAL EDITION OF ALGLIB:
2541   !
2542   ! Commercial Edition of ALGLIB includes following important improvements
2543   ! of this function:
2544   ! * high-performance native backend with same C# interface (C# version)
2545   ! * hardware vendor (Intel) implementations of linear algebra primitives
2546   !   (C++ and C# versions, x86/x64 platform)
2547   !
2548   ! We recommend you to read 'Working with commercial version' section  of
2549   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2550   ! related features provided by commercial edition of ALGLIB.
2551 
2552 Input parameters:
2553     A   -   output of RMatrixHessenberg subroutine.
2554     N   -   size of matrix A.
2555     Tau -   scalar factors which are used to form Q.
2556             Output of RMatrixHessenberg subroutine.
2557 
2558 Output parameters:
2559     Q   -   matrix Q.
2560             Array whose indexes range within [0..N-1, 0..N-1].
2561 
2562   -- ALGLIB --
2563      2005-2010
2564      Bochkanov Sergey
2565 *************************************************************************/
rmatrixhessenbergunpackq(const real_2d_array & a,const ae_int_t n,const real_1d_array & tau,real_2d_array & q,const xparams _xparams)2566 void rmatrixhessenbergunpackq(const real_2d_array &a, const ae_int_t n, const real_1d_array &tau, real_2d_array &q, const xparams _xparams)
2567 {
2568     jmp_buf _break_jump;
2569     alglib_impl::ae_state _alglib_env_state;
2570     alglib_impl::ae_state_init(&_alglib_env_state);
2571     if( setjmp(_break_jump) )
2572     {
2573 #if !defined(AE_NO_EXCEPTIONS)
2574         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2575 #else
2576         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2577         return;
2578 #endif
2579     }
2580     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2581     if( _xparams.flags!=0x0 )
2582         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2583     alglib_impl::rmatrixhessenbergunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2584     alglib_impl::ae_state_clear(&_alglib_env_state);
2585     return;
2586 }
2587 
2588 /*************************************************************************
2589 Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form)
2590 
2591 Input parameters:
2592     A   -   output of RMatrixHessenberg subroutine.
2593     N   -   size of matrix A.
2594 
2595 Output parameters:
2596     H   -   matrix H. Array whose indexes range within [0..N-1, 0..N-1].
2597 
2598   -- ALGLIB --
2599      2005-2010
2600      Bochkanov Sergey
2601 *************************************************************************/
rmatrixhessenbergunpackh(const real_2d_array & a,const ae_int_t n,real_2d_array & h,const xparams _xparams)2602 void rmatrixhessenbergunpackh(const real_2d_array &a, const ae_int_t n, real_2d_array &h, const xparams _xparams)
2603 {
2604     jmp_buf _break_jump;
2605     alglib_impl::ae_state _alglib_env_state;
2606     alglib_impl::ae_state_init(&_alglib_env_state);
2607     if( setjmp(_break_jump) )
2608     {
2609 #if !defined(AE_NO_EXCEPTIONS)
2610         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2611 #else
2612         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2613         return;
2614 #endif
2615     }
2616     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2617     if( _xparams.flags!=0x0 )
2618         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2619     alglib_impl::rmatrixhessenbergunpackh(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_matrix*>(h.c_ptr()), &_alglib_env_state);
2620     alglib_impl::ae_state_clear(&_alglib_env_state);
2621     return;
2622 }
2623 
2624 /*************************************************************************
2625 Reduction of a symmetric matrix which is given by its higher or lower
2626 triangular part to a tridiagonal matrix using orthogonal similarity
2627 transformation: Q'*A*Q=T.
2628 
2629   ! COMMERCIAL EDITION OF ALGLIB:
2630   !
2631   ! Commercial Edition of ALGLIB includes following important improvements
2632   ! of this function:
2633   ! * high-performance native backend with same C# interface (C# version)
2634   ! * hardware vendor (Intel) implementations of linear algebra primitives
2635   !   (C++ and C# versions, x86/x64 platform)
2636   !
2637   ! We recommend you to read 'Working with commercial version' section  of
2638   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2639   ! related features provided by commercial edition of ALGLIB.
2640 
2641 Input parameters:
2642     A       -   matrix to be transformed
2643                 array with elements [0..N-1, 0..N-1].
2644     N       -   size of matrix A.
2645     IsUpper -   storage format. If IsUpper = True, then matrix A is given
2646                 by its upper triangle, and the lower triangle is not used
2647                 and not modified by the algorithm, and vice versa
2648                 if IsUpper = False.
2649 
2650 Output parameters:
2651     A       -   matrices T and Q in  compact form (see lower)
2652     Tau     -   array of factors which are forming matrices H(i)
2653                 array with elements [0..N-2].
2654     D       -   main diagonal of symmetric matrix T.
2655                 array with elements [0..N-1].
2656     E       -   secondary diagonal of symmetric matrix T.
2657                 array with elements [0..N-2].
2658 
2659 
2660   If IsUpper=True, the matrix Q is represented as a product of elementary
2661   reflectors
2662 
     Q = H(n-2) . . . H(1) H(0).
2664 
2665   Each H(i) has the form
2666 
2667      H(i) = I - tau * v * v'
2668 
2669   where tau is a real scalar, and v is a real vector with
2670   v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
2671   A(0:i-1,i+1), and tau in TAU(i).
2672 
2673   If IsUpper=False, the matrix Q is represented as a product of elementary
2674   reflectors
2675 
     Q = H(0) H(1) . . . H(n-2).
2677 
2678   Each H(i) has the form
2679 
2680      H(i) = I - tau * v * v'
2681 
2682   where tau is a real scalar, and v is a real vector with
2683   v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
2684   and tau in TAU(i).
2685 
2686   The contents of A on exit are illustrated by the following examples
2687   with n = 5:
2688 
2689   if UPLO = 'U':                       if UPLO = 'L':
2690 
2691     (  d   e   v1  v2  v3 )              (  d                  )
2692     (      d   e   v2  v3 )              (  e   d              )
2693     (          d   e   v3 )              (  v0  e   d          )
2694     (              d   e  )              (  v0  v1  e   d      )
2695     (                  d  )              (  v0  v1  v2  e   d  )
2696 
2697   where d and e denote diagonal and off-diagonal elements of T, and vi
2698   denotes an element of the vector defining H(i).
2699 
2700   -- LAPACK routine (version 3.0) --
2701      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2702      Courant Institute, Argonne National Lab, and Rice University
2703      October 31, 1992
2704 *************************************************************************/
smatrixtd(real_2d_array & a,const ae_int_t n,const bool isupper,real_1d_array & tau,real_1d_array & d,real_1d_array & e,const xparams _xparams)2705 void smatrixtd(real_2d_array &a, const ae_int_t n, const bool isupper, real_1d_array &tau, real_1d_array &d, real_1d_array &e, const xparams _xparams)
2706 {
2707     jmp_buf _break_jump;
2708     alglib_impl::ae_state _alglib_env_state;
2709     alglib_impl::ae_state_init(&_alglib_env_state);
2710     if( setjmp(_break_jump) )
2711     {
2712 #if !defined(AE_NO_EXCEPTIONS)
2713         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2714 #else
2715         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2716         return;
2717 #endif
2718     }
2719     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2720     if( _xparams.flags!=0x0 )
2721         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2722     alglib_impl::smatrixtd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state);
2723     alglib_impl::ae_state_clear(&_alglib_env_state);
2724     return;
2725 }
2726 
2727 /*************************************************************************
2728 Unpacking matrix Q which reduces symmetric matrix to a tridiagonal
2729 form.
2730 
2731   ! COMMERCIAL EDITION OF ALGLIB:
2732   !
2733   ! Commercial Edition of ALGLIB includes following important improvements
2734   ! of this function:
2735   ! * high-performance native backend with same C# interface (C# version)
2736   ! * hardware vendor (Intel) implementations of linear algebra primitives
2737   !   (C++ and C# versions, x86/x64 platform)
2738   !
2739   ! We recommend you to read 'Working with commercial version' section  of
2740   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2741   ! related features provided by commercial edition of ALGLIB.
2742 
2743 Input parameters:
2744     A       -   the result of a SMatrixTD subroutine
2745     N       -   size of matrix A.
2746     IsUpper -   storage format (a parameter of SMatrixTD subroutine)
2747     Tau     -   the result of a SMatrixTD subroutine
2748 
2749 Output parameters:
2750     Q       -   transformation matrix.
2751                 array with elements [0..N-1, 0..N-1].
2752 
2753   -- ALGLIB --
2754      Copyright 2005-2010 by Bochkanov Sergey
2755 *************************************************************************/
smatrixtdunpackq(const real_2d_array & a,const ae_int_t n,const bool isupper,const real_1d_array & tau,real_2d_array & q,const xparams _xparams)2756 void smatrixtdunpackq(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &tau, real_2d_array &q, const xparams _xparams)
2757 {
2758     jmp_buf _break_jump;
2759     alglib_impl::ae_state _alglib_env_state;
2760     alglib_impl::ae_state_init(&_alglib_env_state);
2761     if( setjmp(_break_jump) )
2762     {
2763 #if !defined(AE_NO_EXCEPTIONS)
2764         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2765 #else
2766         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2767         return;
2768 #endif
2769     }
2770     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2771     if( _xparams.flags!=0x0 )
2772         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2773     alglib_impl::smatrixtdunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2774     alglib_impl::ae_state_clear(&_alglib_env_state);
2775     return;
2776 }
2777 
2778 /*************************************************************************
2779 Reduction of a Hermitian matrix which is given  by  its  higher  or  lower
2780 triangular part to a real  tridiagonal  matrix  using  unitary  similarity
2781 transformation: Q'*A*Q = T.
2782 
2783   ! COMMERCIAL EDITION OF ALGLIB:
2784   !
2785   ! Commercial Edition of ALGLIB includes following important improvements
2786   ! of this function:
2787   ! * high-performance native backend with same C# interface (C# version)
2788   ! * hardware vendor (Intel) implementations of linear algebra primitives
2789   !   (C++ and C# versions, x86/x64 platform)
2790   !
2791   ! We recommend you to read 'Working with commercial version' section  of
2792   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2793   ! related features provided by commercial edition of ALGLIB.
2794 
2795 Input parameters:
2796     A       -   matrix to be transformed
2797                 array with elements [0..N-1, 0..N-1].
2798     N       -   size of matrix A.
2799     IsUpper -   storage format. If IsUpper = True, then matrix A is  given
2800                 by its upper triangle, and the lower triangle is not  used
2801                 and not modified by the algorithm, and vice versa
2802                 if IsUpper = False.
2803 
2804 Output parameters:
2805     A       -   matrices T and Q in  compact form (see lower)
2806     Tau     -   array of factors which are forming matrices H(i)
2807                 array with elements [0..N-2].
2808     D       -   main diagonal of real symmetric matrix T.
2809                 array with elements [0..N-1].
2810     E       -   secondary diagonal of real symmetric matrix T.
2811                 array with elements [0..N-2].
2812 
2813 
2814   If IsUpper=True, the matrix Q is represented as a product of elementary
2815   reflectors
2816 
     Q = H(n-2) . . . H(1) H(0).
2818 
2819   Each H(i) has the form
2820 
2821      H(i) = I - tau * v * v'
2822 
2823   where tau is a complex scalar, and v is a complex vector with
2824   v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
2825   A(0:i-1,i+1), and tau in TAU(i).
2826 
2827   If IsUpper=False, the matrix Q is represented as a product of elementary
2828   reflectors
2829 
     Q = H(0) H(1) . . . H(n-2).
2831 
2832   Each H(i) has the form
2833 
2834      H(i) = I - tau * v * v'
2835 
2836   where tau is a complex scalar, and v is a complex vector with
2837   v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
2838   and tau in TAU(i).
2839 
2840   The contents of A on exit are illustrated by the following examples
2841   with n = 5:
2842 
2843   if UPLO = 'U':                       if UPLO = 'L':
2844 
2845     (  d   e   v1  v2  v3 )              (  d                  )
2846     (      d   e   v2  v3 )              (  e   d              )
2847     (          d   e   v3 )              (  v0  e   d          )
2848     (              d   e  )              (  v0  v1  e   d      )
2849     (                  d  )              (  v0  v1  v2  e   d  )
2850 
2851 where d and e denote diagonal and off-diagonal elements of T, and vi
2852 denotes an element of the vector defining H(i).
2853 
2854   -- LAPACK routine (version 3.0) --
2855      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2856      Courant Institute, Argonne National Lab, and Rice University
2857      October 31, 1992
2858 *************************************************************************/
hmatrixtd(complex_2d_array & a,const ae_int_t n,const bool isupper,complex_1d_array & tau,real_1d_array & d,real_1d_array & e,const xparams _xparams)2859 void hmatrixtd(complex_2d_array &a, const ae_int_t n, const bool isupper, complex_1d_array &tau, real_1d_array &d, real_1d_array &e, const xparams _xparams)
2860 {
2861     jmp_buf _break_jump;
2862     alglib_impl::ae_state _alglib_env_state;
2863     alglib_impl::ae_state_init(&_alglib_env_state);
2864     if( setjmp(_break_jump) )
2865     {
2866 #if !defined(AE_NO_EXCEPTIONS)
2867         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2868 #else
2869         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2870         return;
2871 #endif
2872     }
2873     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2874     if( _xparams.flags!=0x0 )
2875         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2876     alglib_impl::hmatrixtd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state);
2877     alglib_impl::ae_state_clear(&_alglib_env_state);
2878     return;
2879 }
2880 
2881 /*************************************************************************
2882 Unpacking matrix Q which reduces a Hermitian matrix to a real  tridiagonal
2883 form.
2884 
2885   ! COMMERCIAL EDITION OF ALGLIB:
2886   !
2887   ! Commercial Edition of ALGLIB includes following important improvements
2888   ! of this function:
2889   ! * high-performance native backend with same C# interface (C# version)
2890   ! * hardware vendor (Intel) implementations of linear algebra primitives
2891   !   (C++ and C# versions, x86/x64 platform)
2892   !
2893   ! We recommend you to read 'Working with commercial version' section  of
2894   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2895   ! related features provided by commercial edition of ALGLIB.
2896 
2897 Input parameters:
2898     A       -   the result of a HMatrixTD subroutine
2899     N       -   size of matrix A.
2900     IsUpper -   storage format (a parameter of HMatrixTD subroutine)
2901     Tau     -   the result of a HMatrixTD subroutine
2902 
2903 Output parameters:
2904     Q       -   transformation matrix.
2905                 array with elements [0..N-1, 0..N-1].
2906 
2907   -- ALGLIB --
2908      Copyright 2005-2010 by Bochkanov Sergey
2909 *************************************************************************/
hmatrixtdunpackq(const complex_2d_array & a,const ae_int_t n,const bool isupper,const complex_1d_array & tau,complex_2d_array & q,const xparams _xparams)2910 void hmatrixtdunpackq(const complex_2d_array &a, const ae_int_t n, const bool isupper, const complex_1d_array &tau, complex_2d_array &q, const xparams _xparams)
2911 {
2912     jmp_buf _break_jump;
2913     alglib_impl::ae_state _alglib_env_state;
2914     alglib_impl::ae_state_init(&_alglib_env_state);
2915     if( setjmp(_break_jump) )
2916     {
2917 #if !defined(AE_NO_EXCEPTIONS)
2918         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2919 #else
2920         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2921         return;
2922 #endif
2923     }
2924     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2925     if( _xparams.flags!=0x0 )
2926         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2927     alglib_impl::hmatrixtdunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2928     alglib_impl::ae_state_clear(&_alglib_env_state);
2929     return;
2930 }
2931 #endif
2932 
2933 #if defined(AE_COMPILE_MATGEN) || !defined(AE_PARTIAL_BUILD)
2934 /*************************************************************************
2935 Generation of a random uniformly distributed (Haar) orthogonal matrix
2936 
2937 INPUT PARAMETERS:
2938     N   -   matrix size, N>=1
2939 
2940 OUTPUT PARAMETERS:
2941     A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
2942 
2943 NOTE: this function uses algorithm  described  in  Stewart, G. W.  (1980),
2944       "The Efficient Generation of  Random  Orthogonal  Matrices  with  an
2945       Application to Condition Estimators".
2946 
2947       Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it:
2948       * takes an NxN one
2949       * takes uniformly distributed unit vector of dimension N+1.
2950       * constructs a Householder reflection from the vector, then applies
2951         it to the smaller matrix (embedded in the larger size with a 1 at
2952         the bottom right corner).
2953 
2954   -- ALGLIB routine --
2955      04.12.2009
2956      Bochkanov Sergey
2957 *************************************************************************/
rmatrixrndorthogonal(const ae_int_t n,real_2d_array & a,const xparams _xparams)2958 void rmatrixrndorthogonal(const ae_int_t n, real_2d_array &a, const xparams _xparams)
2959 {
2960     jmp_buf _break_jump;
2961     alglib_impl::ae_state _alglib_env_state;
2962     alglib_impl::ae_state_init(&_alglib_env_state);
2963     if( setjmp(_break_jump) )
2964     {
2965 #if !defined(AE_NO_EXCEPTIONS)
2966         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2967 #else
2968         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2969         return;
2970 #endif
2971     }
2972     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2973     if( _xparams.flags!=0x0 )
2974         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2975     alglib_impl::rmatrixrndorthogonal(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
2976     alglib_impl::ae_state_clear(&_alglib_env_state);
2977     return;
2978 }
2979 
2980 /*************************************************************************
2981 Generation of random NxN matrix with given condition number and norm2(A)=1
2982 
2983 INPUT PARAMETERS:
2984     N   -   matrix size
2985     C   -   condition number (in 2-norm)
2986 
2987 OUTPUT PARAMETERS:
2988     A   -   random matrix with norm2(A)=1 and cond(A)=C
2989 
2990   -- ALGLIB routine --
2991      04.12.2009
2992      Bochkanov Sergey
2993 *************************************************************************/
rmatrixrndcond(const ae_int_t n,const double c,real_2d_array & a,const xparams _xparams)2994 void rmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a, const xparams _xparams)
2995 {
2996     jmp_buf _break_jump;
2997     alglib_impl::ae_state _alglib_env_state;
2998     alglib_impl::ae_state_init(&_alglib_env_state);
2999     if( setjmp(_break_jump) )
3000     {
3001 #if !defined(AE_NO_EXCEPTIONS)
3002         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3003 #else
3004         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3005         return;
3006 #endif
3007     }
3008     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3009     if( _xparams.flags!=0x0 )
3010         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3011     alglib_impl::rmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3012     alglib_impl::ae_state_clear(&_alglib_env_state);
3013     return;
3014 }
3015 
3016 /*************************************************************************
3017 Generation of a random Haar distributed orthogonal complex matrix
3018 
3019 INPUT PARAMETERS:
3020     N   -   matrix size, N>=1
3021 
3022 OUTPUT PARAMETERS:
3023     A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
3024 
3025 NOTE: this function uses algorithm  described  in  Stewart, G. W.  (1980),
3026       "The Efficient Generation of  Random  Orthogonal  Matrices  with  an
3027       Application to Condition Estimators".
3028 
3029       Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it:
3030       * takes an NxN one
3031       * takes uniformly distributed unit vector of dimension N+1.
3032       * constructs a Householder reflection from the vector, then applies
3033         it to the smaller matrix (embedded in the larger size with a 1 at
3034         the bottom right corner).
3035 
3036   -- ALGLIB routine --
3037      04.12.2009
3038      Bochkanov Sergey
3039 *************************************************************************/
cmatrixrndorthogonal(const ae_int_t n,complex_2d_array & a,const xparams _xparams)3040 void cmatrixrndorthogonal(const ae_int_t n, complex_2d_array &a, const xparams _xparams)
3041 {
3042     jmp_buf _break_jump;
3043     alglib_impl::ae_state _alglib_env_state;
3044     alglib_impl::ae_state_init(&_alglib_env_state);
3045     if( setjmp(_break_jump) )
3046     {
3047 #if !defined(AE_NO_EXCEPTIONS)
3048         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3049 #else
3050         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3051         return;
3052 #endif
3053     }
3054     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3055     if( _xparams.flags!=0x0 )
3056         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3057     alglib_impl::cmatrixrndorthogonal(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3058     alglib_impl::ae_state_clear(&_alglib_env_state);
3059     return;
3060 }
3061 
3062 /*************************************************************************
3063 Generation of random NxN complex matrix with given condition number C and
3064 norm2(A)=1
3065 
3066 INPUT PARAMETERS:
3067     N   -   matrix size
3068     C   -   condition number (in 2-norm)
3069 
3070 OUTPUT PARAMETERS:
3071     A   -   random matrix with norm2(A)=1 and cond(A)=C
3072 
3073   -- ALGLIB routine --
3074      04.12.2009
3075      Bochkanov Sergey
3076 *************************************************************************/
cmatrixrndcond(const ae_int_t n,const double c,complex_2d_array & a,const xparams _xparams)3077 void cmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a, const xparams _xparams)
3078 {
3079     jmp_buf _break_jump;
3080     alglib_impl::ae_state _alglib_env_state;
3081     alglib_impl::ae_state_init(&_alglib_env_state);
3082     if( setjmp(_break_jump) )
3083     {
3084 #if !defined(AE_NO_EXCEPTIONS)
3085         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3086 #else
3087         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3088         return;
3089 #endif
3090     }
3091     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3092     if( _xparams.flags!=0x0 )
3093         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3094     alglib_impl::cmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3095     alglib_impl::ae_state_clear(&_alglib_env_state);
3096     return;
3097 }
3098 
3099 /*************************************************************************
3100 Generation of random NxN symmetric matrix with given condition number  and
3101 norm2(A)=1
3102 
3103 INPUT PARAMETERS:
3104     N   -   matrix size
3105     C   -   condition number (in 2-norm)
3106 
3107 OUTPUT PARAMETERS:
3108     A   -   random matrix with norm2(A)=1 and cond(A)=C
3109 
3110   -- ALGLIB routine --
3111      04.12.2009
3112      Bochkanov Sergey
3113 *************************************************************************/
smatrixrndcond(const ae_int_t n,const double c,real_2d_array & a,const xparams _xparams)3114 void smatrixrndcond(const ae_int_t n, const double c, real_2d_array &a, const xparams _xparams)
3115 {
3116     jmp_buf _break_jump;
3117     alglib_impl::ae_state _alglib_env_state;
3118     alglib_impl::ae_state_init(&_alglib_env_state);
3119     if( setjmp(_break_jump) )
3120     {
3121 #if !defined(AE_NO_EXCEPTIONS)
3122         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3123 #else
3124         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3125         return;
3126 #endif
3127     }
3128     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3129     if( _xparams.flags!=0x0 )
3130         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3131     alglib_impl::smatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3132     alglib_impl::ae_state_clear(&_alglib_env_state);
3133     return;
3134 }
3135 
3136 /*************************************************************************
3137 Generation of random NxN symmetric positive definite matrix with given
3138 condition number and norm2(A)=1
3139 
3140 INPUT PARAMETERS:
3141     N   -   matrix size
3142     C   -   condition number (in 2-norm)
3143 
3144 OUTPUT PARAMETERS:
3145     A   -   random SPD matrix with norm2(A)=1 and cond(A)=C
3146 
3147   -- ALGLIB routine --
3148      04.12.2009
3149      Bochkanov Sergey
3150 *************************************************************************/
spdmatrixrndcond(const ae_int_t n,const double c,real_2d_array & a,const xparams _xparams)3151 void spdmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a, const xparams _xparams)
3152 {
3153     jmp_buf _break_jump;
3154     alglib_impl::ae_state _alglib_env_state;
3155     alglib_impl::ae_state_init(&_alglib_env_state);
3156     if( setjmp(_break_jump) )
3157     {
3158 #if !defined(AE_NO_EXCEPTIONS)
3159         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3160 #else
3161         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3162         return;
3163 #endif
3164     }
3165     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3166     if( _xparams.flags!=0x0 )
3167         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3168     alglib_impl::spdmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3169     alglib_impl::ae_state_clear(&_alglib_env_state);
3170     return;
3171 }
3172 
3173 /*************************************************************************
3174 Generation of random NxN Hermitian matrix with given condition number  and
3175 norm2(A)=1
3176 
3177 INPUT PARAMETERS:
3178     N   -   matrix size
3179     C   -   condition number (in 2-norm)
3180 
3181 OUTPUT PARAMETERS:
3182     A   -   random matrix with norm2(A)=1 and cond(A)=C
3183 
3184   -- ALGLIB routine --
3185      04.12.2009
3186      Bochkanov Sergey
3187 *************************************************************************/
hmatrixrndcond(const ae_int_t n,const double c,complex_2d_array & a,const xparams _xparams)3188 void hmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a, const xparams _xparams)
3189 {
3190     jmp_buf _break_jump;
3191     alglib_impl::ae_state _alglib_env_state;
3192     alglib_impl::ae_state_init(&_alglib_env_state);
3193     if( setjmp(_break_jump) )
3194     {
3195 #if !defined(AE_NO_EXCEPTIONS)
3196         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3197 #else
3198         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3199         return;
3200 #endif
3201     }
3202     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3203     if( _xparams.flags!=0x0 )
3204         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3205     alglib_impl::hmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3206     alglib_impl::ae_state_clear(&_alglib_env_state);
3207     return;
3208 }
3209 
3210 /*************************************************************************
3211 Generation of random NxN Hermitian positive definite matrix with given
3212 condition number and norm2(A)=1
3213 
3214 INPUT PARAMETERS:
3215     N   -   matrix size
3216     C   -   condition number (in 2-norm)
3217 
3218 OUTPUT PARAMETERS:
3219     A   -   random HPD matrix with norm2(A)=1 and cond(A)=C
3220 
3221   -- ALGLIB routine --
3222      04.12.2009
3223      Bochkanov Sergey
3224 *************************************************************************/
hpdmatrixrndcond(const ae_int_t n,const double c,complex_2d_array & a,const xparams _xparams)3225 void hpdmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a, const xparams _xparams)
3226 {
3227     jmp_buf _break_jump;
3228     alglib_impl::ae_state _alglib_env_state;
3229     alglib_impl::ae_state_init(&_alglib_env_state);
3230     if( setjmp(_break_jump) )
3231     {
3232 #if !defined(AE_NO_EXCEPTIONS)
3233         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3234 #else
3235         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3236         return;
3237 #endif
3238     }
3239     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3240     if( _xparams.flags!=0x0 )
3241         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3242     alglib_impl::hpdmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3243     alglib_impl::ae_state_clear(&_alglib_env_state);
3244     return;
3245 }
3246 
3247 /*************************************************************************
3248 Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix
3249 
3250 INPUT PARAMETERS:
3251     A   -   matrix, array[0..M-1, 0..N-1]
3252     M, N-   matrix size
3253 
3254 OUTPUT PARAMETERS:
3255     A   -   A*Q, where Q is random NxN orthogonal matrix
3256 
3257   -- ALGLIB routine --
3258      04.12.2009
3259      Bochkanov Sergey
3260 *************************************************************************/
rmatrixrndorthogonalfromtheright(real_2d_array & a,const ae_int_t m,const ae_int_t n,const xparams _xparams)3261 void rmatrixrndorthogonalfromtheright(real_2d_array &a, const ae_int_t m, const ae_int_t n, const xparams _xparams)
3262 {
3263     jmp_buf _break_jump;
3264     alglib_impl::ae_state _alglib_env_state;
3265     alglib_impl::ae_state_init(&_alglib_env_state);
3266     if( setjmp(_break_jump) )
3267     {
3268 #if !defined(AE_NO_EXCEPTIONS)
3269         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3270 #else
3271         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3272         return;
3273 #endif
3274     }
3275     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3276     if( _xparams.flags!=0x0 )
3277         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3278     alglib_impl::rmatrixrndorthogonalfromtheright(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
3279     alglib_impl::ae_state_clear(&_alglib_env_state);
3280     return;
3281 }
3282 
3283 /*************************************************************************
3284 Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix
3285 
3286 INPUT PARAMETERS:
3287     A   -   matrix, array[0..M-1, 0..N-1]
3288     M, N-   matrix size
3289 
3290 OUTPUT PARAMETERS:
3291     A   -   Q*A, where Q is random MxM orthogonal matrix
3292 
3293   -- ALGLIB routine --
3294      04.12.2009
3295      Bochkanov Sergey
3296 *************************************************************************/
rmatrixrndorthogonalfromtheleft(real_2d_array & a,const ae_int_t m,const ae_int_t n,const xparams _xparams)3297 void rmatrixrndorthogonalfromtheleft(real_2d_array &a, const ae_int_t m, const ae_int_t n, const xparams _xparams)
3298 {
3299     jmp_buf _break_jump;
3300     alglib_impl::ae_state _alglib_env_state;
3301     alglib_impl::ae_state_init(&_alglib_env_state);
3302     if( setjmp(_break_jump) )
3303     {
3304 #if !defined(AE_NO_EXCEPTIONS)
3305         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3306 #else
3307         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3308         return;
3309 #endif
3310     }
3311     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3312     if( _xparams.flags!=0x0 )
3313         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3314     alglib_impl::rmatrixrndorthogonalfromtheleft(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
3315     alglib_impl::ae_state_clear(&_alglib_env_state);
3316     return;
3317 }
3318 
3319 /*************************************************************************
3320 Multiplication of MxN complex matrix by NxN random Haar distributed
3321 complex orthogonal matrix
3322 
3323 INPUT PARAMETERS:
3324     A   -   matrix, array[0..M-1, 0..N-1]
3325     M, N-   matrix size
3326 
3327 OUTPUT PARAMETERS:
3328     A   -   A*Q, where Q is random NxN orthogonal matrix
3329 
3330   -- ALGLIB routine --
3331      04.12.2009
3332      Bochkanov Sergey
3333 *************************************************************************/
// C++ wrapper around the C-core implementation: multiplies complex A by a
// random NxN unitary matrix from the right (A := A*Q). Errors raised by the
// C core via longjmp() are translated to a C++ exception (or to the global
// error flag under AE_NO_EXCEPTIONS).
void cmatrixrndorthogonalfromtheright(complex_2d_array &a, const ae_int_t m, const ae_int_t n, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // nonzero return => we arrived here through longjmp() from the C core
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::cmatrixrndorthogonalfromtheright(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3355 
3356 /*************************************************************************
3357 Multiplication of MxN complex matrix by MxM random Haar distributed
3358 complex orthogonal matrix
3359 
3360 INPUT PARAMETERS:
3361     A   -   matrix, array[0..M-1, 0..N-1]
3362     M, N-   matrix size
3363 
3364 OUTPUT PARAMETERS:
3365     A   -   Q*A, where Q is random MxM orthogonal matrix
3366 
3367   -- ALGLIB routine --
3368      04.12.2009
3369      Bochkanov Sergey
3370 *************************************************************************/
// C++ wrapper around the C-core implementation: multiplies complex A by a
// random MxM unitary matrix from the left (A := Q*A). longjmp()-style errors
// from the C core become C++ exceptions (or set the error flag under
// AE_NO_EXCEPTIONS).
void cmatrixrndorthogonalfromtheleft(complex_2d_array &a, const ae_int_t m, const ae_int_t n, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // error landing pad for the C core's longjmp()
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::cmatrixrndorthogonalfromtheleft(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3392 
3393 /*************************************************************************
3394 Symmetric multiplication of NxN matrix by random Haar distributed
3395 orthogonal  matrix
3396 
3397 INPUT PARAMETERS:
3398     A   -   matrix, array[0..N-1, 0..N-1]
3399     N   -   matrix size
3400 
3401 OUTPUT PARAMETERS:
3402     A   -   Q'*A*Q, where Q is random NxN orthogonal matrix
3403 
3404   -- ALGLIB routine --
3405      04.12.2009
3406      Bochkanov Sergey
3407 *************************************************************************/
// C++ wrapper around the C-core implementation: symmetric similarity
// transform A := Q'*A*Q with a random NxN orthogonal Q. Errors raised by
// the C core via longjmp() are rethrown as C++ exceptions (or recorded in
// the error flag under AE_NO_EXCEPTIONS).
void smatrixrndmultiply(real_2d_array &a, const ae_int_t n, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // error landing pad for the C core's longjmp()
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::smatrixrndmultiply(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3429 
3430 /*************************************************************************
3431 Hermitian multiplication of NxN matrix by random Haar distributed
3432 complex orthogonal matrix
3433 
3434 INPUT PARAMETERS:
3435     A   -   matrix, array[0..N-1, 0..N-1]
3436     N   -   matrix size
3437 
3438 OUTPUT PARAMETERS:
3439     A   -   Q^H*A*Q, where Q is random NxN orthogonal matrix
3440 
3441   -- ALGLIB routine --
3442      04.12.2009
3443      Bochkanov Sergey
3444 *************************************************************************/
// C++ wrapper around the C-core implementation: Hermitian similarity
// transform A := Q^H*A*Q with a random NxN unitary Q. longjmp()-style
// errors from the C core become C++ exceptions (or set the error flag
// under AE_NO_EXCEPTIONS).
void hmatrixrndmultiply(complex_2d_array &a, const ae_int_t n, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // error landing pad for the C core's longjmp()
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::hmatrixrndmultiply(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3466 #endif
3467 
3468 #if defined(AE_COMPILE_SPARSE) || !defined(AE_PARTIAL_BUILD)
3469 /*************************************************************************
3470 Sparse matrix structure.
3471 
3472 You should use ALGLIB functions to work with sparse matrix. Never  try  to
3473 access its fields directly!
3474 
3475 NOTES ON THE SPARSE STORAGE FORMATS
3476 
3477 Sparse matrices can be stored using several formats:
3478 * Hash-Table representation
3479 * Compressed Row Storage (CRS)
3480 * Skyline matrix storage (SKS)
3481 
3482 Each of the formats has benefits and drawbacks:
3483 * Hash-table is good for dynamic operations (insertion of new elements),
3484   but does not support linear algebra operations
3485 * CRS is good for operations like matrix-vector or matrix-matrix products,
3486   but its initialization is less convenient - you have to tell row   sizes
3487   at the initialization, and you have to fill  matrix  only  row  by  row,
3488   from left to right.
3489 * SKS is a special format which is used to store triangular  factors  from
3490   Cholesky factorization. It does not support  dynamic  modification,  and
3491   support for linear algebra operations is very limited.
3492 
3493 Tables below outline information about these formats:
3494 
3495     OPERATIONS WITH MATRIX      HASH        CRS         SKS
3496     creation                    +           +           +
3497     SparseGet                   +           +           +
3498     SparseExists                +           +           +
3499     SparseRewriteExisting       +           +           +
3500     SparseSet                   +           +           +
3501     SparseAdd                   +
3502     SparseGetRow                            +           +
3503     SparseGetCompressedRow                  +           +
3504     sparse-dense linear algebra             +           +
3505 *************************************************************************/
// Default constructor: allocates and zero-initializes the wrapped C-core
// sparsematrix structure. If the C core longjmp()s during allocation or
// initialization, any partially-constructed state is destroyed and freed
// before the error is rethrown as a C++ exception (or stored in the error
// flag when AE_NO_EXCEPTIONS is defined).
_sparsematrix_owner::_sparsematrix_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // error path: release whatever was allocated before the failure
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsematrix_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct must be NULL before ae_malloc so the error path above is safe
    p_struct = NULL;
    p_struct = (alglib_impl::sparsematrix*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsematrix), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsematrix));
    alglib_impl::_sparsematrix_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
3534 
// Copy constructor: deep-copies the C-core structure owned by rhs. Requires
// rhs to be initialized (asserted). On failure any partially-constructed
// state is destroyed before the error is propagated as a C++ exception (or
// stored in the error flag under AE_NO_EXCEPTIONS).
_sparsematrix_owner::_sparsematrix_owner(const _sparsematrix_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // error path: release whatever was allocated before the failure
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsematrix_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsematrix copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::sparsematrix*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsematrix), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsematrix));
    alglib_impl::_sparsematrix_init_copy(p_struct, const_cast<alglib_impl::sparsematrix*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
3564 
// Copy assignment: destroys the current contents and deep-copies from rhs.
// Self-assignment is a no-op. Both sides must already be initialized
// (asserted). Errors are rethrown as C++ exceptions (or stored in the
// error flag under AE_NO_EXCEPTIONS).
_sparsematrix_owner& _sparsematrix_owner::operator=(const _sparsematrix_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: sparsematrix assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsematrix assignment constructor failure (source is not initialized)", &_state);
    // destroy-then-init-copy; p_struct itself is reused, only contents change
    alglib_impl::_sparsematrix_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::sparsematrix));
    alglib_impl::_sparsematrix_init_copy(p_struct, const_cast<alglib_impl::sparsematrix*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
3591 
~_sparsematrix_owner()3592 _sparsematrix_owner::~_sparsematrix_owner()
3593 {
3594     if( p_struct!=NULL )
3595     {
3596         alglib_impl::_sparsematrix_destroy(p_struct);
3597         ae_free(p_struct);
3598     }
3599 }
3600 
// Returns the raw (non-owning) pointer to the wrapped C-core structure.
alglib_impl::sparsematrix* _sparsematrix_owner::c_ptr()
{
    return p_struct;
}
3605 
c_ptr() const3606 alglib_impl::sparsematrix* _sparsematrix_owner::c_ptr() const
3607 {
3608     return const_cast<alglib_impl::sparsematrix*>(p_struct);
3609 }
// Default constructor: delegates allocation/initialization to the owner base.
sparsematrix::sparsematrix() : _sparsematrix_owner()
{
}
3613 
// Copy constructor: delegates the deep copy to the owner base.
sparsematrix::sparsematrix(const sparsematrix &rhs):_sparsematrix_owner(rhs)
{
}
3617 
// Copy assignment: deep copying is delegated to the owner base class;
// self-assignment is a no-op.
sparsematrix& sparsematrix::operator=(const sparsematrix &rhs)
{
    if( this!=&rhs )
        _sparsematrix_owner::operator=(rhs);
    return *this;
}
3625 
// Destructor: cleanup is handled by the owner base destructor.
sparsematrix::~sparsematrix()
{
}
3629 
3630 
3631 /*************************************************************************
3632 Temporary buffers for sparse matrix operations.
3633 
3634 You should pass an instance of this structure to factorization  functions.
3635 It allows to reuse memory during repeated sparse  factorizations.  You  do
3636 not have to call some initialization function - simply passing an instance
3637 to factorization function is enough.
3638 *************************************************************************/
// Default constructor: allocates and zero-initializes the wrapped C-core
// sparsebuffers structure. On a longjmp() from the C core, any partially-
// constructed state is destroyed and freed before the error is rethrown as
// a C++ exception (or stored in the error flag under AE_NO_EXCEPTIONS).
_sparsebuffers_owner::_sparsebuffers_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // error path: release whatever was allocated before the failure
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsebuffers_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct must be NULL before ae_malloc so the error path above is safe
    p_struct = NULL;
    p_struct = (alglib_impl::sparsebuffers*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsebuffers), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsebuffers));
    alglib_impl::_sparsebuffers_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
3667 
// Copy constructor: deep-copies the C-core structure owned by rhs. Requires
// rhs to be initialized (asserted). On failure any partially-constructed
// state is destroyed before the error is propagated.
_sparsebuffers_owner::_sparsebuffers_owner(const _sparsebuffers_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // error path: release whatever was allocated before the failure
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsebuffers_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsebuffers copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::sparsebuffers*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsebuffers), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsebuffers));
    alglib_impl::_sparsebuffers_init_copy(p_struct, const_cast<alglib_impl::sparsebuffers*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
3697 
// Copy assignment: destroys the current contents and deep-copies from rhs.
// Self-assignment is a no-op. Both sides must already be initialized
// (asserted).
_sparsebuffers_owner& _sparsebuffers_owner::operator=(const _sparsebuffers_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: sparsebuffers assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsebuffers assignment constructor failure (source is not initialized)", &_state);
    // destroy-then-init-copy; p_struct itself is reused, only contents change
    alglib_impl::_sparsebuffers_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::sparsebuffers));
    alglib_impl::_sparsebuffers_init_copy(p_struct, const_cast<alglib_impl::sparsebuffers*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
3724 
~_sparsebuffers_owner()3725 _sparsebuffers_owner::~_sparsebuffers_owner()
3726 {
3727     if( p_struct!=NULL )
3728     {
3729         alglib_impl::_sparsebuffers_destroy(p_struct);
3730         ae_free(p_struct);
3731     }
3732 }
3733 
// Returns the raw (non-owning) pointer to the wrapped C-core structure.
alglib_impl::sparsebuffers* _sparsebuffers_owner::c_ptr()
{
    return p_struct;
}
3738 
c_ptr() const3739 alglib_impl::sparsebuffers* _sparsebuffers_owner::c_ptr() const
3740 {
3741     return const_cast<alglib_impl::sparsebuffers*>(p_struct);
3742 }
// Default constructor: delegates allocation/initialization to the owner base.
sparsebuffers::sparsebuffers() : _sparsebuffers_owner()
{
}
3746 
// Copy constructor: delegates the deep copy to the owner base.
sparsebuffers::sparsebuffers(const sparsebuffers &rhs):_sparsebuffers_owner(rhs)
{
}
3750 
// Copy assignment: deep copying is delegated to the owner base class;
// self-assignment is a no-op.
sparsebuffers& sparsebuffers::operator=(const sparsebuffers &rhs)
{
    if( this!=&rhs )
        _sparsebuffers_owner::operator=(rhs);
    return *this;
}
3758 
// Destructor: cleanup is handled by the owner base destructor.
sparsebuffers::~sparsebuffers()
{
}
3762 
3763 
3764 /*************************************************************************
3765 This function serializes data structure to string.
3766 
3767 Important properties of s_out:
3768 * it contains alphanumeric characters, dots, underscores, minus signs
3769 * these symbols are grouped into words, which are separated by spaces
3770   and Windows-style (CR+LF) newlines
3771 * although  serializer  uses  spaces and CR+LF as separators, you can
3772   replace any separator character by arbitrary combination of spaces,
3773   tabs, Windows or Unix newlines. It allows flexible reformatting  of
3774   the  string  in  case you want to include it into text or XML file.
3775   But you should not insert separators into the middle of the "words"
3776   nor you should change case of letters.
3777 * s_out can be freely moved between 32-bit and 64-bit systems, little
3778   and big endian machines, and so on. You can serialize structure  on
3779   32-bit machine and unserialize it on 64-bit one (or vice versa), or
3780   serialize  it  on  SPARC  and  unserialize  on  x86.  You  can also
3781   serialize  it  in  C++ version of ALGLIB and unserialize in C# one,
3782   and vice versa.
3783 *************************************************************************/
// Serializes a sparse matrix into a portable ASCII string. Two-pass
// protocol: first pass (sparsealloc) only counts the required size so the
// output string can be reserved once; second pass (sparseserialize) writes
// the data. The final size assertion guards against a mismatch between the
// two passes.
void sparseserialize(sparsematrix &obj, std::string &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;
    alglib_impl::ae_int_t ssize;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    // pass 1: size estimation
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::sparsealloc(&serializer, obj.c_ptr(), &state);
    ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
    // pass 2: actual serialization into the pre-reserved string
    s_out.clear();
    s_out.reserve((size_t)(ssize+1));
    alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
    alglib_impl::sparseserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
3815 /*************************************************************************
3816 This function unserializes data structure from string.
3817 *************************************************************************/
// Restores a sparse matrix from its string serialization (the inverse of
// sparseserialize above). Errors raised by the C core via longjmp() are
// rethrown as C++ exceptions (or stored in the error flag under
// AE_NO_EXCEPTIONS).
void sparseunserialize(const std::string &s_in, sparsematrix &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
    alglib_impl::sparseunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
3842 
3843 
3844 /*************************************************************************
3845 This function serializes data structure to C++ stream.
3846 
3847 Data stream generated by this function is same as  string  representation
3848 generated  by  string  version  of  serializer - alphanumeric characters,
3849 dots, underscores, minus signs, which are grouped into words separated by
3850 spaces and CR+LF.
3851 
3852 We recommend you to read comments on string version of serializer to find
3853 out more about serialization of ALGLIB objects.
3854 *************************************************************************/
// Serializes a sparse matrix into a C++ output stream. Uses the same
// two-pass serializer protocol as the string version; the size returned by
// the first pass is unused here because streams need no pre-reservation,
// but the serializer API still requires the query to be made.
void sparseserialize(sparsematrix &obj, std::ostream &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::sparsealloc(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
    alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
    alglib_impl::sparseserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
3882 /*************************************************************************
3883 This function unserializes data structure from stream.
3884 *************************************************************************/
// Restores a sparse matrix from a C++ input stream (the inverse of the
// stream version of sparseserialize). Errors from the C core are rethrown
// as C++ exceptions (or stored in the error flag under AE_NO_EXCEPTIONS).
void sparseunserialize(const std::istream &s_in, sparsematrix &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
    alglib_impl::sparseunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
3909 
3910 /*************************************************************************
3911 This function creates sparse matrix in a Hash-Table format.
3912 
3913 This function creates Hash-Table matrix, which can be  converted  to  CRS
3914 format after its initialization is over. Typical  usage  scenario  for  a
3915 sparse matrix is:
3916 1. creation in a Hash-Table format
3917 2. insertion of the matrix elements
3918 3. conversion to the CRS representation
3919 4. matrix is passed to some linear algebra algorithm
3920 
3921 Some  information  about  different matrix formats can be found below, in
3922 the "NOTES" section.
3923 
3924 INPUT PARAMETERS
3925     M           -   number of rows in a matrix, M>=1
3926     N           -   number of columns in a matrix, N>=1
3927     K           -   K>=0, expected number of non-zero elements in a matrix.
3928                     K can be inexact approximation, can be less than actual
3929                     number  of  elements  (table will grow when needed) or
3930                     even zero.
3931                     It is important to understand that although hash-table
3932                     may grow automatically, it is better to  provide  good
3933                     estimate of data size.
3934 
3935 OUTPUT PARAMETERS
3936     S           -   sparse M*N matrix in Hash-Table representation.
3937                     All elements of the matrix are zero.
3938 
3939 NOTE 1
3940 
3941 Hash-tables use memory inefficiently, and they have to keep  some  amount
3942 of the "spare memory" in order to have good performance. Hash  table  for
3943 matrix with K non-zero elements will  need  C*K*(8+2*sizeof(int))  bytes,
3944 where C is a small constant, about 1.5-2 in magnitude.
3945 
3946 CRS storage, from the other side, is  more  memory-efficient,  and  needs
3947 just K*(8+sizeof(int))+M*sizeof(int) bytes, where M is a number  of  rows
3948 in a matrix.
3949 
3950 When you convert from the Hash-Table to CRS  representation, all unneeded
3951 memory will be freed.
3952 
3953 NOTE 2
3954 
3955 Comments of SparseMatrix structure outline  information  about  different
3956 sparse storage formats. We recommend you to read them before starting  to
3957 use ALGLIB sparse matrices.
3958 
3959 NOTE 3
3960 
3961 This function completely  overwrites S with new sparse matrix. Previously
3962 allocated storage is NOT reused. If you  want  to reuse already allocated
3963 memory, call SparseCreateBuf function.
3964 
3965   -- ALGLIB PROJECT --
3966      Copyright 14.10.2011 by Bochkanov Sergey
3967 *************************************************************************/
// C++ wrapper: creates an MxN sparse matrix in Hash-Table format with room
// for about K non-zero elements (the table grows as needed). Overwrites S.
// Errors from the C core are rethrown as C++ exceptions (or stored in the
// error flag under AE_NO_EXCEPTIONS).
void sparsecreate(const ae_int_t m, const ae_int_t n, const ae_int_t k, sparsematrix &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // error landing pad for the C core's longjmp()
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsecreate(m, n, k, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3989 
3990 /*************************************************************************
3991 This function creates sparse matrix in a Hash-Table format.
3992 
3993 This function creates Hash-Table matrix, which can be  converted  to  CRS
3994 format after its initialization is over. Typical  usage  scenario  for  a
3995 sparse matrix is:
3996 1. creation in a Hash-Table format
3997 2. insertion of the matrix elements
3998 3. conversion to the CRS representation
3999 4. matrix is passed to some linear algebra algorithm
4000 
4001 Some  information  about  different matrix formats can be found below, in
4002 the "NOTES" section.
4003 
4004 INPUT PARAMETERS
4005     M           -   number of rows in a matrix, M>=1
4006     N           -   number of columns in a matrix, N>=1
4007     K           -   K>=0, expected number of non-zero elements in a matrix.
4008                     K can be inexact approximation, can be less than actual
4009                     number  of  elements  (table will grow when needed) or
4010                     even zero.
4011                     It is important to understand that although hash-table
4012                     may grow automatically, it is better to  provide  good
4013                     estimate of data size.
4014 
4015 OUTPUT PARAMETERS
4016     S           -   sparse M*N matrix in Hash-Table representation.
4017                     All elements of the matrix are zero.
4018 
4019 NOTE 1
4020 
4021 Hash-tables use memory inefficiently, and they have to keep  some  amount
4022 of the "spare memory" in order to have good performance. Hash  table  for
4023 matrix with K non-zero elements will  need  C*K*(8+2*sizeof(int))  bytes,
4024 where C is a small constant, about 1.5-2 in magnitude.
4025 
4026 CRS storage, from the other side, is  more  memory-efficient,  and  needs
4027 just K*(8+sizeof(int))+M*sizeof(int) bytes, where M is a number  of  rows
4028 in a matrix.
4029 
4030 When you convert from the Hash-Table to CRS  representation, all unneeded
4031 memory will be freed.
4032 
4033 NOTE 2
4034 
4035 Comments of SparseMatrix structure outline  information  about  different
4036 sparse storage formats. We recommend you to read them before starting  to
4037 use ALGLIB sparse matrices.
4038 
4039 NOTE 3
4040 
4041 This function completely  overwrites S with new sparse matrix. Previously
4042 allocated storage is NOT reused. If you  want  to reuse already allocated
4043 memory, call SparseCreateBuf function.
4044 
4045   -- ALGLIB PROJECT --
4046      Copyright 14.10.2011 by Bochkanov Sergey
4047 *************************************************************************/
4048 #if !defined(AE_NO_EXCEPTIONS)
// Convenience overload of sparsecreate() with the expected-nonzeros count K
// defaulted to 0 (the hash table simply grows on demand). Only compiled in
// exception-enabled builds (see the surrounding #if), so the error path
// always throws.
void sparsecreate(const ae_int_t m, const ae_int_t n, sparsematrix &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t k;

    k = 0;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsecreate(m, n, k, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
4067 #endif
4068 
4069 /*************************************************************************
4070 This version of SparseCreate function creates sparse matrix in Hash-Table
4071 format, reusing previously allocated storage as much  as  possible.  Read
4072 comments for SparseCreate() for more information.
4073 
4074 INPUT PARAMETERS
4075     M           -   number of rows in a matrix, M>=1
4076     N           -   number of columns in a matrix, N>=1
4077     K           -   K>=0, expected number of non-zero elements in a matrix.
4078                     K can be inexact approximation, can be less than actual
4079                     number  of  elements  (table will grow when needed) or
4080                     even zero.
4081                     It is important to understand that although hash-table
4082                     may grow automatically, it is better to  provide  good
4083                     estimate of data size.
4084     S           -   SparseMatrix structure which MAY contain some  already
4085                     allocated storage.
4086 
4087 OUTPUT PARAMETERS
4088     S           -   sparse M*N matrix in Hash-Table representation.
4089                     All elements of the matrix are zero.
4090                     Previously allocated storage is reused, if  its  size
4091                     is compatible with expected number of non-zeros K.
4092 
4093   -- ALGLIB PROJECT --
4094      Copyright 14.01.2014 by Bochkanov Sergey
4095 *************************************************************************/
sparsecreatebuf(const ae_int_t m,const ae_int_t n,const ae_int_t k,const sparsematrix & s,const xparams _xparams)4096 void sparsecreatebuf(const ae_int_t m, const ae_int_t n, const ae_int_t k, const sparsematrix &s, const xparams _xparams)
4097 {
4098     jmp_buf _break_jump;
4099     alglib_impl::ae_state _alglib_env_state;
4100     alglib_impl::ae_state_init(&_alglib_env_state);
4101     if( setjmp(_break_jump) )
4102     {
4103 #if !defined(AE_NO_EXCEPTIONS)
4104         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4105 #else
4106         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4107         return;
4108 #endif
4109     }
4110     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4111     if( _xparams.flags!=0x0 )
4112         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4113     alglib_impl::sparsecreatebuf(m, n, k, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4114     alglib_impl::ae_state_clear(&_alglib_env_state);
4115     return;
4116 }
4117 
4118 /*************************************************************************
4119 This version of SparseCreate function creates sparse matrix in Hash-Table
4120 format, reusing previously allocated storage as much  as  possible.  Read
4121 comments for SparseCreate() for more information.
4122 
4123 INPUT PARAMETERS
4124     M           -   number of rows in a matrix, M>=1
4125     N           -   number of columns in a matrix, N>=1
4126     K           -   K>=0, expected number of non-zero elements in a matrix.
4127                     K can be inexact approximation, can be less than actual
4128                     number  of  elements  (table will grow when needed) or
                    even zero.
4130                     It is important to understand that although hash-table
4131                     may grow automatically, it is better to  provide  good
4132                     estimate of data size.
4133     S           -   SparseMatrix structure which MAY contain some  already
4134                     allocated storage.
4135 
4136 OUTPUT PARAMETERS
4137     S           -   sparse M*N matrix in Hash-Table representation.
4138                     All elements of the matrix are zero.
4139                     Previously allocated storage is reused, if  its  size
4140                     is compatible with expected number of non-zeros K.
4141 
4142   -- ALGLIB PROJECT --
4143      Copyright 14.01.2014 by Bochkanov Sergey
4144 *************************************************************************/
4145 #if !defined(AE_NO_EXCEPTIONS)
sparsecreatebuf(const ae_int_t m,const ae_int_t n,const sparsematrix & s,const xparams _xparams)4146 void sparsecreatebuf(const ae_int_t m, const ae_int_t n, const sparsematrix &s, const xparams _xparams)
4147 {
4148     jmp_buf _break_jump;
4149     alglib_impl::ae_state _alglib_env_state;
4150     ae_int_t k;
4151 
4152     k = 0;
4153     alglib_impl::ae_state_init(&_alglib_env_state);
4154     if( setjmp(_break_jump) )
4155         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4156     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4157     if( _xparams.flags!=0x0 )
4158         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4159     alglib_impl::sparsecreatebuf(m, n, k, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4160 
4161     alglib_impl::ae_state_clear(&_alglib_env_state);
4162     return;
4163 }
4164 #endif
4165 
4166 /*************************************************************************
4167 This function creates sparse matrix in a CRS format (expert function for
4168 situations when you are running out of memory).
4169 
4170 This function creates CRS matrix. Typical usage scenario for a CRS matrix
4171 is:
4172 1. creation (you have to tell number of non-zero elements at each row  at
4173    this moment)
4174 2. insertion of the matrix elements (row by row, from left to right)
4175 3. matrix is passed to some linear algebra algorithm
4176 
4177 This function is a memory-efficient alternative to SparseCreate(), but it
4178 is more complex because it requires you to know in advance how large your
4179 matrix is. Some  information about  different matrix formats can be found
4180 in comments on SparseMatrix structure.  We recommend  you  to  read  them
4181 before starting to use ALGLIB sparse matrices..
4182 
4183 INPUT PARAMETERS
4184     M           -   number of rows in a matrix, M>=1
4185     N           -   number of columns in a matrix, N>=1
4186     NER         -   number of elements at each row, array[M], NER[I]>=0
4187 
4188 OUTPUT PARAMETERS
4189     S           -   sparse M*N matrix in CRS representation.
4190                     You have to fill ALL non-zero elements by calling
4191                     SparseSet() BEFORE you try to use this matrix.
4192 
4193 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
4194       Previously allocated storage is NOT reused. If you  want  to  reuse
4195       already allocated memory, call SparseCreateCRSBuf function.
4196 
4197   -- ALGLIB PROJECT --
4198      Copyright 14.10.2011 by Bochkanov Sergey
4199 *************************************************************************/
sparsecreatecrs(const ae_int_t m,const ae_int_t n,const integer_1d_array & ner,sparsematrix & s,const xparams _xparams)4200 void sparsecreatecrs(const ae_int_t m, const ae_int_t n, const integer_1d_array &ner, sparsematrix &s, const xparams _xparams)
4201 {
4202     jmp_buf _break_jump;
4203     alglib_impl::ae_state _alglib_env_state;
4204     alglib_impl::ae_state_init(&_alglib_env_state);
4205     if( setjmp(_break_jump) )
4206     {
4207 #if !defined(AE_NO_EXCEPTIONS)
4208         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4209 #else
4210         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4211         return;
4212 #endif
4213     }
4214     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4215     if( _xparams.flags!=0x0 )
4216         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4217     alglib_impl::sparsecreatecrs(m, n, const_cast<alglib_impl::ae_vector*>(ner.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4218     alglib_impl::ae_state_clear(&_alglib_env_state);
4219     return;
4220 }
4221 
4222 /*************************************************************************
4223 This function creates sparse matrix in a CRS format (expert function  for
4224 situations when you are running out  of  memory).  This  version  of  CRS
4225 matrix creation function may reuse memory already allocated in S.
4226 
4227 This function creates CRS matrix. Typical usage scenario for a CRS matrix
4228 is:
4229 1. creation (you have to tell number of non-zero elements at each row  at
4230    this moment)
4231 2. insertion of the matrix elements (row by row, from left to right)
4232 3. matrix is passed to some linear algebra algorithm
4233 
4234 This function is a memory-efficient alternative to SparseCreate(), but it
4235 is more complex because it requires you to know in advance how large your
4236 matrix is. Some  information about  different matrix formats can be found
4237 in comments on SparseMatrix structure.  We recommend  you  to  read  them
4238 before starting to use ALGLIB sparse matrices..
4239 
4240 INPUT PARAMETERS
4241     M           -   number of rows in a matrix, M>=1
4242     N           -   number of columns in a matrix, N>=1
4243     NER         -   number of elements at each row, array[M], NER[I]>=0
4244     S           -   sparse matrix structure with possibly preallocated
4245                     memory.
4246 
4247 OUTPUT PARAMETERS
4248     S           -   sparse M*N matrix in CRS representation.
4249                     You have to fill ALL non-zero elements by calling
4250                     SparseSet() BEFORE you try to use this matrix.
4251 
4252   -- ALGLIB PROJECT --
4253      Copyright 14.10.2011 by Bochkanov Sergey
4254 *************************************************************************/
sparsecreatecrsbuf(const ae_int_t m,const ae_int_t n,const integer_1d_array & ner,const sparsematrix & s,const xparams _xparams)4255 void sparsecreatecrsbuf(const ae_int_t m, const ae_int_t n, const integer_1d_array &ner, const sparsematrix &s, const xparams _xparams)
4256 {
4257     jmp_buf _break_jump;
4258     alglib_impl::ae_state _alglib_env_state;
4259     alglib_impl::ae_state_init(&_alglib_env_state);
4260     if( setjmp(_break_jump) )
4261     {
4262 #if !defined(AE_NO_EXCEPTIONS)
4263         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4264 #else
4265         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4266         return;
4267 #endif
4268     }
4269     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4270     if( _xparams.flags!=0x0 )
4271         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4272     alglib_impl::sparsecreatecrsbuf(m, n, const_cast<alglib_impl::ae_vector*>(ner.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4273     alglib_impl::ae_state_clear(&_alglib_env_state);
4274     return;
4275 }
4276 
4277 /*************************************************************************
4278 This function creates sparse matrix in  a  SKS  format  (skyline  storage
4279 format). In most cases you do not need this function - CRS format  better
4280 suits most use cases.
4281 
4282 INPUT PARAMETERS
4283     M, N        -   number of rows(M) and columns (N) in a matrix:
4284                     * M=N (as for now, ALGLIB supports only square SKS)
4285                     * N>=1
4286                     * M>=1
4287     D           -   "bottom" bandwidths, array[M], D[I]>=0.
4288                     I-th element stores number of non-zeros at I-th  row,
4289                     below the diagonal (diagonal itself is not  included)
4290     U           -   "top" bandwidths, array[N], U[I]>=0.
4291                     I-th element stores number of non-zeros  at I-th row,
4292                     above the diagonal (diagonal itself  is not included)
4293 
4294 OUTPUT PARAMETERS
4295     S           -   sparse M*N matrix in SKS representation.
4296                     All elements are filled by zeros.
4297                     You may use sparseset() to change their values.
4298 
4299 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
4300       Previously allocated storage is NOT reused. If you  want  to  reuse
4301       already allocated memory, call SparseCreateSKSBuf function.
4302 
4303   -- ALGLIB PROJECT --
4304      Copyright 13.01.2014 by Bochkanov Sergey
4305 *************************************************************************/
sparsecreatesks(const ae_int_t m,const ae_int_t n,const integer_1d_array & d,const integer_1d_array & u,sparsematrix & s,const xparams _xparams)4306 void sparsecreatesks(const ae_int_t m, const ae_int_t n, const integer_1d_array &d, const integer_1d_array &u, sparsematrix &s, const xparams _xparams)
4307 {
4308     jmp_buf _break_jump;
4309     alglib_impl::ae_state _alglib_env_state;
4310     alglib_impl::ae_state_init(&_alglib_env_state);
4311     if( setjmp(_break_jump) )
4312     {
4313 #if !defined(AE_NO_EXCEPTIONS)
4314         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4315 #else
4316         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4317         return;
4318 #endif
4319     }
4320     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4321     if( _xparams.flags!=0x0 )
4322         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4323     alglib_impl::sparsecreatesks(m, n, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4324     alglib_impl::ae_state_clear(&_alglib_env_state);
4325     return;
4326 }
4327 
4328 /*************************************************************************
4329 This is "buffered"  version  of  SparseCreateSKS()  which  reuses  memory
4330 previously allocated in S (of course, memory is reallocated if needed).
4331 
4332 This function creates sparse matrix in  a  SKS  format  (skyline  storage
4333 format). In most cases you do not need this function - CRS format  better
4334 suits most use cases.
4335 
4336 INPUT PARAMETERS
4337     M, N        -   number of rows(M) and columns (N) in a matrix:
4338                     * M=N (as for now, ALGLIB supports only square SKS)
4339                     * N>=1
4340                     * M>=1
4341     D           -   "bottom" bandwidths, array[M], 0<=D[I]<=I.
4342                     I-th element stores number of non-zeros at I-th row,
4343                     below the diagonal (diagonal itself is not included)
4344     U           -   "top" bandwidths, array[N], 0<=U[I]<=I.
4345                     I-th element stores number of non-zeros at I-th row,
4346                     above the diagonal (diagonal itself is not included)
4347 
4348 OUTPUT PARAMETERS
4349     S           -   sparse M*N matrix in SKS representation.
4350                     All elements are filled by zeros.
4351                     You may use sparseset() to change their values.
4352 
4353   -- ALGLIB PROJECT --
4354      Copyright 13.01.2014 by Bochkanov Sergey
4355 *************************************************************************/
sparsecreatesksbuf(const ae_int_t m,const ae_int_t n,const integer_1d_array & d,const integer_1d_array & u,const sparsematrix & s,const xparams _xparams)4356 void sparsecreatesksbuf(const ae_int_t m, const ae_int_t n, const integer_1d_array &d, const integer_1d_array &u, const sparsematrix &s, const xparams _xparams)
4357 {
4358     jmp_buf _break_jump;
4359     alglib_impl::ae_state _alglib_env_state;
4360     alglib_impl::ae_state_init(&_alglib_env_state);
4361     if( setjmp(_break_jump) )
4362     {
4363 #if !defined(AE_NO_EXCEPTIONS)
4364         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4365 #else
4366         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4367         return;
4368 #endif
4369     }
4370     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4371     if( _xparams.flags!=0x0 )
4372         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4373     alglib_impl::sparsecreatesksbuf(m, n, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4374     alglib_impl::ae_state_clear(&_alglib_env_state);
4375     return;
4376 }
4377 
4378 /*************************************************************************
4379 This function creates sparse matrix in  a  SKS  format  (skyline  storage
4380 format). Unlike more general  sparsecreatesks(),  this  function  creates
4381 sparse matrix with constant bandwidth.
4382 
4383 You may want to use this function instead of sparsecreatesks() when  your
4384 matrix has  constant  or  nearly-constant  bandwidth,  and  you  want  to
4385 simplify source code.
4386 
4387 INPUT PARAMETERS
4388     M, N        -   number of rows(M) and columns (N) in a matrix:
4389                     * M=N (as for now, ALGLIB supports only square SKS)
4390                     * N>=1
4391                     * M>=1
4392     BW          -   matrix bandwidth, BW>=0
4393 
4394 OUTPUT PARAMETERS
4395     S           -   sparse M*N matrix in SKS representation.
4396                     All elements are filled by zeros.
4397                     You may use sparseset() to  change  their values.
4398 
4399 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
4400       Previously allocated storage is NOT reused. If you  want  to  reuse
4401       already allocated memory, call sparsecreatesksbandbuf function.
4402 
4403   -- ALGLIB PROJECT --
4404      Copyright 25.12.2017 by Bochkanov Sergey
4405 *************************************************************************/
sparsecreatesksband(const ae_int_t m,const ae_int_t n,const ae_int_t bw,sparsematrix & s,const xparams _xparams)4406 void sparsecreatesksband(const ae_int_t m, const ae_int_t n, const ae_int_t bw, sparsematrix &s, const xparams _xparams)
4407 {
4408     jmp_buf _break_jump;
4409     alglib_impl::ae_state _alglib_env_state;
4410     alglib_impl::ae_state_init(&_alglib_env_state);
4411     if( setjmp(_break_jump) )
4412     {
4413 #if !defined(AE_NO_EXCEPTIONS)
4414         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4415 #else
4416         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4417         return;
4418 #endif
4419     }
4420     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4421     if( _xparams.flags!=0x0 )
4422         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4423     alglib_impl::sparsecreatesksband(m, n, bw, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4424     alglib_impl::ae_state_clear(&_alglib_env_state);
4425     return;
4426 }
4427 
4428 /*************************************************************************
4429 This is "buffered" version  of  sparsecreatesksband() which reuses memory
4430 previously allocated in S (of course, memory is reallocated if needed).
4431 
4432 You may want to use this function instead  of  sparsecreatesksbuf()  when
4433 your matrix has  constant or nearly-constant  bandwidth,  and you want to
4434 simplify source code.
4435 
4436 INPUT PARAMETERS
4437     M, N        -   number of rows(M) and columns (N) in a matrix:
4438                     * M=N (as for now, ALGLIB supports only square SKS)
4439                     * N>=1
4440                     * M>=1
4441     BW          -   bandwidth, BW>=0
4442 
4443 OUTPUT PARAMETERS
4444     S           -   sparse M*N matrix in SKS representation.
4445                     All elements are filled by zeros.
4446                     You may use sparseset() to change their values.
4447 
4448   -- ALGLIB PROJECT --
4449      Copyright 13.01.2014 by Bochkanov Sergey
4450 *************************************************************************/
sparsecreatesksbandbuf(const ae_int_t m,const ae_int_t n,const ae_int_t bw,const sparsematrix & s,const xparams _xparams)4451 void sparsecreatesksbandbuf(const ae_int_t m, const ae_int_t n, const ae_int_t bw, const sparsematrix &s, const xparams _xparams)
4452 {
4453     jmp_buf _break_jump;
4454     alglib_impl::ae_state _alglib_env_state;
4455     alglib_impl::ae_state_init(&_alglib_env_state);
4456     if( setjmp(_break_jump) )
4457     {
4458 #if !defined(AE_NO_EXCEPTIONS)
4459         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4460 #else
4461         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4462         return;
4463 #endif
4464     }
4465     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4466     if( _xparams.flags!=0x0 )
4467         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4468     alglib_impl::sparsecreatesksbandbuf(m, n, bw, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4469     alglib_impl::ae_state_clear(&_alglib_env_state);
4470     return;
4471 }
4472 
4473 /*************************************************************************
4474 This function copies S0 to S1.
4475 This function completely deallocates memory owned by S1 before creating a
4476 copy of S0. If you want to reuse memory, use SparseCopyBuf.
4477 
4478 NOTE:  this  function  does  not verify its arguments, it just copies all
4479 fields of the structure.
4480 
4481   -- ALGLIB PROJECT --
4482      Copyright 14.10.2011 by Bochkanov Sergey
4483 *************************************************************************/
sparsecopy(const sparsematrix & s0,sparsematrix & s1,const xparams _xparams)4484 void sparsecopy(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
4485 {
4486     jmp_buf _break_jump;
4487     alglib_impl::ae_state _alglib_env_state;
4488     alglib_impl::ae_state_init(&_alglib_env_state);
4489     if( setjmp(_break_jump) )
4490     {
4491 #if !defined(AE_NO_EXCEPTIONS)
4492         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4493 #else
4494         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4495         return;
4496 #endif
4497     }
4498     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4499     if( _xparams.flags!=0x0 )
4500         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4501     alglib_impl::sparsecopy(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
4502     alglib_impl::ae_state_clear(&_alglib_env_state);
4503     return;
4504 }
4505 
4506 /*************************************************************************
4507 This function copies S0 to S1.
4508 Memory already allocated in S1 is reused as much as possible.
4509 
4510 NOTE:  this  function  does  not verify its arguments, it just copies all
4511 fields of the structure.
4512 
4513   -- ALGLIB PROJECT --
4514      Copyright 14.10.2011 by Bochkanov Sergey
4515 *************************************************************************/
sparsecopybuf(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)4516 void sparsecopybuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
4517 {
4518     jmp_buf _break_jump;
4519     alglib_impl::ae_state _alglib_env_state;
4520     alglib_impl::ae_state_init(&_alglib_env_state);
4521     if( setjmp(_break_jump) )
4522     {
4523 #if !defined(AE_NO_EXCEPTIONS)
4524         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4525 #else
4526         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4527         return;
4528 #endif
4529     }
4530     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4531     if( _xparams.flags!=0x0 )
4532         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4533     alglib_impl::sparsecopybuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
4534     alglib_impl::ae_state_clear(&_alglib_env_state);
4535     return;
4536 }
4537 
4538 /*************************************************************************
4539 This function efficiently swaps contents of S0 and S1.
4540 
4541   -- ALGLIB PROJECT --
4542      Copyright 16.01.2014 by Bochkanov Sergey
4543 *************************************************************************/
sparseswap(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)4544 void sparseswap(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
4545 {
4546     jmp_buf _break_jump;
4547     alglib_impl::ae_state _alglib_env_state;
4548     alglib_impl::ae_state_init(&_alglib_env_state);
4549     if( setjmp(_break_jump) )
4550     {
4551 #if !defined(AE_NO_EXCEPTIONS)
4552         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4553 #else
4554         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4555         return;
4556 #endif
4557     }
4558     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4559     if( _xparams.flags!=0x0 )
4560         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4561     alglib_impl::sparseswap(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
4562     alglib_impl::ae_state_clear(&_alglib_env_state);
4563     return;
4564 }
4565 
4566 /*************************************************************************
4567 This function adds value to S[i,j] - element of the sparse matrix. Matrix
4568 must be in a Hash-Table mode.
4569 
In case S[i,j] already exists in the table, V is added to its  value.  In
4571 case  S[i,j]  is  non-existent,  it  is  inserted  in  the  table.  Table
4572 automatically grows when necessary.
4573 
4574 INPUT PARAMETERS
4575     S           -   sparse M*N matrix in Hash-Table representation.
4576                     Exception will be thrown for CRS matrix.
4577     I           -   row index of the element to modify, 0<=I<M
4578     J           -   column index of the element to modify, 0<=J<N
4579     V           -   value to add, must be finite number
4580 
4581 OUTPUT PARAMETERS
4582     S           -   modified matrix
4583 
4584 NOTE 1:  when  S[i,j]  is exactly zero after modification, it is  deleted
4585 from the table.
4586 
4587   -- ALGLIB PROJECT --
4588      Copyright 14.10.2011 by Bochkanov Sergey
4589 *************************************************************************/
sparseadd(const sparsematrix & s,const ae_int_t i,const ae_int_t j,const double v,const xparams _xparams)4590 void sparseadd(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const double v, const xparams _xparams)
4591 {
4592     jmp_buf _break_jump;
4593     alglib_impl::ae_state _alglib_env_state;
4594     alglib_impl::ae_state_init(&_alglib_env_state);
4595     if( setjmp(_break_jump) )
4596     {
4597 #if !defined(AE_NO_EXCEPTIONS)
4598         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4599 #else
4600         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4601         return;
4602 #endif
4603     }
4604     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4605     if( _xparams.flags!=0x0 )
4606         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4607     alglib_impl::sparseadd(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, v, &_alglib_env_state);
4608     alglib_impl::ae_state_clear(&_alglib_env_state);
4609     return;
4610 }
4611 
4612 /*************************************************************************
4613 This function modifies S[i,j] - element of the sparse matrix.
4614 
4615 For Hash-based storage format:
4616 * this function can be called at any moment - during matrix initialization
4617   or later
4618 * new value can be zero or non-zero.  In case new value of S[i,j] is zero,
4619   this element is deleted from the table.
4620 * this  function  has  no  effect when called with zero V for non-existent
4621   element.
4622 
4623 For CRS-bases storage format:
4624 * this function can be called ONLY DURING MATRIX INITIALIZATION
4625 * zero values are stored in the matrix similarly to non-zero ones
4626 * elements must be initialized in correct order -  from top row to bottom,
4627   within row - from left to right.
4628 
4629 For SKS storage:
4630 * this function can be called at any moment - during matrix initialization
4631   or later
4632 * zero values are stored in the matrix similarly to non-zero ones
4633 * this function CAN NOT be called for non-existent (outside  of  the  band
4634   specified during SKS matrix creation) elements. Say, if you created  SKS
4635   matrix  with  bandwidth=2  and  tried to call sparseset(s,0,10,VAL),  an
4636   exception will be generated.
4637 
4638 INPUT PARAMETERS
4639     S           -   sparse M*N matrix in Hash-Table, SKS or CRS format.
4640     I           -   row index of the element to modify, 0<=I<M
4641     J           -   column index of the element to modify, 0<=J<N
4642     V           -   value to set, must be finite number, can be zero
4643 
4644 OUTPUT PARAMETERS
4645     S           -   modified matrix
4646 
4647   -- ALGLIB PROJECT --
4648      Copyright 14.10.2011 by Bochkanov Sergey
4649 *************************************************************************/
sparseset(const sparsematrix & s,const ae_int_t i,const ae_int_t j,const double v,const xparams _xparams)4650 void sparseset(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const double v, const xparams _xparams)
4651 {
4652     jmp_buf _break_jump;
4653     alglib_impl::ae_state _alglib_env_state;
4654     alglib_impl::ae_state_init(&_alglib_env_state);
4655     if( setjmp(_break_jump) )
4656     {
4657 #if !defined(AE_NO_EXCEPTIONS)
4658         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4659 #else
4660         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4661         return;
4662 #endif
4663     }
4664     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4665     if( _xparams.flags!=0x0 )
4666         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4667     alglib_impl::sparseset(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, v, &_alglib_env_state);
4668     alglib_impl::ae_state_clear(&_alglib_env_state);
4669     return;
4670 }
4671 
4672 /*************************************************************************
4673 This function returns S[i,j] - element of the sparse matrix.  Matrix  can
4674 be in any mode (Hash-Table, CRS, SKS), but this function is less efficient
4675 for CRS matrices. Hash-Table and SKS matrices can find  element  in  O(1)
4676 time, while  CRS  matrices need O(log(RS)) time, where RS is an number of
4677 non-zero elements in a row.
4678 
4679 INPUT PARAMETERS
4680     S           -   sparse M*N matrix
4681     I           -   row index of the element to modify, 0<=I<M
4682     J           -   column index of the element to modify, 0<=J<N
4683 
4684 RESULT
4685     value of S[I,J] or zero (in case no element with such index is found)
4686 
4687   -- ALGLIB PROJECT --
4688      Copyright 14.10.2011 by Bochkanov Sergey
4689 *************************************************************************/
sparseget(const sparsematrix & s,const ae_int_t i,const ae_int_t j,const xparams _xparams)4690 double sparseget(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const xparams _xparams)
4691 {
4692     jmp_buf _break_jump;
4693     alglib_impl::ae_state _alglib_env_state;
4694     alglib_impl::ae_state_init(&_alglib_env_state);
4695     if( setjmp(_break_jump) )
4696     {
4697 #if !defined(AE_NO_EXCEPTIONS)
4698         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4699 #else
4700         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4701         return 0;
4702 #endif
4703     }
4704     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4705     if( _xparams.flags!=0x0 )
4706         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4707     double result = alglib_impl::sparseget(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, &_alglib_env_state);
4708     alglib_impl::ae_state_clear(&_alglib_env_state);
4709     return *(reinterpret_cast<double*>(&result));
4710 }
4711 
4712 /*************************************************************************
4713 This function checks whether S[i,j] is present in the sparse  matrix.  It
4714 returns True even for elements  that  are  numerically  zero  (but  still
4715 have place allocated for them).
4716 
4717 The matrix  can be in any mode (Hash-Table, CRS, SKS), but this  function
4718 is less efficient for CRS matrices. Hash-Table and SKS matrices can  find
4719 element in O(1) time, while  CRS  matrices need O(log(RS)) time, where RS
4720 is an number of non-zero elements in a row.
4721 
4722 INPUT PARAMETERS
4723     S           -   sparse M*N matrix
4724     I           -   row index of the element to modify, 0<=I<M
4725     J           -   column index of the element to modify, 0<=J<N
4726 
4727 RESULT
4728     whether S[I,J] is present in the data structure or not
4729 
4730   -- ALGLIB PROJECT --
4731      Copyright 14.10.2020 by Bochkanov Sergey
4732 *************************************************************************/
sparseexists(const sparsematrix & s,const ae_int_t i,const ae_int_t j,const xparams _xparams)4733 bool sparseexists(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const xparams _xparams)
4734 {
4735     jmp_buf _break_jump;
4736     alglib_impl::ae_state _alglib_env_state;
4737     alglib_impl::ae_state_init(&_alglib_env_state);
4738     if( setjmp(_break_jump) )
4739     {
4740 #if !defined(AE_NO_EXCEPTIONS)
4741         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4742 #else
4743         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4744         return 0;
4745 #endif
4746     }
4747     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4748     if( _xparams.flags!=0x0 )
4749         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4750     ae_bool result = alglib_impl::sparseexists(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, &_alglib_env_state);
4751     alglib_impl::ae_state_clear(&_alglib_env_state);
4752     return *(reinterpret_cast<bool*>(&result));
4753 }
4754 
4755 /*************************************************************************
4756 This function returns I-th diagonal element of the sparse matrix.
4757 
4758 Matrix can be in any mode (Hash-Table or CRS storage), but this  function
4759 is most efficient for CRS matrices - it requires less than 50 CPU  cycles
4760 to extract diagonal element. For Hash-Table matrices we still  have  O(1)
4761 query time, but function is many times slower.
4762 
4763 INPUT PARAMETERS
4764     S           -   sparse M*N matrix in Hash-Table representation.
4765                     Exception will be thrown for CRS matrix.
4766     I           -   index of the element to modify, 0<=I<min(M,N)
4767 
4768 RESULT
4769     value of S[I,I] or zero (in case no element with such index is found)
4770 
4771   -- ALGLIB PROJECT --
4772      Copyright 14.10.2011 by Bochkanov Sergey
4773 *************************************************************************/
sparsegetdiagonal(const sparsematrix & s,const ae_int_t i,const xparams _xparams)4774 double sparsegetdiagonal(const sparsematrix &s, const ae_int_t i, const xparams _xparams)
4775 {
4776     jmp_buf _break_jump;
4777     alglib_impl::ae_state _alglib_env_state;
4778     alglib_impl::ae_state_init(&_alglib_env_state);
4779     if( setjmp(_break_jump) )
4780     {
4781 #if !defined(AE_NO_EXCEPTIONS)
4782         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4783 #else
4784         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4785         return 0;
4786 #endif
4787     }
4788     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4789     if( _xparams.flags!=0x0 )
4790         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4791     double result = alglib_impl::sparsegetdiagonal(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, &_alglib_env_state);
4792     alglib_impl::ae_state_clear(&_alglib_env_state);
4793     return *(reinterpret_cast<double*>(&result));
4794 }
4795 
4796 /*************************************************************************
4797 This function calculates matrix-vector product  S*x.  Matrix  S  must  be
4798 stored in CRS or SKS format (exception will be thrown otherwise).
4799 
4800 INPUT PARAMETERS
4801     S           -   sparse M*N matrix in CRS or SKS format.
4802     X           -   array[N], input vector. For  performance  reasons  we
4803                     make only quick checks - we check that array size  is
4804                     at least N, but we do not check for NAN's or INF's.
4805     Y           -   output buffer, possibly preallocated. In case  buffer
4806                     size is too small to store  result,  this  buffer  is
4807                     automatically resized.
4808 
4809 OUTPUT PARAMETERS
4810     Y           -   array[M], S*x
4811 
4812 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
4813 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
4814 this function.
4815 
4816   -- ALGLIB PROJECT --
4817      Copyright 14.10.2011 by Bochkanov Sergey
4818 *************************************************************************/
sparsemv(const sparsematrix & s,const real_1d_array & x,real_1d_array & y,const xparams _xparams)4819 void sparsemv(const sparsematrix &s, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
4820 {
4821     jmp_buf _break_jump;
4822     alglib_impl::ae_state _alglib_env_state;
4823     alglib_impl::ae_state_init(&_alglib_env_state);
4824     if( setjmp(_break_jump) )
4825     {
4826 #if !defined(AE_NO_EXCEPTIONS)
4827         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4828 #else
4829         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4830         return;
4831 #endif
4832     }
4833     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4834     if( _xparams.flags!=0x0 )
4835         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4836     alglib_impl::sparsemv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
4837     alglib_impl::ae_state_clear(&_alglib_env_state);
4838     return;
4839 }
4840 
4841 /*************************************************************************
4842 This function calculates matrix-vector product  S^T*x. Matrix S  must  be
4843 stored in CRS or SKS format (exception will be thrown otherwise).
4844 
4845 INPUT PARAMETERS
4846     S           -   sparse M*N matrix in CRS or SKS format.
4847     X           -   array[M], input vector. For  performance  reasons  we
4848                     make only quick checks - we check that array size  is
4849                     at least M, but we do not check for NAN's or INF's.
4850     Y           -   output buffer, possibly preallocated. In case  buffer
4851                     size is too small to store  result,  this  buffer  is
4852                     automatically resized.
4853 
4854 OUTPUT PARAMETERS
4855     Y           -   array[N], S^T*x
4856 
4857 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
4858 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
4859 this function.
4860 
4861   -- ALGLIB PROJECT --
4862      Copyright 14.10.2011 by Bochkanov Sergey
4863 *************************************************************************/
sparsemtv(const sparsematrix & s,const real_1d_array & x,real_1d_array & y,const xparams _xparams)4864 void sparsemtv(const sparsematrix &s, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
4865 {
4866     jmp_buf _break_jump;
4867     alglib_impl::ae_state _alglib_env_state;
4868     alglib_impl::ae_state_init(&_alglib_env_state);
4869     if( setjmp(_break_jump) )
4870     {
4871 #if !defined(AE_NO_EXCEPTIONS)
4872         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4873 #else
4874         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4875         return;
4876 #endif
4877     }
4878     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4879     if( _xparams.flags!=0x0 )
4880         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4881     alglib_impl::sparsemtv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
4882     alglib_impl::ae_state_clear(&_alglib_env_state);
4883     return;
4884 }
4885 
4886 /*************************************************************************
4887 This function calculates generalized sparse matrix-vector product
4888 
4889     y := alpha*op(S)*x + beta*y
4890 
4891 Matrix S must be stored in CRS or SKS format (exception  will  be  thrown
4892 otherwise). op(S) can be either S or S^T.
4893 
4894 NOTE: this  function  expects  Y  to  be  large enough to store result. No
4895       automatic preallocation happens for smaller arrays.
4896 
4897 INPUT PARAMETERS
4898     S           -   sparse matrix in CRS or SKS format.
4899     Alpha       -   source coefficient
4900     OpS         -   operation type:
4901                     * OpS=0     =>  op(S) = S
4902                     * OpS=1     =>  op(S) = S^T
4903     X           -   input vector, must have at least Cols(op(S))+IX elements
4904     IX          -   subvector offset
4905     Beta        -   destination coefficient
4906     Y           -   preallocated output array, must have at least Rows(op(S))+IY elements
4907     IY          -   subvector offset
4908 
4909 OUTPUT PARAMETERS
4910     Y           -   elements [IY...IY+Rows(op(S))-1] are replaced by result,
4911                     other elements are not modified
4912 
4913 HANDLING OF SPECIAL CASES:
4914 * below M=Rows(op(S)) and N=Cols(op(S)). Although current  ALGLIB  version
4915   does not allow you to  create  zero-sized  sparse  matrices,  internally
4916   ALGLIB  can  deal  with  such matrices. So, comments for M or N equal to
4917   zero are for internal use only.
4918 * if M=0, then subroutine does nothing. It does not even touch arrays.
4919 * if N=0 or Alpha=0.0, then:
4920   * if Beta=0, then Y is filled by zeros. S and X are  not  referenced  at
4921     all. Initial values of Y are ignored (we do not  multiply  Y by  zero,
4922     we just rewrite it by zeros)
4923   * if Beta<>0, then Y is replaced by Beta*Y
4924 * if M>0, N>0, Alpha<>0, but  Beta=0, then  Y is replaced by alpha*op(S)*x
4925   initial state of Y  is ignored (rewritten without initial multiplication
4926   by zeros).
4927 
4928 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
4929 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
4930 this function.
4931 
4932   -- ALGLIB PROJECT --
4933      Copyright 10.12.2019 by Bochkanov Sergey
4934 *************************************************************************/
sparsegemv(const sparsematrix & s,const double alpha,const ae_int_t ops,const real_1d_array & x,const ae_int_t ix,const double beta,const real_1d_array & y,const ae_int_t iy,const xparams _xparams)4935 void sparsegemv(const sparsematrix &s, const double alpha, const ae_int_t ops, const real_1d_array &x, const ae_int_t ix, const double beta, const real_1d_array &y, const ae_int_t iy, const xparams _xparams)
4936 {
4937     jmp_buf _break_jump;
4938     alglib_impl::ae_state _alglib_env_state;
4939     alglib_impl::ae_state_init(&_alglib_env_state);
4940     if( setjmp(_break_jump) )
4941     {
4942 #if !defined(AE_NO_EXCEPTIONS)
4943         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4944 #else
4945         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4946         return;
4947 #endif
4948     }
4949     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4950     if( _xparams.flags!=0x0 )
4951         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4952     alglib_impl::sparsegemv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), alpha, ops, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, beta, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
4953     alglib_impl::ae_state_clear(&_alglib_env_state);
4954     return;
4955 }
4956 
4957 /*************************************************************************
4958 This function simultaneously calculates two matrix-vector products:
4959     S*x and S^T*x.
4960 S must be square (non-rectangular) matrix stored in  CRS  or  SKS  format
4961 (exception will be thrown otherwise).
4962 
4963 INPUT PARAMETERS
4964     S           -   sparse N*N matrix in CRS or SKS format.
4965     X           -   array[N], input vector. For  performance  reasons  we
4966                     make only quick checks - we check that array size  is
4967                     at least N, but we do not check for NAN's or INF's.
4968     Y0          -   output buffer, possibly preallocated. In case  buffer
4969                     size is too small to store  result,  this  buffer  is
4970                     automatically resized.
4971     Y1          -   output buffer, possibly preallocated. In case  buffer
4972                     size is too small to store  result,  this  buffer  is
4973                     automatically resized.
4974 
4975 OUTPUT PARAMETERS
4976     Y0          -   array[N], S*x
4977     Y1          -   array[N], S^T*x
4978 
4979 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
4980 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
4981 this function.
4982 
4983   -- ALGLIB PROJECT --
4984      Copyright 14.10.2011 by Bochkanov Sergey
4985 *************************************************************************/
sparsemv2(const sparsematrix & s,const real_1d_array & x,real_1d_array & y0,real_1d_array & y1,const xparams _xparams)4986 void sparsemv2(const sparsematrix &s, const real_1d_array &x, real_1d_array &y0, real_1d_array &y1, const xparams _xparams)
4987 {
4988     jmp_buf _break_jump;
4989     alglib_impl::ae_state _alglib_env_state;
4990     alglib_impl::ae_state_init(&_alglib_env_state);
4991     if( setjmp(_break_jump) )
4992     {
4993 #if !defined(AE_NO_EXCEPTIONS)
4994         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4995 #else
4996         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4997         return;
4998 #endif
4999     }
5000     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5001     if( _xparams.flags!=0x0 )
5002         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5003     alglib_impl::sparsemv2(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y0.c_ptr()), const_cast<alglib_impl::ae_vector*>(y1.c_ptr()), &_alglib_env_state);
5004     alglib_impl::ae_state_clear(&_alglib_env_state);
5005     return;
5006 }
5007 
5008 /*************************************************************************
5009 This function calculates matrix-vector product  S*x, when S is  symmetric
5010 matrix. Matrix S  must be stored in CRS or SKS format  (exception will be
5011 thrown otherwise).
5012 
5013 INPUT PARAMETERS
5014     S           -   sparse M*M matrix in CRS or SKS format.
5015     IsUpper     -   whether upper or lower triangle of S is given:
5016                     * if upper triangle is given,  only   S[i,j] for j>=i
5017                       are used, and lower triangle is ignored (it can  be
5018                       empty - these elements are not referenced at all).
5019                     * if lower triangle is given,  only   S[i,j] for j<=i
5020                       are used, and upper triangle is ignored.
5021     X           -   array[N], input vector. For  performance  reasons  we
5022                     make only quick checks - we check that array size  is
5023                     at least N, but we do not check for NAN's or INF's.
5024     Y           -   output buffer, possibly preallocated. In case  buffer
5025                     size is too small to store  result,  this  buffer  is
5026                     automatically resized.
5027 
5028 OUTPUT PARAMETERS
5029     Y           -   array[M], S*x
5030 
5031 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5032 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5033 this function.
5034 
5035   -- ALGLIB PROJECT --
5036      Copyright 14.10.2011 by Bochkanov Sergey
5037 *************************************************************************/
sparsesmv(const sparsematrix & s,const bool isupper,const real_1d_array & x,real_1d_array & y,const xparams _xparams)5038 void sparsesmv(const sparsematrix &s, const bool isupper, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
5039 {
5040     jmp_buf _break_jump;
5041     alglib_impl::ae_state _alglib_env_state;
5042     alglib_impl::ae_state_init(&_alglib_env_state);
5043     if( setjmp(_break_jump) )
5044     {
5045 #if !defined(AE_NO_EXCEPTIONS)
5046         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5047 #else
5048         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5049         return;
5050 #endif
5051     }
5052     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5053     if( _xparams.flags!=0x0 )
5054         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5055     alglib_impl::sparsesmv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
5056     alglib_impl::ae_state_clear(&_alglib_env_state);
5057     return;
5058 }
5059 
5060 /*************************************************************************
5061 This function calculates vector-matrix-vector product x'*S*x, where  S is
5062 symmetric matrix. Matrix S must be stored in CRS or SKS format (exception
5063 will be thrown otherwise).
5064 
5065 INPUT PARAMETERS
5066     S           -   sparse M*M matrix in CRS or SKS format.
5067     IsUpper     -   whether upper or lower triangle of S is given:
5068                     * if upper triangle is given,  only   S[i,j] for j>=i
5069                       are used, and lower triangle is ignored (it can  be
5070                       empty - these elements are not referenced at all).
5071                     * if lower triangle is given,  only   S[i,j] for j<=i
5072                       are used, and upper triangle is ignored.
5073     X           -   array[N], input vector. For  performance  reasons  we
5074                     make only quick checks - we check that array size  is
5075                     at least N, but we do not check for NAN's or INF's.
5076 
5077 RESULT
5078     x'*S*x
5079 
5080 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5081 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5082 this function.
5083 
5084   -- ALGLIB PROJECT --
5085      Copyright 27.01.2014 by Bochkanov Sergey
5086 *************************************************************************/
sparsevsmv(const sparsematrix & s,const bool isupper,const real_1d_array & x,const xparams _xparams)5087 double sparsevsmv(const sparsematrix &s, const bool isupper, const real_1d_array &x, const xparams _xparams)
5088 {
5089     jmp_buf _break_jump;
5090     alglib_impl::ae_state _alglib_env_state;
5091     alglib_impl::ae_state_init(&_alglib_env_state);
5092     if( setjmp(_break_jump) )
5093     {
5094 #if !defined(AE_NO_EXCEPTIONS)
5095         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5096 #else
5097         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5098         return 0;
5099 #endif
5100     }
5101     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5102     if( _xparams.flags!=0x0 )
5103         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5104     double result = alglib_impl::sparsevsmv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
5105     alglib_impl::ae_state_clear(&_alglib_env_state);
5106     return *(reinterpret_cast<double*>(&result));
5107 }
5108 
5109 /*************************************************************************
5110 This function calculates matrix-matrix product  S*A.  Matrix  S  must  be
5111 stored in CRS or SKS format (exception will be thrown otherwise).
5112 
5113 INPUT PARAMETERS
5114     S           -   sparse M*N matrix in CRS or SKS format.
5115     A           -   array[N][K], input dense matrix. For  performance reasons
5116                     we make only quick checks - we check that array size
5117                     is at least N, but we do not check for NAN's or INF's.
5118     K           -   number of columns of matrix (A).
5119     B           -   output buffer, possibly preallocated. In case  buffer
5120                     size is too small to store  result,  this  buffer  is
5121                     automatically resized.
5122 
5123 OUTPUT PARAMETERS
5124     B           -   array[M][K], S*A
5125 
5126 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5127 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5128 this function.
5129 
5130   -- ALGLIB PROJECT --
5131      Copyright 14.10.2011 by Bochkanov Sergey
5132 *************************************************************************/
sparsemm(const sparsematrix & s,const real_2d_array & a,const ae_int_t k,real_2d_array & b,const xparams _xparams)5133 void sparsemm(const sparsematrix &s, const real_2d_array &a, const ae_int_t k, real_2d_array &b, const xparams _xparams)
5134 {
5135     jmp_buf _break_jump;
5136     alglib_impl::ae_state _alglib_env_state;
5137     alglib_impl::ae_state_init(&_alglib_env_state);
5138     if( setjmp(_break_jump) )
5139     {
5140 #if !defined(AE_NO_EXCEPTIONS)
5141         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5142 #else
5143         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5144         return;
5145 #endif
5146     }
5147     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5148     if( _xparams.flags!=0x0 )
5149         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5150     alglib_impl::sparsemm(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), k, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), &_alglib_env_state);
5151     alglib_impl::ae_state_clear(&_alglib_env_state);
5152     return;
5153 }
5154 
5155 /*************************************************************************
5156 This function calculates matrix-matrix product  S^T*A. Matrix S  must  be
5157 stored in CRS or SKS format (exception will be thrown otherwise).
5158 
5159 INPUT PARAMETERS
5160     S           -   sparse M*N matrix in CRS or SKS format.
5161     A           -   array[M][K], input dense matrix. For performance reasons
5162                     we make only quick checks - we check that array size  is
5163                     at least M, but we do not check for NAN's or INF's.
5164     K           -   number of columns of matrix (A).
5165     B           -   output buffer, possibly preallocated. In case  buffer
5166                     size is too small to store  result,  this  buffer  is
5167                     automatically resized.
5168 
5169 OUTPUT PARAMETERS
5170     B           -   array[N][K], S^T*A
5171 
5172 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5173 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5174 this function.
5175 
5176   -- ALGLIB PROJECT --
5177      Copyright 14.10.2011 by Bochkanov Sergey
5178 *************************************************************************/
sparsemtm(const sparsematrix & s,const real_2d_array & a,const ae_int_t k,real_2d_array & b,const xparams _xparams)5179 void sparsemtm(const sparsematrix &s, const real_2d_array &a, const ae_int_t k, real_2d_array &b, const xparams _xparams)
5180 {
5181     jmp_buf _break_jump;
5182     alglib_impl::ae_state _alglib_env_state;
5183     alglib_impl::ae_state_init(&_alglib_env_state);
5184     if( setjmp(_break_jump) )
5185     {
5186 #if !defined(AE_NO_EXCEPTIONS)
5187         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5188 #else
5189         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5190         return;
5191 #endif
5192     }
5193     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5194     if( _xparams.flags!=0x0 )
5195         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5196     alglib_impl::sparsemtm(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), k, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), &_alglib_env_state);
5197     alglib_impl::ae_state_clear(&_alglib_env_state);
5198     return;
5199 }
5200 
5201 /*************************************************************************
5202 This function simultaneously calculates two matrix-matrix products:
5203     S*A and S^T*A.
5204 S  must  be  square (non-rectangular) matrix stored in CRS or  SKS  format
5205 (exception will be thrown otherwise).
5206 
5207 INPUT PARAMETERS
5208     S           -   sparse N*N matrix in CRS or SKS format.
5209     A           -   array[N][K], input dense matrix. For performance reasons
5210                     we make only quick checks - we check that array size  is
5211                     at least N, but we do not check for NAN's or INF's.
5212     K           -   number of columns of matrix (A).
5213     B0          -   output buffer, possibly preallocated. In case  buffer
5214                     size is too small to store  result,  this  buffer  is
5215                     automatically resized.
5216     B1          -   output buffer, possibly preallocated. In case  buffer
5217                     size is too small to store  result,  this  buffer  is
5218                     automatically resized.
5219 
5220 OUTPUT PARAMETERS
5221     B0          -   array[N][K], S*A
5222     B1          -   array[N][K], S^T*A
5223 
5224 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5225 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5226 this function.
5227 
5228   -- ALGLIB PROJECT --
5229      Copyright 14.10.2011 by Bochkanov Sergey
5230 *************************************************************************/
sparsemm2(const sparsematrix & s,const real_2d_array & a,const ae_int_t k,real_2d_array & b0,real_2d_array & b1,const xparams _xparams)5231 void sparsemm2(const sparsematrix &s, const real_2d_array &a, const ae_int_t k, real_2d_array &b0, real_2d_array &b1, const xparams _xparams)
5232 {
5233     jmp_buf _break_jump;
5234     alglib_impl::ae_state _alglib_env_state;
5235     alglib_impl::ae_state_init(&_alglib_env_state);
5236     if( setjmp(_break_jump) )
5237     {
5238 #if !defined(AE_NO_EXCEPTIONS)
5239         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5240 #else
5241         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5242         return;
5243 #endif
5244     }
5245     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5246     if( _xparams.flags!=0x0 )
5247         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5248     alglib_impl::sparsemm2(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), k, const_cast<alglib_impl::ae_matrix*>(b0.c_ptr()), const_cast<alglib_impl::ae_matrix*>(b1.c_ptr()), &_alglib_env_state);
5249     alglib_impl::ae_state_clear(&_alglib_env_state);
5250     return;
5251 }
5252 
5253 /*************************************************************************
5254 This function calculates matrix-matrix product  S*A, when S  is  symmetric
5255 matrix. Matrix S must be stored in CRS or SKS format  (exception  will  be
5256 thrown otherwise).
5257 
5258 INPUT PARAMETERS
5259     S           -   sparse M*M matrix in CRS or SKS format.
5260     IsUpper     -   whether upper or lower triangle of S is given:
5261                     * if upper triangle is given,  only   S[i,j] for j>=i
5262                       are used, and lower triangle is ignored (it can  be
5263                       empty - these elements are not referenced at all).
5264                     * if lower triangle is given,  only   S[i,j] for j<=i
5265                       are used, and upper triangle is ignored.
5266     A           -   array[N][K], input dense matrix. For performance reasons
5267                     we make only quick checks - we check that array size is
5268                     at least N, but we do not check for NAN's or INF's.
5269     K           -   number of columns of matrix (A).
5270     B           -   output buffer, possibly preallocated. In case  buffer
5271                     size is too small to store  result,  this  buffer  is
5272                     automatically resized.
5273 
5274 OUTPUT PARAMETERS
5275     B           -   array[M][K], S*A
5276 
5277 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5278 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5279 this function.
5280 
5281   -- ALGLIB PROJECT --
5282      Copyright 14.10.2011 by Bochkanov Sergey
5283 *************************************************************************/
sparsesmm(const sparsematrix & s,const bool isupper,const real_2d_array & a,const ae_int_t k,real_2d_array & b,const xparams _xparams)5284 void sparsesmm(const sparsematrix &s, const bool isupper, const real_2d_array &a, const ae_int_t k, real_2d_array &b, const xparams _xparams)
5285 {
5286     jmp_buf _break_jump;
5287     alglib_impl::ae_state _alglib_env_state;
5288     alglib_impl::ae_state_init(&_alglib_env_state);
5289     if( setjmp(_break_jump) )
5290     {
5291 #if !defined(AE_NO_EXCEPTIONS)
5292         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5293 #else
5294         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5295         return;
5296 #endif
5297     }
5298     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5299     if( _xparams.flags!=0x0 )
5300         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5301     alglib_impl::sparsesmm(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), k, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), &_alglib_env_state);
5302     alglib_impl::ae_state_clear(&_alglib_env_state);
5303     return;
5304 }
5305 
5306 /*************************************************************************
5307 This function calculates matrix-vector product op(S)*x, when x is  vector,
5308 S is symmetric triangular matrix, op(S) is transposition or no  operation.
5309 Matrix S must be stored in CRS or SKS format  (exception  will  be  thrown
5310 otherwise).
5311 
5312 INPUT PARAMETERS
5313     S           -   sparse square matrix in CRS or SKS format.
5314     IsUpper     -   whether upper or lower triangle of S is used:
5315                     * if upper triangle is given,  only   S[i,j] for  j>=i
5316                       are used, and lower triangle is  ignored (it can  be
5317                       empty - these elements are not referenced at all).
5318                     * if lower triangle is given,  only   S[i,j] for  j<=i
5319                       are used, and upper triangle is ignored.
5320     IsUnit      -   unit or non-unit diagonal:
5321                     * if True, diagonal elements of triangular matrix  are
5322                       considered equal to 1.0. Actual elements  stored  in
5323                       S are not referenced at all.
5324                     * if False, diagonal stored in S is used
5325     OpType      -   operation type:
5326                     * if 0, S*x is calculated
5327                     * if 1, (S^T)*x is calculated (transposition)
5328     X           -   array[N] which stores input  vector.  For  performance
5329                     reasons we make only quick  checks  -  we  check  that
5330                     array  size  is  at  least  N, but we do not check for
5331                     NAN's or INF's.
5332     Y           -   possibly  preallocated  input   buffer.  Automatically
5333                     resized if its size is too small.
5334 
5335 OUTPUT PARAMETERS
5336     Y           -   array[N], op(S)*x
5337 
5338 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5339 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5340 this function.
5341 
5342   -- ALGLIB PROJECT --
5343      Copyright 20.01.2014 by Bochkanov Sergey
5344 *************************************************************************/
void sparsetrmv(const sparsematrix &s, const bool isupper, const bool isunit, const ae_int_t optype, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    // Thin C++ shim over the computational core alglib_impl::sparsetrmv()
    // (y := op(S)*x for a triangular sparse S). The C core reports errors
    // via longjmp(); we install a setjmp() recovery point and translate a
    // core error into a C++ exception (or a sticky error flag when the
    // library is built with AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // const_cast: c_ptr() returns pointers to const; the core takes
    // non-const pointers (ALGLIB generated-wrapper convention).
    alglib_impl::sparsetrmv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, isunit, optype, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5366 
5367 /*************************************************************************
5368 This function solves linear system op(S)*y=x  where  x  is  vector,  S  is
5369 symmetric  triangular  matrix,  op(S)  is  transposition  or no operation.
5370 Matrix S must be stored in CRS or SKS format  (exception  will  be  thrown
5371 otherwise).
5372 
5373 INPUT PARAMETERS
5374     S           -   sparse square matrix in CRS or SKS format.
5375     IsUpper     -   whether upper or lower triangle of S is used:
5376                     * if upper triangle is given,  only   S[i,j] for  j>=i
5377                       are used, and lower triangle is  ignored (it can  be
5378                       empty - these elements are not referenced at all).
5379                     * if lower triangle is given,  only   S[i,j] for  j<=i
5380                       are used, and upper triangle is ignored.
5381     IsUnit      -   unit or non-unit diagonal:
5382                     * if True, diagonal elements of triangular matrix  are
5383                       considered equal to 1.0. Actual elements  stored  in
5384                       S are not referenced at all.
5385                     * if False, diagonal stored in S is used. It  is  your
5386                       responsibility  to  make  sure  that   diagonal   is
5387                       non-zero.
5388     OpType      -   operation type:
5389                     * if 0, S*x is calculated
5390                     * if 1, (S^T)*x is calculated (transposition)
5391     X           -   array[N] which stores input  vector.  For  performance
5392                     reasons we make only quick  checks  -  we  check  that
5393                     array  size  is  at  least  N, but we do not check for
5394                     NAN's or INF's.
5395 
5396 OUTPUT PARAMETERS
5397     X           -   array[N], inv(op(S))*x
5398 
5399 NOTE: this function throws exception when called for  non-CRS/SKS  matrix.
5400       You must convert your matrix  with  SparseConvertToCRS/SKS()  before
5401       using this function.
5402 
5403 NOTE: no assertion or tests are done during algorithm  operation.   It  is
5404       your responsibility to provide invertible matrix to algorithm.
5405 
5406   -- ALGLIB PROJECT --
5407      Copyright 20.01.2014 by Bochkanov Sergey
5408 *************************************************************************/
void sparsetrsv(const sparsematrix &s, const bool isupper, const bool isunit, const ae_int_t optype, const real_1d_array &x, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsetrsv() (solves op(S)*y=x in place).
    // NOTE: x is passed as a const reference but IS modified in place (it
    // receives inv(op(S))*x, per the header comment) — the const is cast
    // away below, following ALGLIB's generated-wrapper convention.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Errors in the C core arrive here via longjmp() and are re-raised as
    // a C++ exception (or recorded in a flag under AE_NO_EXCEPTIONS).
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsetrsv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, isunit, optype, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5430 
5431 /*************************************************************************
5432 This function applies permutation given by permutation table P (as opposed
5433 to product form of permutation) to sparse symmetric  matrix  A,  given  by
5434 either upper or lower triangle: B := P*A*P'.
5435 
5436 This function allocates completely new instance of B. Use buffered version
5437 SparseSymmPermTblBuf() if you want to reuse already allocated structure.
5438 
5439 INPUT PARAMETERS
5440     A           -   sparse square matrix in CRS format.
5441     IsUpper     -   whether upper or lower triangle of A is used:
5442                     * if upper triangle is given,  only   A[i,j] for  j>=i
5443                       are used, and lower triangle is  ignored (it can  be
5444                       empty - these elements are not referenced at all).
5445                     * if lower triangle is given,  only   A[i,j] for  j<=i
5446                       are used, and upper triangle is ignored.
5447     P           -   array[N] which stores permutation table;  P[I]=J means
5448                     that I-th row/column of matrix  A  is  moved  to  J-th
5449                     position. For performance reasons we do NOT check that
                    position. For performance reasons we do NOT check that
                    P[] is a correct permutation (i.e. that there are  no
                    repetitions); we only check that all its elements are
                    in the [0,N) range.
5453 
5454 OUTPUT PARAMETERS
5455     B           -   permuted matrix.  Permutation  is  applied  to A  from
5456                     the both sides, only upper or lower triangle (depending
5457                     on IsUpper) is stored.
5458 
5459 NOTE: this function throws exception when called for non-CRS  matrix.  You
5460       must convert your matrix with SparseConvertToCRS() before using this
5461       function.
5462 
5463   -- ALGLIB PROJECT --
5464      Copyright 05.10.2020 by Bochkanov Sergey.
5465 *************************************************************************/
void sparsesymmpermtbl(const sparsematrix &a, const bool isupper, const integer_1d_array &p, sparsematrix &b, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsesymmpermtbl() (B := P*A*P' for a
    // symmetric A given by one triangle). Core errors arrive via longjmp()
    // into the setjmp() frame below and are converted to a C++ exception
    // (or an error flag when built with AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // const_cast: c_ptr() returns pointers to const; the core mutates b.
    alglib_impl::sparsesymmpermtbl(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(p.c_ptr()), const_cast<alglib_impl::sparsematrix*>(b.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5487 
5488 /*************************************************************************
5489 This function is a buffered version  of  SparseSymmPermTbl()  that  reuses
5490 previously allocated storage in B as much as possible.
5491 
5492 This function applies permutation given by permutation table P (as opposed
5493 to product form of permutation) to sparse symmetric  matrix  A,  given  by
5494 either upper or lower triangle: B := P*A*P'.
5495 
5496 INPUT PARAMETERS
5497     A           -   sparse square matrix in CRS format.
5498     IsUpper     -   whether upper or lower triangle of A is used:
5499                     * if upper triangle is given,  only   A[i,j] for  j>=i
5500                       are used, and lower triangle is  ignored (it can  be
5501                       empty - these elements are not referenced at all).
5502                     * if lower triangle is given,  only   A[i,j] for  j<=i
5503                       are used, and upper triangle is ignored.
5504     P           -   array[N] which stores permutation table;  P[I]=J means
5505                     that I-th row/column of matrix  A  is  moved  to  J-th
5506                     position. For performance reasons we do NOT check that
                    position. For performance reasons we do NOT check that
                    P[] is a correct permutation (i.e. that there are  no
                    repetitions); we only check that all its elements are
                    in the [0,N) range.
5510     B           -   sparse matrix object that will hold output.
5511                     Previously allocated memory will be reused as much  as
5512                     possible.
5513 
5514 OUTPUT PARAMETERS
5515     B           -   permuted matrix.  Permutation  is  applied  to A  from
5516                     the both sides, only upper or lower triangle (depending
5517                     on IsUpper) is stored.
5518 
5519 NOTE: this function throws exception when called for non-CRS  matrix.  You
5520       must convert your matrix with SparseConvertToCRS() before using this
5521       function.
5522 
5523   -- ALGLIB PROJECT --
5524      Copyright 05.10.2020 by Bochkanov Sergey.
5525 *************************************************************************/
void sparsesymmpermtblbuf(const sparsematrix &a, const bool isupper, const integer_1d_array &p, const sparsematrix &b, const xparams _xparams)
{
    // Buffered variant of sparsesymmpermtbl(): reuses storage already
    // allocated in b. NOTE: b is taken by const reference but IS the
    // output (const is cast away below) — ALGLIB generated-wrapper
    // convention for buffered outputs.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // longjmp() target for errors raised inside the C core; converted to
    // a C++ exception (or an error flag under AE_NO_EXCEPTIONS).
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsesymmpermtblbuf(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(p.c_ptr()), const_cast<alglib_impl::sparsematrix*>(b.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5547 
5548 /*************************************************************************
5549 This procedure resizes Hash-Table matrix. It can be called when you  have
5550 deleted too many elements from the matrix, and you want to  free unneeded
5551 memory.
5552 
5553   -- ALGLIB PROJECT --
5554      Copyright 14.10.2011 by Bochkanov Sergey
5555 *************************************************************************/
sparseresizematrix(const sparsematrix & s,const xparams _xparams)5556 void sparseresizematrix(const sparsematrix &s, const xparams _xparams)
5557 {
5558     jmp_buf _break_jump;
5559     alglib_impl::ae_state _alglib_env_state;
5560     alglib_impl::ae_state_init(&_alglib_env_state);
5561     if( setjmp(_break_jump) )
5562     {
5563 #if !defined(AE_NO_EXCEPTIONS)
5564         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5565 #else
5566         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5567         return;
5568 #endif
5569     }
5570     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5571     if( _xparams.flags!=0x0 )
5572         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5573     alglib_impl::sparseresizematrix(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
5574     alglib_impl::ae_state_clear(&_alglib_env_state);
5575     return;
5576 }
5577 
5578 /*************************************************************************
5579 This  function  is  used  to enumerate all elements of the sparse matrix.
5580 Before  first  call  user  initializes  T0 and T1 counters by zero. These
5581 counters are used to remember current position in a  matrix;  after  each
5582 call they are updated by the function.
5583 
5584 Subsequent calls to this function return non-zero elements of the  sparse
5585 matrix, one by one. If you enumerate CRS matrix, matrix is traversed from
5586 left to right, from top to bottom. In case you enumerate matrix stored as
5587 Hash table, elements are returned in random order.
5588 
5589 EXAMPLE
5590     > T0=0
5591     > T1=0
5592     > while SparseEnumerate(S,T0,T1,I,J,V) do
5593     >     ....do something with I,J,V
5594 
5595 INPUT PARAMETERS
5596     S           -   sparse M*N matrix in Hash-Table or CRS representation.
5597     T0          -   internal counter
5598     T1          -   internal counter
5599 
5600 OUTPUT PARAMETERS
5601     T0          -   new value of the internal counter
5602     T1          -   new value of the internal counter
5603     I           -   row index of non-zero element, 0<=I<M.
5604     J           -   column index of non-zero element, 0<=J<N
5605     V           -   value of the T-th element
5606 
5607 RESULT
5608     True in case of success (next non-zero element was retrieved)
5609     False in case all non-zero elements were enumerated
5610 
5611 NOTE: you may call SparseRewriteExisting() during enumeration, but it  is
5612       THE  ONLY  matrix  modification  function  you  can  call!!!  Other
5613       matrix modification functions should not be called during enumeration!
5614 
5615   -- ALGLIB PROJECT --
5616      Copyright 14.03.2012 by Bochkanov Sergey
5617 *************************************************************************/
bool sparseenumerate(const sparsematrix &s, ae_int_t &t0, ae_int_t &t1, ae_int_t &i, ae_int_t &j, double &v, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparseenumerate(): yields the next
    // non-zero element, advancing the (t0,t1) cursor. Returns false when
    // enumeration is exhausted. Errors in the C core arrive via longjmp()
    // into the setjmp() frame and become a C++ exception (or an error
    // flag plus a 'false'-like 0 return under AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::sparseenumerate(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &t0, &t1, &i, &j, &v, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // Reinterpret the C-core ae_bool as C++ bool (ALGLIB generated-code
    // idiom; assumes compatible representations — TODO(review): confirm).
    return *(reinterpret_cast<bool*>(&result));
}
5639 
5640 /*************************************************************************
5641 This function rewrites existing (non-zero) element. It  returns  True   if
5642 element  exists  or  False,  when  it  is  called for non-existing  (zero)
5643 element.
5644 
5645 This function works with any kind of the matrix.
5646 
5647 The purpose of this function is to provide convenient thread-safe  way  to
5648 modify  sparse  matrix.  Such  modification  (already  existing element is
5649 rewritten) is guaranteed to be thread-safe without any synchronization, as
5650 long as different threads modify different elements.
5651 
5652 INPUT PARAMETERS
5653     S           -   sparse M*N matrix in any kind of representation
5654                     (Hash, SKS, CRS).
5655     I           -   row index of non-zero element to modify, 0<=I<M
5656     J           -   column index of non-zero element to modify, 0<=J<N
5657     V           -   value to rewrite, must be finite number
5658 
5659 OUTPUT PARAMETERS
5660     S           -   modified matrix
5661 RESULT
5662     True in case when element exists
5663     False in case when element doesn't exist or it is zero
5664 
5665   -- ALGLIB PROJECT --
5666      Copyright 14.03.2012 by Bochkanov Sergey
5667 *************************************************************************/
bool sparserewriteexisting(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const double v, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparserewriteexisting(): overwrites an
    // already-present element S[i,j] with v; returns false if the element
    // does not exist. Core errors arrive via longjmp() into the setjmp()
    // frame below and become a C++ exception (or an error flag under
    // AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::sparserewriteexisting(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, v, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // Reinterpret the C-core ae_bool as C++ bool (ALGLIB generated-code
    // idiom; assumes compatible representations — TODO(review): confirm).
    return *(reinterpret_cast<bool*>(&result));
}
5689 
5690 /*************************************************************************
5691 This function returns I-th row of the sparse matrix. Matrix must be stored
5692 in CRS or SKS format.
5693 
5694 INPUT PARAMETERS:
5695     S           -   sparse M*N matrix in CRS format
5696     I           -   row index, 0<=I<M
5697     IRow        -   output buffer, can be  preallocated.  In  case  buffer
5698                     size  is  too  small  to  store  I-th   row,   it   is
5699                     automatically reallocated.
5700 
5701 OUTPUT PARAMETERS:
    IRow        -   array[N], I-th row.
5703 
5704 NOTE: this function has O(N) running time, where N is a  column  count. It
      allocates and fills an N-element array,  even  though  most  of  its
      elements are zero.
5707 
5708 NOTE: If you have O(non-zeros-per-row) time and memory  requirements,  use
5709       SparseGetCompressedRow() function. It  returns  data  in  compressed
5710       format.
5711 
5712 NOTE: when  incorrect  I  (outside  of  [0,M-1]) or  matrix (non  CRS/SKS)
5713       is passed, this function throws exception.
5714 
5715   -- ALGLIB PROJECT --
5716      Copyright 10.12.2014 by Bochkanov Sergey
5717 *************************************************************************/
void sparsegetrow(const sparsematrix &s, const ae_int_t i, real_1d_array &irow, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsegetrow(): extracts row I of a
    // CRS/SKS matrix into irow (resized by the core if too small). Core
    // errors arrive via longjmp() into the setjmp() frame below and are
    // converted to a C++ exception (or an error flag under
    // AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsegetrow(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, const_cast<alglib_impl::ae_vector*>(irow.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5739 
5740 /*************************************************************************
5741 This function returns I-th row of the sparse matrix IN COMPRESSED FORMAT -
5742 only non-zero elements are returned (with their indexes). Matrix  must  be
5743 stored in CRS or SKS format.
5744 
5745 INPUT PARAMETERS:
5746     S           -   sparse M*N matrix in CRS format
5747     I           -   row index, 0<=I<M
5748     ColIdx      -   output buffer for column indexes, can be preallocated.
5749                     In case buffer size is too small to store I-th row, it
5750                     is automatically reallocated.
5751     Vals        -   output buffer for values, can be preallocated. In case
5752                     buffer size is too small to  store  I-th  row,  it  is
5753                     automatically reallocated.
5754 
5755 OUTPUT PARAMETERS:
5756     ColIdx      -   column   indexes   of  non-zero  elements,  sorted  by
5757                     ascending. Symbolically non-zero elements are  counted
5758                     (i.e. if you allocated place for element, but  it  has
5759                     zero numerical value - it is counted).
5760     Vals        -   values. Vals[K] stores value of  matrix  element  with
5761                     indexes (I,ColIdx[K]). Symbolically non-zero  elements
5762                     are counted (i.e. if you allocated place for  element,
5763                     but it has zero numerical value - it is counted).
5764     NZCnt       -   number of symbolically non-zero elements per row.
5765 
5766 NOTE: when  incorrect  I  (outside  of  [0,M-1]) or  matrix (non  CRS/SKS)
5767       is passed, this function throws exception.
5768 
5769 NOTE: this function may allocate additional, unnecessary place for  ColIdx
5770       and Vals arrays. It is dictated by  performance  reasons  -  on  SKS
5771       matrices it is faster  to  allocate  space  at  the  beginning  with
5772       some "extra"-space, than performing two passes over matrix  -  first
5773       time to calculate exact space required for data, second  time  -  to
5774       store data itself.
5775 
5776   -- ALGLIB PROJECT --
5777      Copyright 10.12.2014 by Bochkanov Sergey
5778 *************************************************************************/
void sparsegetcompressedrow(const sparsematrix &s, const ae_int_t i, integer_1d_array &colidx, real_1d_array &vals, ae_int_t &nzcnt, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsegetcompressedrow(): returns row I
    // in compressed form — column indexes in colidx, values in vals,
    // count of symbolically non-zero entries in nzcnt. Core errors arrive
    // via longjmp() into the setjmp() frame below and become a C++
    // exception (or an error flag under AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsegetcompressedrow(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, const_cast<alglib_impl::ae_vector*>(colidx.c_ptr()), const_cast<alglib_impl::ae_vector*>(vals.c_ptr()), &nzcnt, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5800 
5801 /*************************************************************************
5802 This function performs efficient in-place  transpose  of  SKS  matrix.  No
5803 additional memory is allocated during transposition.
5804 
5805 This function supports only skyline storage format (SKS).
5806 
5807 INPUT PARAMETERS
5808     S       -   sparse matrix in SKS format.
5809 
5810 OUTPUT PARAMETERS
5811     S           -   sparse matrix, transposed.
5812 
5813   -- ALGLIB PROJECT --
5814      Copyright 16.01.2014 by Bochkanov Sergey
5815 *************************************************************************/
sparsetransposesks(const sparsematrix & s,const xparams _xparams)5816 void sparsetransposesks(const sparsematrix &s, const xparams _xparams)
5817 {
5818     jmp_buf _break_jump;
5819     alglib_impl::ae_state _alglib_env_state;
5820     alglib_impl::ae_state_init(&_alglib_env_state);
5821     if( setjmp(_break_jump) )
5822     {
5823 #if !defined(AE_NO_EXCEPTIONS)
5824         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5825 #else
5826         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5827         return;
5828 #endif
5829     }
5830     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5831     if( _xparams.flags!=0x0 )
5832         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5833     alglib_impl::sparsetransposesks(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
5834     alglib_impl::ae_state_clear(&_alglib_env_state);
5835     return;
5836 }
5837 
5838 /*************************************************************************
5839 This function performs transpose of CRS matrix.
5840 
5841 INPUT PARAMETERS
5842     S       -   sparse matrix in CRS format.
5843 
5844 OUTPUT PARAMETERS
5845     S           -   sparse matrix, transposed.
5846 
5847 NOTE: internal  temporary  copy  is  allocated   for   the   purposes   of
5848       transposition. It is deallocated after transposition.
5849 
5850   -- ALGLIB PROJECT --
5851      Copyright 30.01.2018 by Bochkanov Sergey
5852 *************************************************************************/
sparsetransposecrs(const sparsematrix & s,const xparams _xparams)5853 void sparsetransposecrs(const sparsematrix &s, const xparams _xparams)
5854 {
5855     jmp_buf _break_jump;
5856     alglib_impl::ae_state _alglib_env_state;
5857     alglib_impl::ae_state_init(&_alglib_env_state);
5858     if( setjmp(_break_jump) )
5859     {
5860 #if !defined(AE_NO_EXCEPTIONS)
5861         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5862 #else
5863         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5864         return;
5865 #endif
5866     }
5867     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5868     if( _xparams.flags!=0x0 )
5869         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5870     alglib_impl::sparsetransposecrs(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
5871     alglib_impl::ae_state_clear(&_alglib_env_state);
5872     return;
5873 }
5874 
5875 /*************************************************************************
5876 This function performs copying with transposition of CRS matrix.
5877 
5878 INPUT PARAMETERS
5879     S0      -   sparse matrix in CRS format.
5880 
5881 OUTPUT PARAMETERS
5882     S1      -   sparse matrix, transposed
5883 
5884   -- ALGLIB PROJECT --
5885      Copyright 23.07.2018 by Bochkanov Sergey
5886 *************************************************************************/
void sparsecopytransposecrs(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsecopytransposecrs(): writes the
    // transpose of CRS matrix s0 into a freshly allocated s1. Core errors
    // arrive via longjmp() into the setjmp() frame below and become a C++
    // exception (or an error flag under AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsecopytransposecrs(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5908 
5909 /*************************************************************************
5910 This function performs copying with transposition of CRS matrix  (buffered
5911 version which reuses memory already allocated by  the  target as  much  as
5912 possible).
5913 
5914 INPUT PARAMETERS
5915     S0      -   sparse matrix in CRS format.
5916 
5917 OUTPUT PARAMETERS
5918     S1      -   sparse matrix, transposed; previously allocated memory  is
5919                 reused if possible.
5920 
5921   -- ALGLIB PROJECT --
5922      Copyright 23.07.2018 by Bochkanov Sergey
5923 *************************************************************************/
void sparsecopytransposecrsbuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
{
    // Buffered variant of sparsecopytransposecrs(): reuses memory already
    // allocated in s1. NOTE: s1 is taken by const reference but IS the
    // output (const is cast away below) — ALGLIB generated-wrapper
    // convention for buffered outputs.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // longjmp() target for errors raised inside the C core; converted to
    // a C++ exception (or an error flag under AE_NO_EXCEPTIONS).
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsecopytransposecrsbuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5945 
5946 /*************************************************************************
5947 This  function  performs  in-place  conversion  to  desired sparse storage
5948 format.
5949 
5950 INPUT PARAMETERS
5951     S0      -   sparse matrix in any format.
5952     Fmt     -   desired storage format  of  the  output,  as  returned  by
5953                 SparseGetMatrixType() function:
5954                 * 0 for hash-based storage
5955                 * 1 for CRS
5956                 * 2 for SKS
5957 
5958 OUTPUT PARAMETERS
5959     S0          -   sparse matrix in requested format.
5960 
5961 NOTE: in-place conversion wastes a lot of memory which is  used  to  store
5962       temporaries.  If  you  perform  a  lot  of  repeated conversions, we
5963       recommend to use out-of-place buffered  conversion  functions,  like
5964       SparseCopyToBuf(), which can reuse already allocated memory.
5965 
5966   -- ALGLIB PROJECT --
5967      Copyright 16.01.2014 by Bochkanov Sergey
5968 *************************************************************************/
void sparseconvertto(const sparsematrix &s0, const ae_int_t fmt, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparseconvertto(): converts s0 in place
    // to the storage format selected by fmt (0=hash, 1=CRS, 2=SKS, per
    // the header comment). NOTE: s0 is modified despite the const
    // reference — const is cast away below (ALGLIB wrapper convention).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // longjmp() target for errors raised inside the C core; converted to
    // a C++ exception (or an error flag under AE_NO_EXCEPTIONS).
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparseconvertto(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), fmt, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5990 
5991 /*************************************************************************
5992 This  function  performs out-of-place conversion to desired sparse storage
5993 format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
5994 S1 is reused to maximum extent possible.
5995 
5996 INPUT PARAMETERS
5997     S0      -   sparse matrix in any format.
5998     Fmt     -   desired storage format  of  the  output,  as  returned  by
5999                 SparseGetMatrixType() function:
6000                 * 0 for hash-based storage
6001                 * 1 for CRS
6002                 * 2 for SKS
6003 
6004 OUTPUT PARAMETERS
6005     S1          -   sparse matrix in requested format.
6006 
6007   -- ALGLIB PROJECT --
6008      Copyright 16.01.2014 by Bochkanov Sergey
6009 *************************************************************************/
sparsecopytobuf(const sparsematrix & s0,const ae_int_t fmt,const sparsematrix & s1,const xparams _xparams)6010 void sparsecopytobuf(const sparsematrix &s0, const ae_int_t fmt, const sparsematrix &s1, const xparams _xparams)
6011 {
6012     jmp_buf _break_jump;
6013     alglib_impl::ae_state _alglib_env_state;
6014     alglib_impl::ae_state_init(&_alglib_env_state);
6015     if( setjmp(_break_jump) )
6016     {
6017 #if !defined(AE_NO_EXCEPTIONS)
6018         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6019 #else
6020         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6021         return;
6022 #endif
6023     }
6024     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6025     if( _xparams.flags!=0x0 )
6026         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6027     alglib_impl::sparsecopytobuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), fmt, const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6028     alglib_impl::ae_state_clear(&_alglib_env_state);
6029     return;
6030 }
6031 
6032 /*************************************************************************
6033 This function performs in-place conversion to Hash table storage.
6034 
6035 INPUT PARAMETERS
6036     S           -   sparse matrix in CRS format.
6037 
6038 OUTPUT PARAMETERS
6039     S           -   sparse matrix in Hash table format.
6040 
6041 NOTE: this  function  has   no  effect  when  called with matrix which  is
6042       already in Hash table mode.
6043 
6044 NOTE: in-place conversion involves allocation of temporary arrays. If  you
6045       perform a lot of repeated in- place  conversions,  it  may  lead  to
6046       memory fragmentation. Consider using out-of-place SparseCopyToHashBuf()
6047       function in this case.
6048 
6049   -- ALGLIB PROJECT --
6050      Copyright 20.07.2012 by Bochkanov Sergey
6051 *************************************************************************/
sparseconverttohash(const sparsematrix & s,const xparams _xparams)6052 void sparseconverttohash(const sparsematrix &s, const xparams _xparams)
6053 {
6054     jmp_buf _break_jump;
6055     alglib_impl::ae_state _alglib_env_state;
6056     alglib_impl::ae_state_init(&_alglib_env_state);
6057     if( setjmp(_break_jump) )
6058     {
6059 #if !defined(AE_NO_EXCEPTIONS)
6060         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6061 #else
6062         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6063         return;
6064 #endif
6065     }
6066     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6067     if( _xparams.flags!=0x0 )
6068         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6069     alglib_impl::sparseconverttohash(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6070     alglib_impl::ae_state_clear(&_alglib_env_state);
6071     return;
6072 }
6073 
6074 /*************************************************************************
6075 This  function  performs  out-of-place  conversion  to  Hash table storage
6076 format. S0 is copied to S1 and converted on-the-fly.
6077 
6078 INPUT PARAMETERS
6079     S0          -   sparse matrix in any format.
6080 
6081 OUTPUT PARAMETERS
6082     S1          -   sparse matrix in Hash table format.
6083 
6084 NOTE: if S0 is stored as Hash-table, it is just copied without conversion.
6085 
6086 NOTE: this function de-allocates memory  occupied  by  S1 before  starting
6087       conversion. If you perform a  lot  of  repeated  conversions, it may
6088       lead to memory fragmentation. In this case we recommend you  to  use
6089       SparseCopyToHashBuf() function which re-uses memory in S1 as much as
6090       possible.
6091 
6092   -- ALGLIB PROJECT --
6093      Copyright 20.07.2012 by Bochkanov Sergey
6094 *************************************************************************/
sparsecopytohash(const sparsematrix & s0,sparsematrix & s1,const xparams _xparams)6095 void sparsecopytohash(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
6096 {
6097     jmp_buf _break_jump;
6098     alglib_impl::ae_state _alglib_env_state;
6099     alglib_impl::ae_state_init(&_alglib_env_state);
6100     if( setjmp(_break_jump) )
6101     {
6102 #if !defined(AE_NO_EXCEPTIONS)
6103         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6104 #else
6105         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6106         return;
6107 #endif
6108     }
6109     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6110     if( _xparams.flags!=0x0 )
6111         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6112     alglib_impl::sparsecopytohash(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6113     alglib_impl::ae_state_clear(&_alglib_env_state);
6114     return;
6115 }
6116 
6117 /*************************************************************************
6118 This  function  performs  out-of-place  conversion  to  Hash table storage
6119 format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
6120 S1 is reused to maximum extent possible.
6121 
6122 INPUT PARAMETERS
6123     S0          -   sparse matrix in any format.
6124 
6125 OUTPUT PARAMETERS
6126     S1          -   sparse matrix in Hash table format.
6127 
6128 NOTE: if S0 is stored as Hash-table, it is just copied without conversion.
6129 
6130   -- ALGLIB PROJECT --
6131      Copyright 20.07.2012 by Bochkanov Sergey
6132 *************************************************************************/
sparsecopytohashbuf(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)6133 void sparsecopytohashbuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
6134 {
6135     jmp_buf _break_jump;
6136     alglib_impl::ae_state _alglib_env_state;
6137     alglib_impl::ae_state_init(&_alglib_env_state);
6138     if( setjmp(_break_jump) )
6139     {
6140 #if !defined(AE_NO_EXCEPTIONS)
6141         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6142 #else
6143         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6144         return;
6145 #endif
6146     }
6147     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6148     if( _xparams.flags!=0x0 )
6149         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6150     alglib_impl::sparsecopytohashbuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6151     alglib_impl::ae_state_clear(&_alglib_env_state);
6152     return;
6153 }
6154 
6155 /*************************************************************************
6156 This function converts matrix to CRS format.
6157 
6158 Some  algorithms  (linear  algebra ones, for example) require matrices in
6159 CRS format. This function allows to perform in-place conversion.
6160 
6161 INPUT PARAMETERS
6162     S           -   sparse M*N matrix in any format
6163 
6164 OUTPUT PARAMETERS
6165     S           -   matrix in CRS format
6166 
6167 NOTE: this   function  has  no  effect  when  called with matrix which is
6168       already in CRS mode.
6169 
6170 NOTE: this function allocates temporary memory to store a   copy  of  the
6171       matrix. If you perform a lot of repeated conversions, we  recommend
6172       you  to  use  SparseCopyToCRSBuf()  function,   which   can   reuse
6173       previously allocated memory.
6174 
6175   -- ALGLIB PROJECT --
6176      Copyright 14.10.2011 by Bochkanov Sergey
6177 *************************************************************************/
sparseconverttocrs(const sparsematrix & s,const xparams _xparams)6178 void sparseconverttocrs(const sparsematrix &s, const xparams _xparams)
6179 {
6180     jmp_buf _break_jump;
6181     alglib_impl::ae_state _alglib_env_state;
6182     alglib_impl::ae_state_init(&_alglib_env_state);
6183     if( setjmp(_break_jump) )
6184     {
6185 #if !defined(AE_NO_EXCEPTIONS)
6186         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6187 #else
6188         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6189         return;
6190 #endif
6191     }
6192     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6193     if( _xparams.flags!=0x0 )
6194         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6195     alglib_impl::sparseconverttocrs(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6196     alglib_impl::ae_state_clear(&_alglib_env_state);
6197     return;
6198 }
6199 
6200 /*************************************************************************
6201 This  function  performs  out-of-place  conversion  to  CRS format.  S0 is
6202 copied to S1 and converted on-the-fly.
6203 
6204 INPUT PARAMETERS
6205     S0          -   sparse matrix in any format.
6206 
6207 OUTPUT PARAMETERS
6208     S1          -   sparse matrix in CRS format.
6209 
6210 NOTE: if S0 is stored as CRS, it is just copied without conversion.
6211 
6212 NOTE: this function de-allocates memory occupied by S1 before starting CRS
6213       conversion. If you perform a lot of repeated CRS conversions, it may
6214       lead to memory fragmentation. In this case we recommend you  to  use
6215       SparseCopyToCRSBuf() function which re-uses memory in S1 as much  as
6216       possible.
6217 
6218   -- ALGLIB PROJECT --
6219      Copyright 20.07.2012 by Bochkanov Sergey
6220 *************************************************************************/
sparsecopytocrs(const sparsematrix & s0,sparsematrix & s1,const xparams _xparams)6221 void sparsecopytocrs(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
6222 {
6223     jmp_buf _break_jump;
6224     alglib_impl::ae_state _alglib_env_state;
6225     alglib_impl::ae_state_init(&_alglib_env_state);
6226     if( setjmp(_break_jump) )
6227     {
6228 #if !defined(AE_NO_EXCEPTIONS)
6229         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6230 #else
6231         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6232         return;
6233 #endif
6234     }
6235     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6236     if( _xparams.flags!=0x0 )
6237         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6238     alglib_impl::sparsecopytocrs(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6239     alglib_impl::ae_state_clear(&_alglib_env_state);
6240     return;
6241 }
6242 
6243 /*************************************************************************
6244 This  function  performs  out-of-place  conversion  to  CRS format.  S0 is
6245 copied to S1 and converted on-the-fly. Memory allocated in S1 is reused to
6246 maximum extent possible.
6247 
6248 INPUT PARAMETERS
6249     S0          -   sparse matrix in any format.
6250     S1          -   matrix which may contain some pre-allocated memory, or
6251                     can be just uninitialized structure.
6252 
6253 OUTPUT PARAMETERS
6254     S1          -   sparse matrix in CRS format.
6255 
6256 NOTE: if S0 is stored as CRS, it is just copied without conversion.
6257 
6258   -- ALGLIB PROJECT --
6259      Copyright 20.07.2012 by Bochkanov Sergey
6260 *************************************************************************/
sparsecopytocrsbuf(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)6261 void sparsecopytocrsbuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
6262 {
6263     jmp_buf _break_jump;
6264     alglib_impl::ae_state _alglib_env_state;
6265     alglib_impl::ae_state_init(&_alglib_env_state);
6266     if( setjmp(_break_jump) )
6267     {
6268 #if !defined(AE_NO_EXCEPTIONS)
6269         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6270 #else
6271         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6272         return;
6273 #endif
6274     }
6275     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6276     if( _xparams.flags!=0x0 )
6277         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6278     alglib_impl::sparsecopytocrsbuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6279     alglib_impl::ae_state_clear(&_alglib_env_state);
6280     return;
6281 }
6282 
6283 /*************************************************************************
6284 This function performs in-place conversion to SKS format.
6285 
6286 INPUT PARAMETERS
6287     S           -   sparse matrix in any format.
6288 
6289 OUTPUT PARAMETERS
6290     S           -   sparse matrix in SKS format.
6291 
6292 NOTE: this  function  has   no  effect  when  called with matrix which  is
6293       already in SKS mode.
6294 
6295 NOTE: in-place conversion involves allocation of temporary arrays. If  you
6296       perform a lot of repeated in- place  conversions,  it  may  lead  to
6297       memory fragmentation. Consider using out-of-place SparseCopyToSKSBuf()
6298       function in this case.
6299 
6300   -- ALGLIB PROJECT --
6301      Copyright 15.01.2014 by Bochkanov Sergey
6302 *************************************************************************/
sparseconverttosks(const sparsematrix & s,const xparams _xparams)6303 void sparseconverttosks(const sparsematrix &s, const xparams _xparams)
6304 {
6305     jmp_buf _break_jump;
6306     alglib_impl::ae_state _alglib_env_state;
6307     alglib_impl::ae_state_init(&_alglib_env_state);
6308     if( setjmp(_break_jump) )
6309     {
6310 #if !defined(AE_NO_EXCEPTIONS)
6311         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6312 #else
6313         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6314         return;
6315 #endif
6316     }
6317     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6318     if( _xparams.flags!=0x0 )
6319         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6320     alglib_impl::sparseconverttosks(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6321     alglib_impl::ae_state_clear(&_alglib_env_state);
6322     return;
6323 }
6324 
6325 /*************************************************************************
6326 This  function  performs  out-of-place  conversion  to SKS storage format.
6327 S0 is copied to S1 and converted on-the-fly.
6328 
6329 INPUT PARAMETERS
6330     S0          -   sparse matrix in any format.
6331 
6332 OUTPUT PARAMETERS
6333     S1          -   sparse matrix in SKS format.
6334 
6335 NOTE: if S0 is stored as SKS, it is just copied without conversion.
6336 
6337 NOTE: this function de-allocates memory  occupied  by  S1 before  starting
6338       conversion. If you perform a  lot  of  repeated  conversions, it may
6339       lead to memory fragmentation. In this case we recommend you  to  use
6340       SparseCopyToSKSBuf() function which re-uses memory in S1 as much  as
6341       possible.
6342 
6343   -- ALGLIB PROJECT --
6344      Copyright 20.07.2012 by Bochkanov Sergey
6345 *************************************************************************/
sparsecopytosks(const sparsematrix & s0,sparsematrix & s1,const xparams _xparams)6346 void sparsecopytosks(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
6347 {
6348     jmp_buf _break_jump;
6349     alglib_impl::ae_state _alglib_env_state;
6350     alglib_impl::ae_state_init(&_alglib_env_state);
6351     if( setjmp(_break_jump) )
6352     {
6353 #if !defined(AE_NO_EXCEPTIONS)
6354         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6355 #else
6356         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6357         return;
6358 #endif
6359     }
6360     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6361     if( _xparams.flags!=0x0 )
6362         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6363     alglib_impl::sparsecopytosks(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6364     alglib_impl::ae_state_clear(&_alglib_env_state);
6365     return;
6366 }
6367 
6368 /*************************************************************************
6369 This  function  performs  out-of-place  conversion  to SKS format.  S0  is
6370 copied to S1 and converted on-the-fly. Memory  allocated  in S1 is  reused
6371 to maximum extent possible.
6372 
6373 INPUT PARAMETERS
6374     S0          -   sparse matrix in any format.
6375 
6376 OUTPUT PARAMETERS
6377     S1          -   sparse matrix in SKS format.
6378 
6379 NOTE: if S0 is stored as SKS, it is just copied without conversion.
6380 
6381   -- ALGLIB PROJECT --
6382      Copyright 20.07.2012 by Bochkanov Sergey
6383 *************************************************************************/
sparsecopytosksbuf(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)6384 void sparsecopytosksbuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
6385 {
6386     jmp_buf _break_jump;
6387     alglib_impl::ae_state _alglib_env_state;
6388     alglib_impl::ae_state_init(&_alglib_env_state);
6389     if( setjmp(_break_jump) )
6390     {
6391 #if !defined(AE_NO_EXCEPTIONS)
6392         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6393 #else
6394         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6395         return;
6396 #endif
6397     }
6398     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6399     if( _xparams.flags!=0x0 )
6400         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6401     alglib_impl::sparsecopytosksbuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6402     alglib_impl::ae_state_clear(&_alglib_env_state);
6403     return;
6404 }
6405 
6406 /*************************************************************************
6407 This function returns type of the matrix storage format.
6408 
6409 INPUT PARAMETERS:
6410     S           -   sparse matrix.
6411 
6412 RESULT:
6413     sparse storage format used by matrix:
6414         0   -   Hash-table
6415         1   -   CRS (compressed row storage)
6416         2   -   SKS (skyline)
6417 
6418 NOTE: future  versions  of  ALGLIB  may  include additional sparse storage
6419       formats.
6420 
6421 
6422   -- ALGLIB PROJECT --
6423      Copyright 20.07.2012 by Bochkanov Sergey
6424 *************************************************************************/
sparsegetmatrixtype(const sparsematrix & s,const xparams _xparams)6425 ae_int_t sparsegetmatrixtype(const sparsematrix &s, const xparams _xparams)
6426 {
6427     jmp_buf _break_jump;
6428     alglib_impl::ae_state _alglib_env_state;
6429     alglib_impl::ae_state_init(&_alglib_env_state);
6430     if( setjmp(_break_jump) )
6431     {
6432 #if !defined(AE_NO_EXCEPTIONS)
6433         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6434 #else
6435         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6436         return 0;
6437 #endif
6438     }
6439     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6440     if( _xparams.flags!=0x0 )
6441         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6442     alglib_impl::ae_int_t result = alglib_impl::sparsegetmatrixtype(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6443     alglib_impl::ae_state_clear(&_alglib_env_state);
6444     return *(reinterpret_cast<ae_int_t*>(&result));
6445 }
6446 
6447 /*************************************************************************
6448 This function checks matrix storage format and returns True when matrix is
6449 stored using Hash table representation.
6450 
6451 INPUT PARAMETERS:
6452     S   -   sparse matrix.
6453 
6454 RESULT:
6455     True if matrix type is Hash table
6456     False if matrix type is not Hash table
6457 
6458   -- ALGLIB PROJECT --
6459      Copyright 20.07.2012 by Bochkanov Sergey
6460 *************************************************************************/
sparseishash(const sparsematrix & s,const xparams _xparams)6461 bool sparseishash(const sparsematrix &s, const xparams _xparams)
6462 {
6463     jmp_buf _break_jump;
6464     alglib_impl::ae_state _alglib_env_state;
6465     alglib_impl::ae_state_init(&_alglib_env_state);
6466     if( setjmp(_break_jump) )
6467     {
6468 #if !defined(AE_NO_EXCEPTIONS)
6469         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6470 #else
6471         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6472         return 0;
6473 #endif
6474     }
6475     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6476     if( _xparams.flags!=0x0 )
6477         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6478     ae_bool result = alglib_impl::sparseishash(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6479     alglib_impl::ae_state_clear(&_alglib_env_state);
6480     return *(reinterpret_cast<bool*>(&result));
6481 }
6482 
6483 /*************************************************************************
6484 This function checks matrix storage format and returns True when matrix is
6485 stored using CRS representation.
6486 
6487 INPUT PARAMETERS:
6488     S   -   sparse matrix.
6489 
6490 RESULT:
6491     True if matrix type is CRS
6492     False if matrix type is not CRS
6493 
6494   -- ALGLIB PROJECT --
6495      Copyright 20.07.2012 by Bochkanov Sergey
6496 *************************************************************************/
sparseiscrs(const sparsematrix & s,const xparams _xparams)6497 bool sparseiscrs(const sparsematrix &s, const xparams _xparams)
6498 {
6499     jmp_buf _break_jump;
6500     alglib_impl::ae_state _alglib_env_state;
6501     alglib_impl::ae_state_init(&_alglib_env_state);
6502     if( setjmp(_break_jump) )
6503     {
6504 #if !defined(AE_NO_EXCEPTIONS)
6505         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6506 #else
6507         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6508         return 0;
6509 #endif
6510     }
6511     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6512     if( _xparams.flags!=0x0 )
6513         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6514     ae_bool result = alglib_impl::sparseiscrs(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6515     alglib_impl::ae_state_clear(&_alglib_env_state);
6516     return *(reinterpret_cast<bool*>(&result));
6517 }
6518 
6519 /*************************************************************************
6520 This function checks matrix storage format and returns True when matrix is
6521 stored using SKS representation.
6522 
6523 INPUT PARAMETERS:
6524     S   -   sparse matrix.
6525 
6526 RESULT:
6527     True if matrix type is SKS
6528     False if matrix type is not SKS
6529 
6530   -- ALGLIB PROJECT --
6531      Copyright 20.07.2012 by Bochkanov Sergey
6532 *************************************************************************/
sparseissks(const sparsematrix & s,const xparams _xparams)6533 bool sparseissks(const sparsematrix &s, const xparams _xparams)
6534 {
6535     jmp_buf _break_jump;
6536     alglib_impl::ae_state _alglib_env_state;
6537     alglib_impl::ae_state_init(&_alglib_env_state);
6538     if( setjmp(_break_jump) )
6539     {
6540 #if !defined(AE_NO_EXCEPTIONS)
6541         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6542 #else
6543         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6544         return 0;
6545 #endif
6546     }
6547     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6548     if( _xparams.flags!=0x0 )
6549         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6550     ae_bool result = alglib_impl::sparseissks(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6551     alglib_impl::ae_state_clear(&_alglib_env_state);
6552     return *(reinterpret_cast<bool*>(&result));
6553 }
6554 
6555 /*************************************************************************
6556 The function frees all memory occupied by  sparse  matrix.  Sparse  matrix
6557 structure becomes unusable after this call.
6558 
6559 OUTPUT PARAMETERS
6560     S   -   sparse matrix to delete
6561 
6562   -- ALGLIB PROJECT --
6563      Copyright 24.07.2012 by Bochkanov Sergey
6564 *************************************************************************/
sparsefree(sparsematrix & s,const xparams _xparams)6565 void sparsefree(sparsematrix &s, const xparams _xparams)
6566 {
6567     jmp_buf _break_jump;
6568     alglib_impl::ae_state _alglib_env_state;
6569     alglib_impl::ae_state_init(&_alglib_env_state);
6570     if( setjmp(_break_jump) )
6571     {
6572 #if !defined(AE_NO_EXCEPTIONS)
6573         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6574 #else
6575         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6576         return;
6577 #endif
6578     }
6579     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6580     if( _xparams.flags!=0x0 )
6581         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6582     alglib_impl::sparsefree(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6583     alglib_impl::ae_state_clear(&_alglib_env_state);
6584     return;
6585 }
6586 
6587 /*************************************************************************
6588 The function returns number of rows of a sparse matrix.
6589 
6590 RESULT: number of rows of a sparse matrix.
6591 
6592   -- ALGLIB PROJECT --
6593      Copyright 23.08.2012 by Bochkanov Sergey
6594 *************************************************************************/
sparsegetnrows(const sparsematrix & s,const xparams _xparams)6595 ae_int_t sparsegetnrows(const sparsematrix &s, const xparams _xparams)
6596 {
6597     jmp_buf _break_jump;
6598     alglib_impl::ae_state _alglib_env_state;
6599     alglib_impl::ae_state_init(&_alglib_env_state);
6600     if( setjmp(_break_jump) )
6601     {
6602 #if !defined(AE_NO_EXCEPTIONS)
6603         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6604 #else
6605         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6606         return 0;
6607 #endif
6608     }
6609     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6610     if( _xparams.flags!=0x0 )
6611         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6612     alglib_impl::ae_int_t result = alglib_impl::sparsegetnrows(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6613     alglib_impl::ae_state_clear(&_alglib_env_state);
6614     return *(reinterpret_cast<ae_int_t*>(&result));
6615 }
6616 
6617 /*************************************************************************
6618 The function returns number of columns of a sparse matrix.
6619 
6620 RESULT: number of columns of a sparse matrix.
6621 
6622   -- ALGLIB PROJECT --
6623      Copyright 23.08.2012 by Bochkanov Sergey
6624 *************************************************************************/
sparsegetncols(const sparsematrix & s,const xparams _xparams)6625 ae_int_t sparsegetncols(const sparsematrix &s, const xparams _xparams)
6626 {
6627     jmp_buf _break_jump;
6628     alglib_impl::ae_state _alglib_env_state;
6629     alglib_impl::ae_state_init(&_alglib_env_state);
6630     if( setjmp(_break_jump) )
6631     {
6632 #if !defined(AE_NO_EXCEPTIONS)
6633         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6634 #else
6635         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6636         return 0;
6637 #endif
6638     }
6639     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6640     if( _xparams.flags!=0x0 )
6641         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6642     alglib_impl::ae_int_t result = alglib_impl::sparsegetncols(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6643     alglib_impl::ae_state_clear(&_alglib_env_state);
6644     return *(reinterpret_cast<ae_int_t*>(&result));
6645 }
6646 
6647 /*************************************************************************
6648 The function returns number of strictly upper triangular non-zero elements
6649 in  the  matrix.  It  counts  SYMBOLICALLY non-zero elements, i.e. entries
6650 in the sparse matrix data structure. If some element  has  zero  numerical
6651 value, it is still counted.
6652 
6653 This function has different cost for different types of matrices:
6654 * for hash-based matrices it involves complete pass over entire hash-table
6655   with O(NNZ) cost, where NNZ is number of non-zero elements
6656 * for CRS and SKS matrix types cost of counting is O(N) (N - matrix size).
6657 
6658 RESULT: number of non-zero elements strictly above main diagonal
6659 
6660   -- ALGLIB PROJECT --
6661      Copyright 12.02.2014 by Bochkanov Sergey
6662 *************************************************************************/
sparsegetuppercount(const sparsematrix & s,const xparams _xparams)6663 ae_int_t sparsegetuppercount(const sparsematrix &s, const xparams _xparams)
6664 {
6665     jmp_buf _break_jump;
6666     alglib_impl::ae_state _alglib_env_state;
6667     alglib_impl::ae_state_init(&_alglib_env_state);
6668     if( setjmp(_break_jump) )
6669     {
6670 #if !defined(AE_NO_EXCEPTIONS)
6671         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6672 #else
6673         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6674         return 0;
6675 #endif
6676     }
6677     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6678     if( _xparams.flags!=0x0 )
6679         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6680     alglib_impl::ae_int_t result = alglib_impl::sparsegetuppercount(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6681     alglib_impl::ae_state_clear(&_alglib_env_state);
6682     return *(reinterpret_cast<ae_int_t*>(&result));
6683 }
6684 
6685 /*************************************************************************
6686 The function returns number of strictly lower triangular non-zero elements
6687 in  the  matrix.  It  counts  SYMBOLICALLY non-zero elements, i.e. entries
6688 in the sparse matrix data structure. If some element  has  zero  numerical
6689 value, it is still counted.
6690 
6691 This function has different cost for different types of matrices:
6692 * for hash-based matrices it involves complete pass over entire hash-table
6693   with O(NNZ) cost, where NNZ is number of non-zero elements
6694 * for CRS and SKS matrix types cost of counting is O(N) (N - matrix size).
6695 
6696 RESULT: number of non-zero elements strictly below main diagonal
6697 
6698   -- ALGLIB PROJECT --
6699      Copyright 12.02.2014 by Bochkanov Sergey
6700 *************************************************************************/
sparsegetlowercount(const sparsematrix & s,const xparams _xparams)6701 ae_int_t sparsegetlowercount(const sparsematrix &s, const xparams _xparams)
6702 {
6703     jmp_buf _break_jump;
6704     alglib_impl::ae_state _alglib_env_state;
6705     alglib_impl::ae_state_init(&_alglib_env_state);
6706     if( setjmp(_break_jump) )
6707     {
6708 #if !defined(AE_NO_EXCEPTIONS)
6709         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6710 #else
6711         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6712         return 0;
6713 #endif
6714     }
6715     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6716     if( _xparams.flags!=0x0 )
6717         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6718     alglib_impl::ae_int_t result = alglib_impl::sparsegetlowercount(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6719     alglib_impl::ae_state_clear(&_alglib_env_state);
6720     return *(reinterpret_cast<ae_int_t*>(&result));
6721 }
6722 #endif
6723 
6724 #if defined(AE_COMPILE_HSSCHUR) || !defined(AE_PARTIAL_BUILD)
6725 
6726 #endif
6727 
6728 #if defined(AE_COMPILE_EVD) || !defined(AE_PARTIAL_BUILD)
6729 /*************************************************************************
6730 This object stores state of the subspace iteration algorithm.
6731 
6732 You should use ALGLIB functions to work with this object.
6733 *************************************************************************/
// Default constructor: allocates and initializes the wrapped C-core
// eigsubspacestate structure (p_struct).
_eigsubspacestate_owner::_eigsubspacestate_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap: on failure inside the C core, control longjmp()s back here;
    // any partially constructed storage is released before reporting the error.
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_eigsubspacestate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct is cleared BEFORE allocation so the error trap above can tell
    // whether there is anything to free.
    p_struct = NULL;
    p_struct = (alglib_impl::eigsubspacestate*)alglib_impl::ae_malloc(sizeof(alglib_impl::eigsubspacestate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacestate));
    alglib_impl::_eigsubspacestate_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
6762 
// Copy constructor: deep-copies rhs's C-core structure into freshly
// allocated storage; rhs must already be initialized.
_eigsubspacestate_owner::_eigsubspacestate_owner(const _eigsubspacestate_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap: free partially constructed storage, then rethrow/flag.
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_eigsubspacestate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: eigsubspacestate copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::eigsubspacestate*)alglib_impl::ae_malloc(sizeof(alglib_impl::eigsubspacestate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacestate));
    alglib_impl::_eigsubspacestate_init_copy(p_struct, const_cast<alglib_impl::eigsubspacestate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
6792 
// Copy assignment: destroys the current C-core contents in place and
// re-initializes them as a deep copy of rhs (storage is reused, not
// reallocated). Both sides must already be initialized.
_eigsubspacestate_owner& _eigsubspacestate_owner::operator=(const _eigsubspacestate_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap for failures raised by the C core during the copy.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: eigsubspacestate assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: eigsubspacestate assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_eigsubspacestate_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacestate));
    alglib_impl::_eigsubspacestate_init_copy(p_struct, const_cast<alglib_impl::eigsubspacestate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
6819 
~_eigsubspacestate_owner()6820 _eigsubspacestate_owner::~_eigsubspacestate_owner()
6821 {
6822     if( p_struct!=NULL )
6823     {
6824         alglib_impl::_eigsubspacestate_destroy(p_struct);
6825         ae_free(p_struct);
6826     }
6827 }
6828 
// Returns a pointer to the wrapped C-core structure (non-owning).
alglib_impl::eigsubspacestate* _eigsubspacestate_owner::c_ptr()
{
    return p_struct;
}
6833 
c_ptr() const6834 alglib_impl::eigsubspacestate* _eigsubspacestate_owner::c_ptr() const
6835 {
6836     return const_cast<alglib_impl::eigsubspacestate*>(p_struct);
6837 }
// Default constructor: the owner base class allocates and initializes p_struct.
eigsubspacestate::eigsubspacestate() : _eigsubspacestate_owner()
{
}
6841 
// Copy constructor: the owner base class deep-copies rhs's C-core structure.
eigsubspacestate::eigsubspacestate(const eigsubspacestate &rhs):_eigsubspacestate_owner(rhs)
{
}
6845 
// Copy assignment: delegate the deep copy to the owner base class;
// self-assignment is a no-op.
eigsubspacestate& eigsubspacestate::operator=(const eigsubspacestate &rhs)
{
    if( this!=&rhs )
        _eigsubspacestate_owner::operator=(rhs);
    return *this;
}
6853 
// Destructor: cleanup is performed by the owner base-class destructor.
eigsubspacestate::~eigsubspacestate()
{
}
6857 
6858 
6859 /*************************************************************************
6860 This object stores state of the subspace iteration algorithm.
6861 
6862 You should use ALGLIB functions to work with this object.
6863 *************************************************************************/
// Default constructor: allocates and initializes the wrapped C-core
// eigsubspacereport structure (p_struct).
_eigsubspacereport_owner::_eigsubspacereport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap: on failure inside the C core, control longjmp()s back here;
    // any partially constructed storage is released before reporting the error.
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_eigsubspacereport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct is cleared BEFORE allocation so the error trap above can tell
    // whether there is anything to free.
    p_struct = NULL;
    p_struct = (alglib_impl::eigsubspacereport*)alglib_impl::ae_malloc(sizeof(alglib_impl::eigsubspacereport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacereport));
    alglib_impl::_eigsubspacereport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
6892 
// Copy constructor: deep-copies rhs's C-core structure into freshly
// allocated storage; rhs must already be initialized.
_eigsubspacereport_owner::_eigsubspacereport_owner(const _eigsubspacereport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap: free partially constructed storage, then rethrow/flag.
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_eigsubspacereport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: eigsubspacereport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::eigsubspacereport*)alglib_impl::ae_malloc(sizeof(alglib_impl::eigsubspacereport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacereport));
    alglib_impl::_eigsubspacereport_init_copy(p_struct, const_cast<alglib_impl::eigsubspacereport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
6922 
// Copy assignment: destroys the current C-core contents in place and
// re-initializes them as a deep copy of rhs (storage is reused, not
// reallocated). Both sides must already be initialized.
_eigsubspacereport_owner& _eigsubspacereport_owner::operator=(const _eigsubspacereport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap for failures raised by the C core during the copy.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: eigsubspacereport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: eigsubspacereport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_eigsubspacereport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacereport));
    alglib_impl::_eigsubspacereport_init_copy(p_struct, const_cast<alglib_impl::eigsubspacereport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
6949 
~_eigsubspacereport_owner()6950 _eigsubspacereport_owner::~_eigsubspacereport_owner()
6951 {
6952     if( p_struct!=NULL )
6953     {
6954         alglib_impl::_eigsubspacereport_destroy(p_struct);
6955         ae_free(p_struct);
6956     }
6957 }
6958 
// Returns a pointer to the wrapped C-core structure (non-owning).
alglib_impl::eigsubspacereport* _eigsubspacereport_owner::c_ptr()
{
    return p_struct;
}
6963 
c_ptr() const6964 alglib_impl::eigsubspacereport* _eigsubspacereport_owner::c_ptr() const
6965 {
6966     return const_cast<alglib_impl::eigsubspacereport*>(p_struct);
6967 }
// Default constructor: the owner base class allocates p_struct, then the
// public iterationscount field is bound as a reference into the C-core
// structure so reads/writes go straight through.
eigsubspacereport::eigsubspacereport() : _eigsubspacereport_owner() ,iterationscount(p_struct->iterationscount)
{
}
6971 
// Copy constructor: the owner base class deep-copies rhs's C-core structure;
// iterationscount is re-bound into this object's own p_struct.
eigsubspacereport::eigsubspacereport(const eigsubspacereport &rhs):_eigsubspacereport_owner(rhs) ,iterationscount(p_struct->iterationscount)
{
}
6975 
// Copy assignment: delegate the deep copy to the owner base class;
// self-assignment is a no-op. The iterationscount reference member keeps
// pointing into this object's own p_struct, so no re-binding is needed.
eigsubspacereport& eigsubspacereport::operator=(const eigsubspacereport &rhs)
{
    if( this!=&rhs )
        _eigsubspacereport_owner::operator=(rhs);
    return *this;
}
6983 
// Destructor: cleanup is performed by the owner base-class destructor.
eigsubspacereport::~eigsubspacereport()
{
}
6987 
6988 /*************************************************************************
6989 This function initializes subspace iteration solver. This solver  is  used
6990 to solve symmetric real eigenproblems where just a few (top K) eigenvalues
and corresponding eigenvectors are required.
6992 
6993 This solver can be significantly faster than  complete  EVD  decomposition
6994 in the following case:
6995 * when only just a small fraction  of  top  eigenpairs  of dense matrix is
6996   required. When K approaches N, this solver is slower than complete dense
6997   EVD
6998 * when problem matrix is sparse (and/or is not known explicitly, i.e. only
6999   matrix-matrix product can be performed)
7000 
7001 USAGE (explicit dense/sparse matrix):
7002 1. User initializes algorithm state with eigsubspacecreate() call
7003 2. [optional] User tunes solver parameters by calling eigsubspacesetcond()
7004    or other functions
7005 3. User  calls  eigsubspacesolvedense() or eigsubspacesolvesparse() methods,
7006    which take algorithm state and 2D array or alglib.sparsematrix object.
7007 
7008 USAGE (out-of-core mode):
7009 1. User initializes algorithm state with eigsubspacecreate() call
7010 2. [optional] User tunes solver parameters by calling eigsubspacesetcond()
7011    or other functions
7012 3. User activates out-of-core mode of  the  solver  and  repeatedly  calls
7013    communication functions in a loop like below:
7014    > alglib.eigsubspaceoocstart(state)
7015    > while alglib.eigsubspaceooccontinue(state) do
7016    >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7017    >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7018    >     [calculate  Y=A*X, with X=R^NxM]
7019    >     alglib.eigsubspaceoocsendresult(state, in Y)
7020    > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7021 
7022 INPUT PARAMETERS:
7023     N       -   problem dimensionality, N>0
    K       -   number of top eigenvectors to calculate, 0<K<=N.
7025 
7026 OUTPUT PARAMETERS:
7027     State   -   structure which stores algorithm state
7028 
7029 NOTE: if you solve many similar EVD problems you may  find  it  useful  to
7030       reuse previous subspace as warm-start point for new EVD problem.  It
7031       can be done with eigsubspacesetwarmstart() function.
7032 
7033   -- ALGLIB --
7034      Copyright 16.01.2017 by Bochkanov Sergey
7035 *************************************************************************/
void eigsubspacecreate(const ae_int_t n, const ae_int_t k, eigsubspacestate &state, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);    // rethrow as C++ exception
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);   // exception-free build: record error flag
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core.
    alglib_impl::eigsubspacecreate(n, k, const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7057 
7058 /*************************************************************************
7059 Buffered version of constructor which aims to reuse  previously  allocated
7060 memory as much as possible.
7061 
7062   -- ALGLIB --
7063      Copyright 16.01.2017 by Bochkanov Sergey
7064 *************************************************************************/
// NOTE: state is declared const but is in fact an output parameter (the
// generated wrapper strips const via const_cast before calling the core).
void eigsubspacecreatebuf(const ae_int_t n, const ae_int_t k, const eigsubspacestate &state, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core (reuses previously allocated memory).
    alglib_impl::eigsubspacecreatebuf(n, k, const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7086 
7087 /*************************************************************************
This function sets stopping criteria for the solver:
7089 * error in eigenvector/value allowed by solver
7090 * maximum number of iterations to perform
7091 
7092 INPUT PARAMETERS:
7093     State       -   solver structure
7094     Eps         -   eps>=0,  with non-zero value used to tell solver  that
7095                     it can  stop  after  all  eigenvalues  converged  with
7096                     error  roughly  proportional  to  eps*MAX(LAMBDA_MAX),
7097                     where LAMBDA_MAX is a maximum eigenvalue.
7098                     Zero  value  means  that  no  check  for  precision is
7099                     performed.
7100     MaxIts      -   maxits>=0,  with non-zero value used  to  tell  solver
7101                     that it can stop after maxits  steps  (no  matter  how
7102                     precise current estimate is)
7103 
7104 NOTE: passing  eps=0  and  maxits=0  results  in  automatic  selection  of
7105       moderate eps as stopping criteria (1.0E-6 in current implementation,
7106       but it may change without notice).
7107 
7108 NOTE: very small values of eps are possible (say, 1.0E-12),  although  the
7109       larger problem you solve (N and/or K), the  harder  it  is  to  find
7110       precise eigenvectors because rounding errors tend to accumulate.
7111 
7112 NOTE: passing non-zero eps results in  some performance  penalty,  roughly
7113       equal to 2N*(2K)^2 FLOPs per iteration. These additional computations
7114       are required in order to estimate current error in  eigenvalues  via
7115       Rayleigh-Ritz process.
7116       Most of this additional time is  spent  in  construction  of  ~2Kx2K
7117       symmetric  subproblem  whose  eigenvalues  are  checked  with  exact
7118       eigensolver.
7119       This additional time is negligible if you search for eigenvalues  of
7120       the large dense matrix, but may become noticeable on  highly  sparse
7121       EVD problems, where cost of matrix-matrix product is low.
7122       If you set eps to exactly zero,  Rayleigh-Ritz  phase  is completely
7123       turned off.
7124 
7125   -- ALGLIB --
7126      Copyright 16.01.2017 by Bochkanov Sergey
7127 *************************************************************************/
void eigsubspacesetcond(const eigsubspacestate &state, const double eps, const ae_int_t maxits, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core (stores eps/maxits in the solver state).
    alglib_impl::eigsubspacesetcond(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), eps, maxits, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7149 
7150 /*************************************************************************
7151 This function sets warm-start mode of the solver: next call to the  solver
7152 will reuse previous subspace as warm-start  point.  It  can  significantly
7153 speed-up convergence when you solve many similar eigenproblems.
7154 
7155 INPUT PARAMETERS:
7156     State       -   solver structure
7157     UseWarmStart-   either True or False
7158 
7159   -- ALGLIB --
7160      Copyright 12.11.2017 by Bochkanov Sergey
7161 *************************************************************************/
void eigsubspacesetwarmstart(const eigsubspacestate &state, const bool usewarmstart, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core (toggles warm-start mode in the state).
    alglib_impl::eigsubspacesetwarmstart(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), usewarmstart, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7183 
7184 /*************************************************************************
7185 This  function  initiates  out-of-core  mode  of  subspace eigensolver. It
7186 should be used in conjunction with other out-of-core-related functions  of
this subpackage in a loop like below:
7188 
7189 > alglib.eigsubspaceoocstart(state)
7190 > while alglib.eigsubspaceooccontinue(state) do
7191 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7192 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7193 >     [calculate  Y=A*X, with X=R^NxM]
7194 >     alglib.eigsubspaceoocsendresult(state, in Y)
7195 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7196 
7197 INPUT PARAMETERS:
7198     State       -   solver object
7199     MType       -   matrix type:
7200                     * 0 for real  symmetric  matrix  (solver  assumes that
7201                       matrix  being   processed  is  symmetric;  symmetric
7202                       direct eigensolver is used for  smaller  subproblems
7203                       arising during solution of larger "full" task)
7204                     Future versions of ALGLIB may  introduce  support  for
7205                     other  matrix   types;   for   now,   only   symmetric
7206                     eigenproblems are supported.
7207 
7208 
7209   -- ALGLIB --
7210      Copyright 16.01.2017 by Bochkanov Sergey
7211 *************************************************************************/
void eigsubspaceoocstart(const eigsubspacestate &state, const ae_int_t mtype, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core (switches solver to out-of-core mode).
    alglib_impl::eigsubspaceoocstart(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), mtype, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7233 
7234 /*************************************************************************
7235 This function performs subspace iteration  in  the  out-of-core  mode.  It
7236 should be used in conjunction with other out-of-core-related functions  of
this subpackage in a loop like below:
7238 
7239 > alglib.eigsubspaceoocstart(state)
7240 > while alglib.eigsubspaceooccontinue(state) do
7241 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7242 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7243 >     [calculate  Y=A*X, with X=R^NxM]
7244 >     alglib.eigsubspaceoocsendresult(state, in Y)
7245 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7246 
7247 
7248   -- ALGLIB --
7249      Copyright 16.01.2017 by Bochkanov Sergey
7250 *************************************************************************/
eigsubspaceooccontinue(const eigsubspacestate & state,const xparams _xparams)7251 bool eigsubspaceooccontinue(const eigsubspacestate &state, const xparams _xparams)
7252 {
7253     jmp_buf _break_jump;
7254     alglib_impl::ae_state _alglib_env_state;
7255     alglib_impl::ae_state_init(&_alglib_env_state);
7256     if( setjmp(_break_jump) )
7257     {
7258 #if !defined(AE_NO_EXCEPTIONS)
7259         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7260 #else
7261         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7262         return 0;
7263 #endif
7264     }
7265     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7266     if( _xparams.flags!=0x0 )
7267         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7268     ae_bool result = alglib_impl::eigsubspaceooccontinue(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), &_alglib_env_state);
7269     alglib_impl::ae_state_clear(&_alglib_env_state);
7270     return *(reinterpret_cast<bool*>(&result));
7271 }
7272 
7273 /*************************************************************************
7274 This function is used to retrieve information  about  out-of-core  request
7275 sent by solver to user code: request type (current version  of  the solver
7276 sends only requests for matrix-matrix products) and request size (size  of
7277 the matrices being multiplied).
7278 
7279 This function returns just request metrics; in order  to  get contents  of
7280 the matrices being multiplied, use eigsubspaceoocgetrequestdata().
7281 
7282 It should be used in conjunction with other out-of-core-related  functions
of this subpackage in a loop like below:
7284 
7285 > alglib.eigsubspaceoocstart(state)
7286 > while alglib.eigsubspaceooccontinue(state) do
7287 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7288 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7289 >     [calculate  Y=A*X, with X=R^NxM]
7290 >     alglib.eigsubspaceoocsendresult(state, in Y)
7291 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7292 
7293 INPUT PARAMETERS:
7294     State           -   solver running in out-of-core mode
7295 
7296 OUTPUT PARAMETERS:
7297     RequestType     -   type of the request to process:
7298                         * 0 - for matrix-matrix product A*X, with A  being
7299                           NxN matrix whose eigenvalues/vectors are needed,
7300                           and X being NxREQUESTSIZE one which is  returned
7301                           by the eigsubspaceoocgetrequestdata().
7302     RequestSize     -   size of the X matrix (number of columns),  usually
7303                         it is several times larger than number of  vectors
7304                         K requested by user.
7305 
7306 
7307   -- ALGLIB --
7308      Copyright 16.01.2017 by Bochkanov Sergey
7309 *************************************************************************/
void eigsubspaceoocgetrequestinfo(const eigsubspacestate &state, ae_int_t &requesttype, ae_int_t &requestsize, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core; requesttype/requestsize are outputs.
    alglib_impl::eigsubspaceoocgetrequestinfo(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), &requesttype, &requestsize, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7331 
7332 /*************************************************************************
7333 This function is used to retrieve information  about  out-of-core  request
7334 sent by solver to user code: matrix X (array[N,RequestSize) which have  to
7335 be multiplied by out-of-core matrix A in a product A*X.
7336 
7337 This function returns just request data; in order to get size of  the data
prior to processing the request, use eigsubspaceoocgetrequestinfo().
7339 
7340 It should be used in conjunction with other out-of-core-related  functions
of this subpackage in a loop like below:
7342 
7343 > alglib.eigsubspaceoocstart(state)
7344 > while alglib.eigsubspaceooccontinue(state) do
7345 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7346 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7347 >     [calculate  Y=A*X, with X=R^NxM]
7348 >     alglib.eigsubspaceoocsendresult(state, in Y)
7349 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7350 
7351 INPUT PARAMETERS:
7352     State           -   solver running in out-of-core mode
7353     X               -   possibly  preallocated   storage;  reallocated  if
7354                         needed, left unchanged, if large enough  to  store
7355                         request data.
7356 
7357 OUTPUT PARAMETERS:
7358     X               -   array[N,RequestSize] or larger, leading  rectangle
7359                         is filled with dense matrix X.
7360 
7361 
7362   -- ALGLIB --
7363      Copyright 16.01.2017 by Bochkanov Sergey
7364 *************************************************************************/
void eigsubspaceoocgetrequestdata(const eigsubspacestate &state, real_2d_array &x, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core; x is an output matrix (reallocated
    // by the core if too small).
    alglib_impl::eigsubspaceoocgetrequestdata(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7386 
7387 /*************************************************************************
7388 This function is used to send user reply to out-of-core  request  sent  by
7389 solver. Usually it is product A*X for returned by solver matrix X.
7390 
7391 It should be used in conjunction with other out-of-core-related  functions
7392 of this subpackage in a loop like below:
7393 
7394 > alglib.eigsubspaceoocstart(state)
7395 > while alglib.eigsubspaceooccontinue(state) do
7396 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7397 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7398 >     [calculate  Y=A*X, with X=R^NxM]
7399 >     alglib.eigsubspaceoocsendresult(state, in Y)
7400 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7401 
7402 INPUT PARAMETERS:
7403     State           -   solver running in out-of-core mode
7404     AX              -   array[N,RequestSize] or larger, leading  rectangle
7405                         is filled with product A*X.
7406 
7407 
7408   -- ALGLIB --
7409      Copyright 16.01.2017 by Bochkanov Sergey
7410 *************************************************************************/
eigsubspaceoocsendresult(const eigsubspacestate & state,const real_2d_array & ax,const xparams _xparams)7411 void eigsubspaceoocsendresult(const eigsubspacestate &state, const real_2d_array &ax, const xparams _xparams)
7412 {
7413     jmp_buf _break_jump;
7414     alglib_impl::ae_state _alglib_env_state;
7415     alglib_impl::ae_state_init(&_alglib_env_state);
7416     if( setjmp(_break_jump) )
7417     {
7418 #if !defined(AE_NO_EXCEPTIONS)
7419         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7420 #else
7421         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7422         return;
7423 #endif
7424     }
7425     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7426     if( _xparams.flags!=0x0 )
7427         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7428     alglib_impl::eigsubspaceoocsendresult(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(ax.c_ptr()), &_alglib_env_state);
7429     alglib_impl::ae_state_clear(&_alglib_env_state);
7430     return;
7431 }
7432 
7433 /*************************************************************************
7434 This  function  finalizes out-of-core  mode  of  subspace eigensolver.  It
7435 should be used in conjunction with other out-of-core-related functions  of
7436 this subpackage in a loop like below:
7437 
7438 > alglib.eigsubspaceoocstart(state)
7439 > while alglib.eigsubspaceooccontinue(state) do
7440 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7441 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7442 >     [calculate  Y=A*X, with X=R^NxM]
7443 >     alglib.eigsubspaceoocsendresult(state, in Y)
7444 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7445 
7446 INPUT PARAMETERS:
7447     State       -   solver state
7448 
7449 OUTPUT PARAMETERS:
7450     W           -   array[K], depending on solver settings:
7451                     * top  K  eigenvalues ordered  by  descending   -   if
7452                       eigenvectors are returned in Z
7453                     * zeros - if invariant subspace is returned in Z
7454     Z           -   array[N,K], depending on solver settings either:
7455                     * matrix of eigenvectors found
7456                     * orthogonal basis of K-dimensional invariant subspace
7457     Rep         -   report with additional parameters
7458 
7459   -- ALGLIB --
7460      Copyright 16.01.2017 by Bochkanov Sergey
7461 *************************************************************************/
void eigsubspaceoocstop(const eigsubspacestate &state, real_1d_array &w, real_2d_array &z, eigsubspacereport &rep, const xparams _xparams)
{
    // Thin C++ shim over alglib_impl::eigsubspaceoocstop(), which finalizes
    // the out-of-core eigensolver and fills W, Z and Rep.  The core reports
    // errors via longjmp(), so a setjmp() recovery point is installed first.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Reached only when the core longjmp()s back after an error.
#if !defined(AE_NO_EXCEPTIONS)
        // Translate the core's error message into a C++ exception.
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        // Exception-free build: record the error flag and return.
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    // Register the recovery point and forward optional caller flags.
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // const_cast: the C core takes non-const pointers even for read-only args.
    alglib_impl::eigsubspaceoocstop(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), const_cast<alglib_impl::eigsubspacereport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7483 
7484 /*************************************************************************
7485 This  function runs subspace eigensolver for dense NxN symmetric matrix A,
7486 given by its upper or lower triangle.
7487 
7488 This function can not process nonsymmetric matrices.
7489 
7490 INPUT PARAMETERS:
7491     State       -   solver state
7492     A           -   array[N,N], symmetric NxN matrix given by one  of  its
7493                     triangles
7494     IsUpper     -   whether upper or lower triangle of  A  is  given  (the
7495                     other one is not referenced at all).
7496 
7497 OUTPUT PARAMETERS:
7498     W           -   array[K], top  K  eigenvalues ordered  by   descending
7499                     of their absolute values
7500     Z           -   array[N,K], matrix of eigenvectors found
7501     Rep         -   report with additional parameters
7502 
7503 NOTE: internally this function allocates a copy of NxN dense A. You should
7504       take it into account when working with very large matrices occupying
7505       almost all RAM.
7506 
7507   ! FREE EDITION OF ALGLIB:
7508   !
7509   ! Free Edition of ALGLIB supports following important features for  this
7510   ! function:
7511   ! * C++ version: x64 SIMD support using C++ intrinsics
7512   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
7513   !
7514   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
7515   ! Reference Manual in order  to  find  out  how to activate SIMD support
7516   ! in ALGLIB.
7517 
7518   ! COMMERCIAL EDITION OF ALGLIB:
7519   !
7520   ! Commercial Edition of ALGLIB includes following important improvements
7521   ! of this function:
7522   ! * high-performance native backend with same C# interface (C# version)
7523   ! * multithreading support (C++ and C# versions)
7524   ! * hardware vendor (Intel) implementations of linear algebra primitives
7525   !   (C++ and C# versions, x86/x64 platform)
7526   !
7527   ! We recommend you to read 'Working with commercial version' section  of
7528   ! ALGLIB Reference Manual in order to find out how to  use  performance-
7529   ! related features provided by commercial edition of ALGLIB.
7530 
7531   -- ALGLIB --
7532      Copyright 16.01.2017 by Bochkanov Sergey
7533 *************************************************************************/
void eigsubspacesolvedenses(const eigsubspacestate &state, const real_2d_array &a, const bool isupper, real_1d_array &w, real_2d_array &z, eigsubspacereport &rep, const xparams _xparams)
{
    // Thin C++ shim over alglib_impl::eigsubspacesolvedenses(): runs the
    // subspace eigensolver on a dense symmetric matrix given by one triangle.
    // Errors propagate from the C core via longjmp() to the setjmp() below.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Reached only on error signalled by the computational core.
#if !defined(AE_NO_EXCEPTIONS)
        // Rethrow the core's error message as a C++ exception.
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        // Exception-free build: record the error flag and return.
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    // Register the recovery point and forward optional caller flags.
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // const_cast: the C core takes non-const pointers even for read-only args.
    alglib_impl::eigsubspacesolvedenses(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), const_cast<alglib_impl::eigsubspacereport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7555 
7556 /*************************************************************************
7557 This  function runs eigensolver for dense NxN symmetric matrix A, given by
7558 upper or lower triangle.
7559 
7560 This function can not process nonsymmetric matrices.
7561 
7562 INPUT PARAMETERS:
7563     State       -   solver state
7564     A           -   NxN symmetric matrix given by one of its triangles
7565     IsUpper     -   whether upper or lower triangle of  A  is  given  (the
7566                     other one is not referenced at all).
7567 
7568 OUTPUT PARAMETERS:
7569     W           -   array[K], top  K  eigenvalues ordered  by   descending
7570                     of their absolute values
7571     Z           -   array[N,K], matrix of eigenvectors found
7572     Rep         -   report with additional parameters
7573 
7574   -- ALGLIB --
7575      Copyright 16.01.2017 by Bochkanov Sergey
7576 *************************************************************************/
eigsubspacesolvesparses(const eigsubspacestate & state,const sparsematrix & a,const bool isupper,real_1d_array & w,real_2d_array & z,eigsubspacereport & rep,const xparams _xparams)7577 void eigsubspacesolvesparses(const eigsubspacestate &state, const sparsematrix &a, const bool isupper, real_1d_array &w, real_2d_array &z, eigsubspacereport &rep, const xparams _xparams)
7578 {
7579     jmp_buf _break_jump;
7580     alglib_impl::ae_state _alglib_env_state;
7581     alglib_impl::ae_state_init(&_alglib_env_state);
7582     if( setjmp(_break_jump) )
7583     {
7584 #if !defined(AE_NO_EXCEPTIONS)
7585         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7586 #else
7587         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7588         return;
7589 #endif
7590     }
7591     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7592     if( _xparams.flags!=0x0 )
7593         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7594     alglib_impl::eigsubspacesolvesparses(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), const_cast<alglib_impl::eigsubspacereport*>(rep.c_ptr()), &_alglib_env_state);
7595     alglib_impl::ae_state_clear(&_alglib_env_state);
7596     return;
7597 }
7598 
7599 /*************************************************************************
7600 Finding the eigenvalues and eigenvectors of a symmetric matrix
7601 
7602 The algorithm finds eigen pairs of a symmetric matrix by reducing it to
7603 tridiagonal form and using the QL/QR algorithm.
7604 
7605   ! COMMERCIAL EDITION OF ALGLIB:
7606   !
7607   ! Commercial Edition of ALGLIB includes following important improvements
7608   ! of this function:
7609   ! * high-performance native backend with same C# interface (C# version)
7610   ! * hardware vendor (Intel) implementations of linear algebra primitives
7611   !   (C++ and C# versions, x86/x64 platform)
7612   !
7613   ! We recommend you to read 'Working with commercial version' section  of
7614   ! ALGLIB Reference Manual in order to find out how to  use  performance-
7615   ! related features provided by commercial edition of ALGLIB.
7616 
7617 Input parameters:
7618     A       -   symmetric matrix which is given by its upper or lower
7619                 triangular part.
7620                 Array whose indexes range within [0..N-1, 0..N-1].
7621     N       -   size of matrix A.
7622     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
7623                 If ZNeeded is equal to:
7624                  * 0, the eigenvectors are not returned;
7625                  * 1, the eigenvectors are returned.
7626     IsUpper -   storage format.
7627 
7628 Output parameters:
7629     D       -   eigenvalues in ascending order.
7630                 Array whose index ranges within [0..N-1].
7631     Z       -   if ZNeeded is equal to:
7632                  * 0, Z hasn't changed;
7633                  * 1, Z contains the eigenvectors.
7634                 Array whose indexes range within [0..N-1, 0..N-1].
7635                 The eigenvectors are stored in the matrix columns.
7636 
7637 Result:
7638     True, if the algorithm has converged.
7639     False, if the algorithm hasn't converged (rare case).
7640 
7641   -- ALGLIB --
7642      Copyright 2005-2008 by Bochkanov Sergey
7643 *************************************************************************/
smatrixevd(const real_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,real_1d_array & d,real_2d_array & z,const xparams _xparams)7644 bool smatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, real_2d_array &z, const xparams _xparams)
7645 {
7646     jmp_buf _break_jump;
7647     alglib_impl::ae_state _alglib_env_state;
7648     alglib_impl::ae_state_init(&_alglib_env_state);
7649     if( setjmp(_break_jump) )
7650     {
7651 #if !defined(AE_NO_EXCEPTIONS)
7652         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7653 #else
7654         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7655         return 0;
7656 #endif
7657     }
7658     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7659     if( _xparams.flags!=0x0 )
7660         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7661     ae_bool result = alglib_impl::smatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7662     alglib_impl::ae_state_clear(&_alglib_env_state);
7663     return *(reinterpret_cast<bool*>(&result));
7664 }
7665 
7666 /*************************************************************************
7667 Subroutine for finding the eigenvalues (and eigenvectors) of  a  symmetric
7668 matrix  in  a  given half open interval (A, B] by using  a  bisection  and
7669 inverse iteration
7670 
7671   ! COMMERCIAL EDITION OF ALGLIB:
7672   !
7673   ! Commercial Edition of ALGLIB includes following important improvements
7674   ! of this function:
7675   ! * high-performance native backend with same C# interface (C# version)
7676   ! * hardware vendor (Intel) implementations of linear algebra primitives
7677   !   (C++ and C# versions, x86/x64 platform)
7678   !
7679   ! We recommend you to read 'Working with commercial version' section  of
7680   ! ALGLIB Reference Manual in order to find out how to  use  performance-
7681   ! related features provided by commercial edition of ALGLIB.
7682 
7683 Input parameters:
7684     A       -   symmetric matrix which is given by its upper or lower
7685                 triangular part. Array [0..N-1, 0..N-1].
7686     N       -   size of matrix A.
7687     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
7688                 If ZNeeded is equal to:
7689                  * 0, the eigenvectors are not returned;
7690                  * 1, the eigenvectors are returned.
7691     IsUpperA -  storage format of matrix A.
7692     B1, B2 -    half open interval (B1, B2] to search eigenvalues in.
7693 
7694 Output parameters:
7695     M       -   number of eigenvalues found in a given half-interval (M>=0).
7696     W       -   array of the eigenvalues found.
7697                 Array whose index ranges within [0..M-1].
7698     Z       -   if ZNeeded is equal to:
7699                  * 0, Z hasn't changed;
7700                  * 1, Z contains eigenvectors.
7701                 Array whose indexes range within [0..N-1, 0..M-1].
7702                 The eigenvectors are stored in the matrix columns.
7703 
7704 Result:
7705     True, if successful. M contains the number of eigenvalues in the given
7706     half-interval (could be equal to 0), W contains the eigenvalues,
7707     Z contains the eigenvectors (if needed).
7708 
7709     False, if the bisection method subroutine wasn't able to find the
7710     eigenvalues in the given interval or if the inverse iteration subroutine
7711     wasn't able to find all the corresponding eigenvectors.
7712     In that case, the eigenvalues and eigenvectors are not returned,
7713     M is equal to 0.
7714 
7715   -- ALGLIB --
7716      Copyright 07.01.2006 by Bochkanov Sergey
7717 *************************************************************************/
smatrixevdr(const real_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,const double b1,const double b2,ae_int_t & m,real_1d_array & w,real_2d_array & z,const xparams _xparams)7718 bool smatrixevdr(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, real_2d_array &z, const xparams _xparams)
7719 {
7720     jmp_buf _break_jump;
7721     alglib_impl::ae_state _alglib_env_state;
7722     alglib_impl::ae_state_init(&_alglib_env_state);
7723     if( setjmp(_break_jump) )
7724     {
7725 #if !defined(AE_NO_EXCEPTIONS)
7726         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7727 #else
7728         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7729         return 0;
7730 #endif
7731     }
7732     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7733     if( _xparams.flags!=0x0 )
7734         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7735     ae_bool result = alglib_impl::smatrixevdr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, b1, b2, &m, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7736     alglib_impl::ae_state_clear(&_alglib_env_state);
7737     return *(reinterpret_cast<bool*>(&result));
7738 }
7739 
7740 /*************************************************************************
7741 Subroutine for finding the eigenvalues and  eigenvectors  of  a  symmetric
7742 matrix with given indexes by using bisection and inverse iteration methods.
7743 
7744 Input parameters:
7745     A       -   symmetric matrix which is given by its upper or lower
7746                 triangular part. Array whose indexes range within [0..N-1, 0..N-1].
7747     N       -   size of matrix A.
7748     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
7749                 If ZNeeded is equal to:
7750                  * 0, the eigenvectors are not returned;
7751                  * 1, the eigenvectors are returned.
7752     IsUpperA -  storage format of matrix A.
7753     I1, I2 -    index interval for searching (from I1 to I2).
7754                 0 <= I1 <= I2 <= N-1.
7755 
7756 Output parameters:
7757     W       -   array of the eigenvalues found.
7758                 Array whose index ranges within [0..I2-I1].
7759     Z       -   if ZNeeded is equal to:
7760                  * 0, Z hasn't changed;
7761                  * 1, Z contains eigenvectors.
7762                 Array whose indexes range within [0..N-1, 0..I2-I1].
7763                 In that case, the eigenvectors are stored in the matrix columns.
7764 
7765 Result:
7766     True, if successful. W contains the eigenvalues, Z contains the
7767     eigenvectors (if needed).
7768 
7769     False, if the bisection method subroutine wasn't able to find the
7770     eigenvalues in the given interval or if the inverse iteration subroutine
7771     wasn't able to find all the corresponding eigenvectors.
7772     In that case, the eigenvalues and eigenvectors are not returned.
7773 
7774   -- ALGLIB --
7775      Copyright 07.01.2006 by Bochkanov Sergey
7776 *************************************************************************/
smatrixevdi(const real_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,const ae_int_t i1,const ae_int_t i2,real_1d_array & w,real_2d_array & z,const xparams _xparams)7777 bool smatrixevdi(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, real_2d_array &z, const xparams _xparams)
7778 {
7779     jmp_buf _break_jump;
7780     alglib_impl::ae_state _alglib_env_state;
7781     alglib_impl::ae_state_init(&_alglib_env_state);
7782     if( setjmp(_break_jump) )
7783     {
7784 #if !defined(AE_NO_EXCEPTIONS)
7785         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7786 #else
7787         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7788         return 0;
7789 #endif
7790     }
7791     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7792     if( _xparams.flags!=0x0 )
7793         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7794     ae_bool result = alglib_impl::smatrixevdi(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, i1, i2, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7795     alglib_impl::ae_state_clear(&_alglib_env_state);
7796     return *(reinterpret_cast<bool*>(&result));
7797 }
7798 
7799 /*************************************************************************
7800 Finding the eigenvalues and eigenvectors of a Hermitian matrix
7801 
7802 The algorithm finds eigen pairs of a Hermitian matrix by  reducing  it  to
7803 real tridiagonal form and using the QL/QR algorithm.
7804 
7805   ! COMMERCIAL EDITION OF ALGLIB:
7806   !
7807   ! Commercial Edition of ALGLIB includes following important improvements
7808   ! of this function:
7809   ! * high-performance native backend with same C# interface (C# version)
7810   ! * hardware vendor (Intel) implementations of linear algebra primitives
7811   !   (C++ and C# versions, x86/x64 platform)
7812   !
7813   ! We recommend you to read 'Working with commercial version' section  of
7814   ! ALGLIB Reference Manual in order to find out how to  use  performance-
7815   ! related features provided by commercial edition of ALGLIB.
7816 
7817 Input parameters:
7818     A       -   Hermitian matrix which is given  by  its  upper  or  lower
7819                 triangular part.
7820                 Array whose indexes range within [0..N-1, 0..N-1].
7821     N       -   size of matrix A.
7822     IsUpper -   storage format.
7823     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
7824                 not. If ZNeeded is equal to:
7825                  * 0, the eigenvectors are not returned;
7826                  * 1, the eigenvectors are returned.
7827 
7828 Output parameters:
7829     D       -   eigenvalues in ascending order.
7830                 Array whose index ranges within [0..N-1].
7831     Z       -   if ZNeeded is equal to:
7832                  * 0, Z hasn't changed;
7833                  * 1, Z contains the eigenvectors.
7834                 Array whose indexes range within [0..N-1, 0..N-1].
7835                 The eigenvectors are stored in the matrix columns.
7836 
7837 Result:
7838     True, if the algorithm has converged.
7839     False, if the algorithm hasn't converged (rare case).
7840 
7841 Note:
7842     eigenvectors of Hermitian matrix are defined up to  multiplication  by
7843     a complex number L, such that |L|=1.
7844 
7845   -- ALGLIB --
7846      Copyright 2005, 23 March 2007 by Bochkanov Sergey
7847 *************************************************************************/
hmatrixevd(const complex_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,real_1d_array & d,complex_2d_array & z,const xparams _xparams)7848 bool hmatrixevd(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, complex_2d_array &z, const xparams _xparams)
7849 {
7850     jmp_buf _break_jump;
7851     alglib_impl::ae_state _alglib_env_state;
7852     alglib_impl::ae_state_init(&_alglib_env_state);
7853     if( setjmp(_break_jump) )
7854     {
7855 #if !defined(AE_NO_EXCEPTIONS)
7856         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7857 #else
7858         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7859         return 0;
7860 #endif
7861     }
7862     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7863     if( _xparams.flags!=0x0 )
7864         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7865     ae_bool result = alglib_impl::hmatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7866     alglib_impl::ae_state_clear(&_alglib_env_state);
7867     return *(reinterpret_cast<bool*>(&result));
7868 }
7869 
7870 /*************************************************************************
7871 Subroutine for finding the eigenvalues (and eigenvectors) of  a  Hermitian
7872 matrix  in  a  given half-interval (A, B] by using a bisection and inverse
7873 iteration
7874 
7875 Input parameters:
7876     A       -   Hermitian matrix which is given  by  its  upper  or  lower
7877                 triangular  part.  Array  whose   indexes   range   within
7878                 [0..N-1, 0..N-1].
7879     N       -   size of matrix A.
7880     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
7881                 not. If ZNeeded is equal to:
7882                  * 0, the eigenvectors are not returned;
7883                  * 1, the eigenvectors are returned.
7884     IsUpperA -  storage format of matrix A.
7885     B1, B2 -    half-interval (B1, B2] to search eigenvalues in.
7886 
7887 Output parameters:
7888     M       -   number of eigenvalues found in a given half-interval, M>=0
7889     W       -   array of the eigenvalues found.
7890                 Array whose index ranges within [0..M-1].
7891     Z       -   if ZNeeded is equal to:
7892                  * 0, Z hasn't changed;
7893                  * 1, Z contains eigenvectors.
7894                 Array whose indexes range within [0..N-1, 0..M-1].
7895                 The eigenvectors are stored in the matrix columns.
7896 
7897 Result:
7898     True, if successful. M contains the number of eigenvalues in the given
7899     half-interval (could be equal to 0), W contains the eigenvalues,
7900     Z contains the eigenvectors (if needed).
7901 
7902     False, if the bisection method subroutine  wasn't  able  to  find  the
7903     eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
7904     subroutine  wasn't  able  to  find all the corresponding eigenvectors.
7905     In that case, the eigenvalues and eigenvectors are not returned, M  is
7906     equal to 0.
7907 
7908 Note:
7909     eigen vectors of Hermitian matrix are defined up to multiplication  by
7910     a complex number L, such that |L|=1.
7911 
7912   -- ALGLIB --
7913      Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
7914 *************************************************************************/
hmatrixevdr(const complex_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,const double b1,const double b2,ae_int_t & m,real_1d_array & w,complex_2d_array & z,const xparams _xparams)7915 bool hmatrixevdr(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, complex_2d_array &z, const xparams _xparams)
7916 {
7917     jmp_buf _break_jump;
7918     alglib_impl::ae_state _alglib_env_state;
7919     alglib_impl::ae_state_init(&_alglib_env_state);
7920     if( setjmp(_break_jump) )
7921     {
7922 #if !defined(AE_NO_EXCEPTIONS)
7923         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7924 #else
7925         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7926         return 0;
7927 #endif
7928     }
7929     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7930     if( _xparams.flags!=0x0 )
7931         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7932     ae_bool result = alglib_impl::hmatrixevdr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, b1, b2, &m, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7933     alglib_impl::ae_state_clear(&_alglib_env_state);
7934     return *(reinterpret_cast<bool*>(&result));
7935 }
7936 
7937 /*************************************************************************
7938 Subroutine for finding the eigenvalues and  eigenvectors  of  a  Hermitian
7939 matrix with given indexes by using bisection and inverse iteration methods
7940 
7941 Input parameters:
7942     A       -   Hermitian matrix which is given  by  its  upper  or  lower
7943                 triangular part.
7944                 Array whose indexes range within [0..N-1, 0..N-1].
7945     N       -   size of matrix A.
7946     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
7947                 not. If ZNeeded is equal to:
7948                  * 0, the eigenvectors are not returned;
7949                  * 1, the eigenvectors are returned.
7950     IsUpperA -  storage format of matrix A.
7951     I1, I2 -    index interval for searching (from I1 to I2).
7952                 0 <= I1 <= I2 <= N-1.
7953 
7954 Output parameters:
7955     W       -   array of the eigenvalues found.
7956                 Array whose index ranges within [0..I2-I1].
7957     Z       -   if ZNeeded is equal to:
7958                  * 0, Z hasn't changed;
7959                  * 1, Z contains eigenvectors.
7960                 Array whose indexes range within [0..N-1, 0..I2-I1].
7961                 In  that  case,  the eigenvectors are stored in the matrix
7962                 columns.
7963 
7964 Result:
7965     True, if successful. W contains the eigenvalues, Z contains the
7966     eigenvectors (if needed).
7967 
7968     False, if the bisection method subroutine  wasn't  able  to  find  the
7969     eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
7970     subroutine wasn't able to find  all  the  corresponding  eigenvectors.
7971     In that case, the eigenvalues and eigenvectors are not returned.
7972 
7973 Note:
7974     eigen vectors of Hermitian matrix are defined up to multiplication  by
7975     a complex number L, such that |L|=1.
7976 
7977   -- ALGLIB --
7978      Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
7979 *************************************************************************/
bool hmatrixevdi(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, complex_2d_array &z, const xparams _xparams)
{
    // C++ interface shim: initializes an ALGLIB environment state, forwards
    // to the C-core alglib_impl::hmatrixevdi(), and converts core errors
    // (delivered via longjmp into _break_jump) into a C++ exception or,
    // in no-exception builds, a global error flag.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path: the core jumped back here through _break_jump.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    // Register the jump buffer so core errors unwind to the setjmp above.
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::hmatrixevdi(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, i1, i2, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8001 
8002 /*************************************************************************
8003 Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix
8004 
The algorithm finds the eigenpairs of a tridiagonal symmetric matrix by
using a QL/QR algorithm with implicit shifts.
8007 
8008   ! COMMERCIAL EDITION OF ALGLIB:
8009   !
8010   ! Commercial Edition of ALGLIB includes following important improvements
8011   ! of this function:
8012   ! * high-performance native backend with same C# interface (C# version)
8013   ! * hardware vendor (Intel) implementations of linear algebra primitives
8014   !   (C++ and C# versions, x86/x64 platform)
8015   !
8016   ! We recommend you to read 'Working with commercial version' section  of
8017   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8018   ! related features provided by commercial edition of ALGLIB.
8019 
8020 Input parameters:
8021     D       -   the main diagonal of a tridiagonal matrix.
8022                 Array whose index ranges within [0..N-1].
8023     E       -   the secondary diagonal of a tridiagonal matrix.
8024                 Array whose index ranges within [0..N-2].
8025     N       -   size of matrix A.
8026     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
8027                 If ZNeeded is equal to:
8028                  * 0, the eigenvectors are not needed;
8029                  * 1, the eigenvectors of a tridiagonal matrix
8030                    are multiplied by the square matrix Z. It is used if the
8031                    tridiagonal matrix is obtained by the similarity
8032                    transformation of a symmetric matrix;
8033                  * 2, the eigenvectors of a tridiagonal matrix replace the
8034                    square matrix Z;
8035                  * 3, matrix Z contains the first row of the eigenvectors
8036                    matrix.
8037     Z       -   if ZNeeded=1, Z contains the square matrix by which the
8038                 eigenvectors are multiplied.
8039                 Array whose indexes range within [0..N-1, 0..N-1].
8040 
8041 Output parameters:
8042     D       -   eigenvalues in ascending order.
8043                 Array whose index ranges within [0..N-1].
8044     Z       -   if ZNeeded is equal to:
8045                  * 0, Z hasn't changed;
8046                  * 1, Z contains the product of a given matrix (from the left)
8047                    and the eigenvectors matrix (from the right);
8048                  * 2, Z contains the eigenvectors.
8049                  * 3, Z contains the first row of the eigenvectors matrix.
8050                 If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1].
8051                 In that case, the eigenvectors are stored in the matrix columns.
8052                 If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1].
8053 
8054 Result:
8055     True, if the algorithm has converged.
8056     False, if the algorithm hasn't converged.
8057 
8058   -- LAPACK routine (version 3.0) --
8059      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
8060      Courant Institute, Argonne National Lab, and Rice University
8061      September 30, 1994
8062 *************************************************************************/
bool smatrixtdevd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, real_2d_array &z, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::smatrixtdevd(); errors raised by
    // the core longjmp back into this frame (see setjmp below) and are
    // rethrown as C++ exceptions or recorded in the global error flag.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::smatrixtdevd(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8084 
8085 /*************************************************************************
8086 Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a
8087 given half-interval (A, B] by using bisection and inverse iteration.
8088 
8089 Input parameters:
8090     D       -   the main diagonal of a tridiagonal matrix.
8091                 Array whose index ranges within [0..N-1].
8092     E       -   the secondary diagonal of a tridiagonal matrix.
8093                 Array whose index ranges within [0..N-2].
8094     N       -   size of matrix, N>=0.
8095     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
8096                 If ZNeeded is equal to:
8097                  * 0, the eigenvectors are not needed;
8098                  * 1, the eigenvectors of a tridiagonal matrix are multiplied
8099                    by the square matrix Z. It is used if the tridiagonal
8100                    matrix is obtained by the similarity transformation
8101                    of a symmetric matrix.
8102                  * 2, the eigenvectors of a tridiagonal matrix replace matrix Z.
8103     A, B    -   half-interval (A, B] to search eigenvalues in.
8104     Z       -   if ZNeeded is equal to:
8105                  * 0, Z isn't used and remains unchanged;
8106                  * 1, Z contains the square matrix (array whose indexes range
8107                    within [0..N-1, 0..N-1]) which reduces the given symmetric
8108                    matrix to tridiagonal form;
8109                  * 2, Z isn't used (but changed on the exit).
8110 
8111 Output parameters:
8112     D       -   array of the eigenvalues found.
8113                 Array whose index ranges within [0..M-1].
8114     M       -   number of eigenvalues found in the given half-interval (M>=0).
8115     Z       -   if ZNeeded is equal to:
8116                  * 0, doesn't contain any information;
8117                  * 1, contains the product of a given NxN matrix Z (from the
8118                    left) and NxM matrix of the eigenvectors found (from the
8119                    right). Array whose indexes range within [0..N-1, 0..M-1].
8120                  * 2, contains the matrix of the eigenvectors found.
8121                    Array whose indexes range within [0..N-1, 0..M-1].
8122 
8123 Result:
8124 
8125     True, if successful. In that case, M contains the number of eigenvalues
8126     in the given half-interval (could be equal to 0), D contains the eigenvalues,
8127     Z contains the eigenvectors (if needed).
8128     It should be noted that the subroutine changes the size of arrays D and Z.
8129 
8130     False, if the bisection method subroutine wasn't able to find the
8131     eigenvalues in the given interval or if the inverse iteration subroutine
8132     wasn't able to find all the corresponding eigenvectors. In that case,
8133     the eigenvalues and eigenvectors are not returned, M is equal to 0.
8134 
8135   -- ALGLIB --
8136      Copyright 31.03.2008 by Bochkanov Sergey
8137 *************************************************************************/
bool smatrixtdevdr(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const double a, const double b, ae_int_t &m, real_2d_array &z, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::smatrixtdevdr(); M is written
    // through &m by the core. Error handling follows the standard ALGLIB
    // setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::smatrixtdevdr(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, a, b, &m, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8159 
8160 /*************************************************************************
8161 Subroutine for finding tridiagonal matrix eigenvalues/vectors with given
indexes (in ascending order) by using the bisection and inverse iteration.
8163 
8164 Input parameters:
8165     D       -   the main diagonal of a tridiagonal matrix.
8166                 Array whose index ranges within [0..N-1].
8167     E       -   the secondary diagonal of a tridiagonal matrix.
8168                 Array whose index ranges within [0..N-2].
8169     N       -   size of matrix. N>=0.
8170     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
8171                 If ZNeeded is equal to:
8172                  * 0, the eigenvectors are not needed;
8173                  * 1, the eigenvectors of a tridiagonal matrix are multiplied
8174                    by the square matrix Z. It is used if the
8175                    tridiagonal matrix is obtained by the similarity transformation
8176                    of a symmetric matrix.
8177                  * 2, the eigenvectors of a tridiagonal matrix replace
8178                    matrix Z.
8179     I1, I2  -   index interval for searching (from I1 to I2).
8180                 0 <= I1 <= I2 <= N-1.
8181     Z       -   if ZNeeded is equal to:
8182                  * 0, Z isn't used and remains unchanged;
8183                  * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1])
8184                    which reduces the given symmetric matrix to  tridiagonal form;
8185                  * 2, Z isn't used (but changed on the exit).
8186 
8187 Output parameters:
8188     D       -   array of the eigenvalues found.
8189                 Array whose index ranges within [0..I2-I1].
8190     Z       -   if ZNeeded is equal to:
8191                  * 0, doesn't contain any information;
8192                  * 1, contains the product of a given NxN matrix Z (from the left) and
8193                    Nx(I2-I1) matrix of the eigenvectors found (from the right).
8194                    Array whose indexes range within [0..N-1, 0..I2-I1].
                 * 2, contains the matrix of the eigenvectors found.
8196                    Array whose indexes range within [0..N-1, 0..I2-I1].
8197 
8198 
8199 Result:
8200 
8201     True, if successful. In that case, D contains the eigenvalues,
8202     Z contains the eigenvectors (if needed).
8203     It should be noted that the subroutine changes the size of arrays D and Z.
8204 
8205     False, if the bisection method subroutine wasn't able to find the eigenvalues
8206     in the given interval or if the inverse iteration subroutine wasn't able
8207     to find all the corresponding eigenvectors. In that case, the eigenvalues
8208     and eigenvectors are not returned.
8209 
8210   -- ALGLIB --
8211      Copyright 25.12.2005 by Bochkanov Sergey
8212 *************************************************************************/
bool smatrixtdevdi(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const ae_int_t i1, const ae_int_t i2, real_2d_array &z, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::smatrixtdevdi(). Error handling
    // follows the standard ALGLIB setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::smatrixtdevdi(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, i1, i2, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8234 
8235 /*************************************************************************
8236 Finding eigenvalues and eigenvectors of a general (unsymmetric) matrix
8237 
8238   ! COMMERCIAL EDITION OF ALGLIB:
8239   !
8240   ! Commercial Edition of ALGLIB includes following important improvements
8241   ! of this function:
8242   ! * high-performance native backend with same C# interface (C# version)
8243   ! * hardware vendor (Intel) implementations of linear algebra primitives
8244   !   (C++ and C# versions, x86/x64 platform)
8245   !
8246   ! We recommend you to read 'Working with commercial version' section  of
8247   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8248   ! related features provided by commercial edition of ALGLIB.
8249 
8250 The algorithm finds eigenvalues and eigenvectors of a general matrix by
8251 using the QR algorithm with multiple shifts. The algorithm can find
8252 eigenvalues and both left and right eigenvectors.
8253 
8254 The right eigenvector is a vector x such that A*x = w*x, and the left
8255 eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex
8256 conjugate transposition of vector y).
8257 
8258 Input parameters:
8259     A       -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
8260     N       -   size of matrix A.
8261     VNeeded -   flag controlling whether eigenvectors are needed or not.
8262                 If VNeeded is equal to:
8263                  * 0, eigenvectors are not returned;
8264                  * 1, right eigenvectors are returned;
8265                  * 2, left eigenvectors are returned;
8266                  * 3, both left and right eigenvectors are returned.
8267 
8268 Output parameters:
8269     WR      -   real parts of eigenvalues.
8270                 Array whose index ranges within [0..N-1].
    WI      -   imaginary parts of eigenvalues.
8272                 Array whose index ranges within [0..N-1].
8273     VL, VR  -   arrays of left and right eigenvectors (if they are needed).
8274                 If WI[i]=0, the respective eigenvalue is a real number,
8275                 and it corresponds to the column number I of matrices VL/VR.
8276                 If WI[i]>0, we have a pair of complex conjugate numbers with
8277                 positive and negative imaginary parts:
8278                     the first eigenvalue WR[i] + sqrt(-1)*WI[i];
8279                     the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1];
8280                     WI[i]>0
8281                     WI[i+1] = -WI[i] < 0
8282                 In that case, the eigenvector  corresponding to the first
8283                 eigenvalue is located in i and i+1 columns of matrices
8284                 VL/VR (the column number i contains the real part, and the
8285                 column number i+1 contains the imaginary part), and the vector
8286                 corresponding to the second eigenvalue is a complex conjugate to
8287                 the first vector.
8288                 Arrays whose indexes range within [0..N-1, 0..N-1].
8289 
8290 Result:
8291     True, if the algorithm has converged.
8292     False, if the algorithm has not converged.
8293 
8294 Note 1:
8295     Some users may ask the following question: what if WI[N-1]>0?
8296     WI[N] must contain an eigenvalue which is complex conjugate to the
8297     N-th eigenvalue, but the array has only size N?
8298     The answer is as follows: such a situation cannot occur because the
8299     algorithm finds a pairs of eigenvalues, therefore, if WI[i]>0, I is
8300     strictly less than N-1.
8301 
8302 Note 2:
8303     The algorithm performance depends on the value of the internal parameter
8304     NS of the InternalSchurDecomposition subroutine which defines the number
8305     of shifts in the QR algorithm (similarly to the block width in block-matrix
8306     algorithms of linear algebra). If you require maximum performance
8307     on your machine, it is recommended to adjust this parameter manually.
8308 
8309 
8310 See also the InternalTREVC subroutine.
8311 
8312 The algorithm is based on the LAPACK 3.0 library.
8313 *************************************************************************/
bool rmatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t vneeded, real_1d_array &wr, real_1d_array &wi, real_2d_array &vl, real_2d_array &vr, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::rmatrixevd(); outputs are written
    // into wr/wi/vl/vr through their C-core views. Error handling follows
    // the standard ALGLIB setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::rmatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, vneeded, const_cast<alglib_impl::ae_vector*>(wr.c_ptr()), const_cast<alglib_impl::ae_vector*>(wi.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vl.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vr.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8335 #endif
8336 
8337 #if defined(AE_COMPILE_DLU) || !defined(AE_PARTIAL_BUILD)
8338 
8339 #endif
8340 
8341 #if defined(AE_COMPILE_SPTRF) || !defined(AE_PARTIAL_BUILD)
8342 
8343 #endif
8344 
8345 #if defined(AE_COMPILE_AMDORDERING) || !defined(AE_PARTIAL_BUILD)
8346 
8347 #endif
8348 
8349 #if defined(AE_COMPILE_SPCHOL) || !defined(AE_PARTIAL_BUILD)
8350 
8351 #endif
8352 
8353 #if defined(AE_COMPILE_TRFAC) || !defined(AE_PARTIAL_BUILD)
8354 /*************************************************************************
8355 An analysis of the sparse matrix decomposition, performed prior to  actual
8356 numerical factorization. You should not directly  access  fields  of  this
8357 object - use appropriate ALGLIB functions to work with this object.
8358 *************************************************************************/
_sparsedecompositionanalysis_owner::_sparsedecompositionanalysis_owner()
{
    // Default constructor: allocates, zero-fills and initializes the wrapped
    // C-core sparsedecompositionanalysis structure (p_struct).
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path (core longjmp'ed here): release any partially
        // constructed state before rethrowing / flagging the error.
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsedecompositionanalysis_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    // Allocate and zero-initialize the C-core structure, then run its init.
    p_struct = (alglib_impl::sparsedecompositionanalysis*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsedecompositionanalysis), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsedecompositionanalysis));
    alglib_impl::_sparsedecompositionanalysis_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
8387 
_sparsedecompositionanalysis_owner::_sparsedecompositionanalysis_owner(const _sparsedecompositionanalysis_owner &rhs)
{
    // Copy constructor: deep-copies rhs's C-core structure via init_copy.
    // Requires rhs to be initialized (asserted below).
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path (core longjmp'ed here): release any partially
        // constructed state before rethrowing / flagging the error.
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsedecompositionanalysis_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsedecompositionanalysis copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::sparsedecompositionanalysis*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsedecompositionanalysis), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsedecompositionanalysis));
    alglib_impl::_sparsedecompositionanalysis_init_copy(p_struct, const_cast<alglib_impl::sparsedecompositionanalysis*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
8417 
_sparsedecompositionanalysis_owner& _sparsedecompositionanalysis_owner::operator=(const _sparsedecompositionanalysis_owner &rhs)
{
    // Copy assignment: destroys the current C-core contents and deep-copies
    // rhs's via init_copy. Both sides must already be initialized.
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: sparsedecompositionanalysis assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsedecompositionanalysis assignment constructor failure (source is not initialized)", &_state);
    // Tear down the destination contents in place, then rebuild as a copy.
    alglib_impl::_sparsedecompositionanalysis_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::sparsedecompositionanalysis));
    alglib_impl::_sparsedecompositionanalysis_init_copy(p_struct, const_cast<alglib_impl::sparsedecompositionanalysis*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
8444 
~_sparsedecompositionanalysis_owner()8445 _sparsedecompositionanalysis_owner::~_sparsedecompositionanalysis_owner()
8446 {
8447     if( p_struct!=NULL )
8448     {
8449         alglib_impl::_sparsedecompositionanalysis_destroy(p_struct);
8450         ae_free(p_struct);
8451     }
8452 }
8453 
// Returns a non-owning pointer to the wrapped C-core structure.
alglib_impl::sparsedecompositionanalysis* _sparsedecompositionanalysis_owner::c_ptr()
{
    return p_struct;
}
8458 
c_ptr() const8459 alglib_impl::sparsedecompositionanalysis* _sparsedecompositionanalysis_owner::c_ptr() const
8460 {
8461     return const_cast<alglib_impl::sparsedecompositionanalysis*>(p_struct);
8462 }
// Default constructor: delegates C-core allocation to the owner base class.
sparsedecompositionanalysis::sparsedecompositionanalysis() : _sparsedecompositionanalysis_owner()
{
}
8466 
// Copy constructor: delegates the deep copy to the owner base class.
sparsedecompositionanalysis::sparsedecompositionanalysis(const sparsedecompositionanalysis &rhs):_sparsedecompositionanalysis_owner(rhs)
{
}
8470 
// Copy assignment: forwards to the owner base class, which performs the
// deep copy of the C-core structure; self-assignment is a no-op.
sparsedecompositionanalysis& sparsedecompositionanalysis::operator=(const sparsedecompositionanalysis &rhs)
{
    if( this!=&rhs )
        _sparsedecompositionanalysis_owner::operator=(rhs);
    return *this;
}
8478 
// Destructor: the owner base class releases the C-core structure.
sparsedecompositionanalysis::~sparsedecompositionanalysis()
{
}
8482 
8483 /*************************************************************************
8484 LU decomposition of a general real matrix with row pivoting
8485 
8486 A is represented as A = P*L*U, where:
8487 * L is lower unitriangular matrix
8488 * U is upper triangular matrix
8489 * P = P0*P1*...*PK, K=min(M,N)-1,
8490   Pi - permutation matrix for I and Pivots[I]
8491 
8492 INPUT PARAMETERS:
8493     A       -   array[0..M-1, 0..N-1].
8494     M       -   number of rows in matrix A.
8495     N       -   number of columns in matrix A.
8496 
8497 
8498 OUTPUT PARAMETERS:
8499     A       -   matrices L and U in compact form:
8500                 * L is stored under main diagonal
8501                 * U is stored on and above main diagonal
8502     Pivots  -   permutation matrix in compact form.
8503                 array[0..Min(M-1,N-1)].
8504 
8505   ! FREE EDITION OF ALGLIB:
8506   !
8507   ! Free Edition of ALGLIB supports following important features for  this
8508   ! function:
8509   ! * C++ version: x64 SIMD support using C++ intrinsics
8510   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
8511   !
8512   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
8513   ! Reference Manual in order  to  find  out  how to activate SIMD support
8514   ! in ALGLIB.
8515 
8516   ! COMMERCIAL EDITION OF ALGLIB:
8517   !
8518   ! Commercial Edition of ALGLIB includes following important improvements
8519   ! of this function:
8520   ! * high-performance native backend with same C# interface (C# version)
8521   ! * multithreading support (C++ and C# versions)
8522   ! * hardware vendor (Intel) implementations of linear algebra primitives
8523   !   (C++ and C# versions, x86/x64 platform)
8524   !
8525   ! We recommend you to read 'Working with commercial version' section  of
8526   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8527   ! related features provided by commercial edition of ALGLIB.
8528 
8529   -- ALGLIB routine --
8530      10.01.2010
8531      Bochkanov Sergey
8532 *************************************************************************/
void rmatrixlu(real_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::rmatrixlu(); A is factorized in
    // place and pivot indices are written into 'pivots'. Error handling
    // follows the standard ALGLIB setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::rmatrixlu(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
8554 
8555 /*************************************************************************
8556 LU decomposition of a general complex matrix with row pivoting
8557 
8558 A is represented as A = P*L*U, where:
8559 * L is lower unitriangular matrix
8560 * U is upper triangular matrix
8561 * P = P0*P1*...*PK, K=min(M,N)-1,
8562   Pi - permutation matrix for I and Pivots[I]
8563 
8564 INPUT PARAMETERS:
8565     A       -   array[0..M-1, 0..N-1].
8566     M       -   number of rows in matrix A.
8567     N       -   number of columns in matrix A.
8568 
8569 
8570 OUTPUT PARAMETERS:
8571     A       -   matrices L and U in compact form:
8572                 * L is stored under main diagonal
8573                 * U is stored on and above main diagonal
8574     Pivots  -   permutation matrix in compact form.
8575                 array[0..Min(M-1,N-1)].
8576 
8577   ! FREE EDITION OF ALGLIB:
8578   !
8579   ! Free Edition of ALGLIB supports following important features for  this
8580   ! function:
8581   ! * C++ version: x64 SIMD support using C++ intrinsics
8582   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
8583   !
8584   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
8585   ! Reference Manual in order  to  find  out  how to activate SIMD support
8586   ! in ALGLIB.
8587 
8588   ! COMMERCIAL EDITION OF ALGLIB:
8589   !
8590   ! Commercial Edition of ALGLIB includes following important improvements
8591   ! of this function:
8592   ! * high-performance native backend with same C# interface (C# version)
8593   ! * multithreading support (C++ and C# versions)
8594   ! * hardware vendor (Intel) implementations of linear algebra primitives
8595   !   (C++ and C# versions, x86/x64 platform)
8596   !
8597   ! We recommend you to read 'Working with commercial version' section  of
8598   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8599   ! related features provided by commercial edition of ALGLIB.
8600 
8601   -- ALGLIB routine --
8602      10.01.2010
8603      Bochkanov Sergey
8604 *************************************************************************/
void cmatrixlu(complex_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::cmatrixlu(); complex analog of
    // rmatrixlu above — A is factorized in place, pivots receives the
    // permutation. Error handling follows the standard ALGLIB
    // setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::cmatrixlu(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
8626 
8627 /*************************************************************************
8628 Cache-oblivious Cholesky decomposition
8629 
8630 The algorithm computes Cholesky decomposition  of  a  Hermitian  positive-
8631 definite matrix. The result of an algorithm is a representation  of  A  as
8632 A=U'*U  or A=L*L' (here X' denotes conj(X^T)).
8633 
8634 INPUT PARAMETERS:
8635     A       -   upper or lower triangle of a factorized matrix.
8636                 array with elements [0..N-1, 0..N-1].
8637     N       -   size of matrix A.
8638     IsUpper -   if IsUpper=True, then A contains an upper triangle of
8639                 a symmetric matrix, otherwise A contains a lower one.
8640 
8641 OUTPUT PARAMETERS:
8642     A       -   the result of factorization. If IsUpper=True, then
8643                 the upper triangle contains matrix U, so that A = U'*U,
8644                 and the elements below the main diagonal are not modified.
8645                 Similarly, if IsUpper = False.
8646 
8647 RESULT:
8648     If  the  matrix  is  positive-definite,  the  function  returns  True.
8649     Otherwise, the function returns False. Contents of A is not determined
8650     in such case.
8651 
8652   ! FREE EDITION OF ALGLIB:
8653   !
8654   ! Free Edition of ALGLIB supports following important features for  this
8655   ! function:
8656   ! * C++ version: x64 SIMD support using C++ intrinsics
8657   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
8658   !
8659   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
8660   ! Reference Manual in order  to  find  out  how to activate SIMD support
8661   ! in ALGLIB.
8662 
8663   ! COMMERCIAL EDITION OF ALGLIB:
8664   !
8665   ! Commercial Edition of ALGLIB includes following important improvements
8666   ! of this function:
8667   ! * high-performance native backend with same C# interface (C# version)
8668   ! * multithreading support (C++ and C# versions)
8669   ! * hardware vendor (Intel) implementations of linear algebra primitives
8670   !   (C++ and C# versions, x86/x64 platform)
8671   !
8672   ! We recommend you to read 'Working with commercial version' section  of
8673   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8674   ! related features provided by commercial edition of ALGLIB.
8675 
8676   -- ALGLIB routine --
8677      15.12.2009-22.01.2018
8678      Bochkanov Sergey
8679 *************************************************************************/
hpdmatrixcholesky(complex_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)8680 bool hpdmatrixcholesky(complex_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
8681 {
8682     jmp_buf _break_jump;
8683     alglib_impl::ae_state _alglib_env_state;
8684     alglib_impl::ae_state_init(&_alglib_env_state);
8685     if( setjmp(_break_jump) )
8686     {
8687 #if !defined(AE_NO_EXCEPTIONS)
8688         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8689 #else
8690         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8691         return 0;
8692 #endif
8693     }
8694     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8695     if( _xparams.flags!=0x0 )
8696         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8697     ae_bool result = alglib_impl::hpdmatrixcholesky(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
8698     alglib_impl::ae_state_clear(&_alglib_env_state);
8699     return *(reinterpret_cast<bool*>(&result));
8700 }
8701 
8702 /*************************************************************************
8703 Cache-oblivious Cholesky decomposition
8704 
8705 The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
8706 definite matrix. The result of an algorithm is a representation  of  A  as
8707 A=U^T*U  or A=L*L^T
8708 
8709 INPUT PARAMETERS:
8710     A       -   upper or lower triangle of a factorized matrix.
8711                 array with elements [0..N-1, 0..N-1].
8712     N       -   size of matrix A.
8713     IsUpper -   if IsUpper=True, then A contains an upper triangle of
8714                 a symmetric matrix, otherwise A contains a lower one.
8715 
8716 OUTPUT PARAMETERS:
8717     A       -   the result of factorization. If IsUpper=True, then
8718                 the upper triangle contains matrix U, so that A = U^T*U,
8719                 and the elements below the main diagonal are not modified.
8720                 Similarly, if IsUpper = False.
8721 
8722 RESULT:
8723     If  the  matrix  is  positive-definite,  the  function  returns  True.
8724     Otherwise, the function returns False. Contents of A is not determined
8725     in such case.
8726 
8727   ! FREE EDITION OF ALGLIB:
8728   !
8729   ! Free Edition of ALGLIB supports following important features for  this
8730   ! function:
8731   ! * C++ version: x64 SIMD support using C++ intrinsics
8732   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
8733   !
8734   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
8735   ! Reference Manual in order  to  find  out  how to activate SIMD support
8736   ! in ALGLIB.
8737 
8738   ! COMMERCIAL EDITION OF ALGLIB:
8739   !
8740   ! Commercial Edition of ALGLIB includes following important improvements
8741   ! of this function:
8742   ! * high-performance native backend with same C# interface (C# version)
8743   ! * multithreading support (C++ and C# versions)
8744   ! * hardware vendor (Intel) implementations of linear algebra primitives
8745   !   (C++ and C# versions, x86/x64 platform)
8746   !
8747   ! We recommend you to read 'Working with commercial version' section  of
8748   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8749   ! related features provided by commercial edition of ALGLIB.
8750 
8751   -- ALGLIB routine --
8752      15.12.2009
8753      Bochkanov Sergey
8754 *************************************************************************/
spdmatrixcholesky(real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)8755 bool spdmatrixcholesky(real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
8756 {
8757     jmp_buf _break_jump;
8758     alglib_impl::ae_state _alglib_env_state;
8759     alglib_impl::ae_state_init(&_alglib_env_state);
8760     if( setjmp(_break_jump) )
8761     {
8762 #if !defined(AE_NO_EXCEPTIONS)
8763         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8764 #else
8765         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8766         return 0;
8767 #endif
8768     }
8769     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8770     if( _xparams.flags!=0x0 )
8771         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8772     ae_bool result = alglib_impl::spdmatrixcholesky(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
8773     alglib_impl::ae_state_clear(&_alglib_env_state);
8774     return *(reinterpret_cast<bool*>(&result));
8775 }
8776 
8777 /*************************************************************************
Update of Cholesky decomposition: rank-1 update to original A. This is the
convenience (non-buffered) version; a "buffered" variant which reuses a
caller-supplied buffer between calls is available separately.
8781 
8782 This function uses internally allocated buffer which is not saved  between
8783 subsequent  calls.  So,  if  you  perform  a lot  of  subsequent  updates,
8784 we  recommend   you   to   use   "buffered"   version   of  this function:
8785 SPDMatrixCholeskyUpdateAdd1Buf().
8786 
8787 INPUT PARAMETERS:
8788     A       -   upper or lower Cholesky factor.
8789                 array with elements [0..N-1, 0..N-1].
8790                 Exception is thrown if array size is too small.
8791     N       -   size of matrix A, N>0
8792     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
8793                 otherwise A contains a lower one.
8794     U       -   array[N], rank-1 update to A: A_mod = A + u*u'
8795                 Exception is thrown if array size is too small.
    (NOTE: unlike SPDMatrixCholeskyUpdateAdd1Buf(), this function does NOT
     accept a BufR argument; a temporary buffer is allocated internally on
     each call.)
8799 
8800 OUTPUT PARAMETERS:
8801     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
8802                 triangle contains matrix U, and the elements below the main
8803                 diagonal are not modified. Similarly, if IsUpper = False.
8804 
8805 NOTE: this function always succeeds, so it does not return completion code
8806 
8807 NOTE: this function checks sizes of input arrays, but it does  NOT  checks
8808       for presence of infinities or NAN's.
8809 
8810   -- ALGLIB --
8811      03.02.2014
8812      Sergey Bochkanov
8813 *************************************************************************/
spdmatrixcholeskyupdateadd1(const real_2d_array & a,const ae_int_t n,const bool isupper,const real_1d_array & u,const xparams _xparams)8814 void spdmatrixcholeskyupdateadd1(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &u, const xparams _xparams)
8815 {
8816     jmp_buf _break_jump;
8817     alglib_impl::ae_state _alglib_env_state;
8818     alglib_impl::ae_state_init(&_alglib_env_state);
8819     if( setjmp(_break_jump) )
8820     {
8821 #if !defined(AE_NO_EXCEPTIONS)
8822         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8823 #else
8824         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8825         return;
8826 #endif
8827     }
8828     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8829     if( _xparams.flags!=0x0 )
8830         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8831     alglib_impl::spdmatrixcholeskyupdateadd1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), &_alglib_env_state);
8832     alglib_impl::ae_state_clear(&_alglib_env_state);
8833     return;
8834 }
8835 
8836 /*************************************************************************
8837 Update of Cholesky decomposition: "fixing" some variables.
8838 
8839 This function uses internally allocated buffer which is not saved  between
8840 subsequent  calls.  So,  if  you  perform  a lot  of  subsequent  updates,
8841 we  recommend   you   to   use   "buffered"   version   of  this function:
8842 SPDMatrixCholeskyUpdateFixBuf().
8843 
8844 "FIXING" EXPLAINED:
8845 
8846     Suppose we have N*N positive definite matrix A. "Fixing" some variable
8847     means filling corresponding row/column of  A  by  zeros,  and  setting
8848     diagonal element to 1.
8849 
8850     For example, if we fix 2nd variable in 4*4 matrix A, it becomes Af:
8851 
8852         ( A00  A01  A02  A03 )      ( Af00  0   Af02 Af03 )
8853         ( A10  A11  A12  A13 )      (  0    1    0    0   )
8854         ( A20  A21  A22  A23 )  =>  ( Af20  0   Af22 Af23 )
8855         ( A30  A31  A32  A33 )      ( Af30  0   Af32 Af33 )
8856 
8857     If we have Cholesky decomposition of A, it must be recalculated  after
8858     variables were  fixed.  However,  it  is  possible  to  use  efficient
8859     algorithm, which needs O(K*N^2)  time  to  "fix"  K  variables,  given
8860     Cholesky decomposition of original, "unfixed" A.
8861 
8862 INPUT PARAMETERS:
8863     A       -   upper or lower Cholesky factor.
8864                 array with elements [0..N-1, 0..N-1].
8865                 Exception is thrown if array size is too small.
8866     N       -   size of matrix A, N>0
8867     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
8868                 otherwise A contains a lower one.
8869     Fix     -   array[N], I-th element is True if I-th  variable  must  be
8870                 fixed. Exception is thrown if array size is too small.
    (NOTE: unlike SPDMatrixCholeskyUpdateFixBuf(), this function does NOT
     accept a BufR argument; a temporary buffer is allocated internally on
     each call.)
8874 
8875 OUTPUT PARAMETERS:
8876     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
8877                 triangle contains matrix U, and the elements below the main
8878                 diagonal are not modified. Similarly, if IsUpper = False.
8879 
8880 NOTE: this function always succeeds, so it does not return completion code
8881 
8882 NOTE: this function checks sizes of input arrays, but it does  NOT  checks
8883       for presence of infinities or NAN's.
8884 
8885 NOTE: this  function  is  efficient  only  for  moderate amount of updated
8886       variables - say, 0.1*N or 0.3*N. For larger amount of  variables  it
8887       will  still  work,  but  you  may  get   better   performance   with
8888       straightforward Cholesky.
8889 
8890   -- ALGLIB --
8891      03.02.2014
8892      Sergey Bochkanov
8893 *************************************************************************/
spdmatrixcholeskyupdatefix(const real_2d_array & a,const ae_int_t n,const bool isupper,const boolean_1d_array & fix,const xparams _xparams)8894 void spdmatrixcholeskyupdatefix(const real_2d_array &a, const ae_int_t n, const bool isupper, const boolean_1d_array &fix, const xparams _xparams)
8895 {
8896     jmp_buf _break_jump;
8897     alglib_impl::ae_state _alglib_env_state;
8898     alglib_impl::ae_state_init(&_alglib_env_state);
8899     if( setjmp(_break_jump) )
8900     {
8901 #if !defined(AE_NO_EXCEPTIONS)
8902         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8903 #else
8904         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8905         return;
8906 #endif
8907     }
8908     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8909     if( _xparams.flags!=0x0 )
8910         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8911     alglib_impl::spdmatrixcholeskyupdatefix(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(fix.c_ptr()), &_alglib_env_state);
8912     alglib_impl::ae_state_clear(&_alglib_env_state);
8913     return;
8914 }
8915 
8916 /*************************************************************************
8917 Update of Cholesky decomposition: rank-1 update to original A.  "Buffered"
8918 version which uses preallocated buffer which is saved  between  subsequent
8919 function calls.
8920 
8921 See comments for SPDMatrixCholeskyUpdateAdd1() for more information.
8922 
8923 INPUT PARAMETERS:
8924     A       -   upper or lower Cholesky factor.
8925                 array with elements [0..N-1, 0..N-1].
8926                 Exception is thrown if array size is too small.
8927     N       -   size of matrix A, N>0
8928     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
8929                 otherwise A contains a lower one.
8930     U       -   array[N], rank-1 update to A: A_mod = A + u*u'
8931                 Exception is thrown if array size is too small.
8932     BufR    -   possibly preallocated  buffer;  automatically  resized  if
8933                 needed. It is recommended to  reuse  this  buffer  if  you
8934                 perform a lot of subsequent decompositions.
8935 
8936 OUTPUT PARAMETERS:
8937     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
8938                 triangle contains matrix U, and the elements below the main
8939                 diagonal are not modified. Similarly, if IsUpper = False.
8940 
8941   -- ALGLIB --
8942      03.02.2014
8943      Sergey Bochkanov
8944 *************************************************************************/
spdmatrixcholeskyupdateadd1buf(const real_2d_array & a,const ae_int_t n,const bool isupper,const real_1d_array & u,real_1d_array & bufr,const xparams _xparams)8945 void spdmatrixcholeskyupdateadd1buf(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &u, real_1d_array &bufr, const xparams _xparams)
8946 {
8947     jmp_buf _break_jump;
8948     alglib_impl::ae_state _alglib_env_state;
8949     alglib_impl::ae_state_init(&_alglib_env_state);
8950     if( setjmp(_break_jump) )
8951     {
8952 #if !defined(AE_NO_EXCEPTIONS)
8953         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8954 #else
8955         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8956         return;
8957 #endif
8958     }
8959     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8960     if( _xparams.flags!=0x0 )
8961         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8962     alglib_impl::spdmatrixcholeskyupdateadd1buf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::ae_vector*>(bufr.c_ptr()), &_alglib_env_state);
8963     alglib_impl::ae_state_clear(&_alglib_env_state);
8964     return;
8965 }
8966 
8967 /*************************************************************************
8968 Update of Cholesky  decomposition:  "fixing"  some  variables.  "Buffered"
8969 version which uses preallocated buffer which is saved  between  subsequent
8970 function calls.
8971 
8972 See comments for SPDMatrixCholeskyUpdateFix() for more information.
8973 
8974 INPUT PARAMETERS:
8975     A       -   upper or lower Cholesky factor.
8976                 array with elements [0..N-1, 0..N-1].
8977                 Exception is thrown if array size is too small.
8978     N       -   size of matrix A, N>0
8979     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
8980                 otherwise A contains a lower one.
8981     Fix     -   array[N], I-th element is True if I-th  variable  must  be
8982                 fixed. Exception is thrown if array size is too small.
8983     BufR    -   possibly preallocated  buffer;  automatically  resized  if
8984                 needed. It is recommended to  reuse  this  buffer  if  you
8985                 perform a lot of subsequent decompositions.
8986 
8987 OUTPUT PARAMETERS:
8988     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
8989                 triangle contains matrix U, and the elements below the main
8990                 diagonal are not modified. Similarly, if IsUpper = False.
8991 
8992   -- ALGLIB --
8993      03.02.2014
8994      Sergey Bochkanov
8995 *************************************************************************/
spdmatrixcholeskyupdatefixbuf(const real_2d_array & a,const ae_int_t n,const bool isupper,const boolean_1d_array & fix,real_1d_array & bufr,const xparams _xparams)8996 void spdmatrixcholeskyupdatefixbuf(const real_2d_array &a, const ae_int_t n, const bool isupper, const boolean_1d_array &fix, real_1d_array &bufr, const xparams _xparams)
8997 {
8998     jmp_buf _break_jump;
8999     alglib_impl::ae_state _alglib_env_state;
9000     alglib_impl::ae_state_init(&_alglib_env_state);
9001     if( setjmp(_break_jump) )
9002     {
9003 #if !defined(AE_NO_EXCEPTIONS)
9004         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9005 #else
9006         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9007         return;
9008 #endif
9009     }
9010     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9011     if( _xparams.flags!=0x0 )
9012         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9013     alglib_impl::spdmatrixcholeskyupdatefixbuf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(fix.c_ptr()), const_cast<alglib_impl::ae_vector*>(bufr.c_ptr()), &_alglib_env_state);
9014     alglib_impl::ae_state_clear(&_alglib_env_state);
9015     return;
9016 }
9017 
9018 /*************************************************************************
9019 Sparse LU decomposition with column pivoting for sparsity and row pivoting
9020 for stability. Input must be square sparse matrix stored in CRS format.
9021 
9022 The algorithm  computes  LU  decomposition  of  a  general  square  matrix
9023 (rectangular ones are not supported). The result  of  an  algorithm  is  a
9024 representation of A as A = P*L*U*Q, where:
9025 * L is lower unitriangular matrix
9026 * U is upper triangular matrix
9027 * P = P0*P1*...*PK, K=N-1, Pi - permutation matrix for I and P[I]
9028 * Q = QK*...*Q1*Q0, K=N-1, Qi - permutation matrix for I and Q[I]
9029 
9030 This function pivots columns for higher sparsity, and then pivots rows for
9031 stability (larger element at the diagonal).
9032 
9033 INPUT PARAMETERS:
9034     A       -   sparse NxN matrix in CRS format. An exception is generated
9035                 if matrix is non-CRS or non-square.
9036     PivotType-  pivoting strategy:
9037                 * 0 for best pivoting available (2 in current version)
9038                 * 1 for row-only pivoting (NOT RECOMMENDED)
9039                 * 2 for complete pivoting which produces most sparse outputs
9040 
9041 OUTPUT PARAMETERS:
9042     A       -   the result of factorization, matrices L and U stored in
9043                 compact form using CRS sparse storage format:
9044                 * lower unitriangular L is stored strictly under main diagonal
                * upper triangular U is stored ON and ABOVE main diagonal
9046     P       -   row permutation matrix in compact form, array[N]
9047     Q       -   col permutation matrix in compact form, array[N]
9048 
9049 This function always succeeds, i.e. it ALWAYS returns valid factorization,
9050 but for your convenience it also returns  boolean  value  which  helps  to
9051 detect symbolically degenerate matrices:
9052 * function returns TRUE, if the matrix was factorized AND symbolically
9053   non-degenerate
9054 * function returns FALSE, if the matrix was factorized but U has strictly
9055   zero elements at the diagonal (the factorization is returned anyway).
9056 
9057 
9058   -- ALGLIB routine --
9059      03.09.2018
9060      Bochkanov Sergey
9061 *************************************************************************/
sparselu(const sparsematrix & a,const ae_int_t pivottype,integer_1d_array & p,integer_1d_array & q,const xparams _xparams)9062 bool sparselu(const sparsematrix &a, const ae_int_t pivottype, integer_1d_array &p, integer_1d_array &q, const xparams _xparams)
9063 {
9064     jmp_buf _break_jump;
9065     alglib_impl::ae_state _alglib_env_state;
9066     alglib_impl::ae_state_init(&_alglib_env_state);
9067     if( setjmp(_break_jump) )
9068     {
9069 #if !defined(AE_NO_EXCEPTIONS)
9070         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9071 #else
9072         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9073         return 0;
9074 #endif
9075     }
9076     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9077     if( _xparams.flags!=0x0 )
9078         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9079     ae_bool result = alglib_impl::sparselu(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), pivottype, const_cast<alglib_impl::ae_vector*>(p.c_ptr()), const_cast<alglib_impl::ae_vector*>(q.c_ptr()), &_alglib_env_state);
9080     alglib_impl::ae_state_clear(&_alglib_env_state);
9081     return *(reinterpret_cast<bool*>(&result));
9082 }
9083 
9084 /*************************************************************************
Sparse Cholesky decomposition for a skyline matrix using an in-place algorithm
9086 without allocating additional storage.
9087 
9088 The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
9089 definite sparse matrix. The result of an algorithm is a representation  of
9090 A as A=U^T*U or A=L*L^T
9091 
9092 This function allows to perform very efficient decomposition of low-profile
9093 matrices (average bandwidth is ~5-10 elements). For larger matrices it  is
9094 recommended to use supernodal Cholesky decomposition: SparseCholeskyP() or
9095 SparseCholeskyAnalyze()/SparseCholeskyFactorize().
9096 
9097 INPUT PARAMETERS:
9098     A       -   sparse matrix in skyline storage (SKS) format.
9099     N       -   size of matrix A (can be smaller than actual size of A)
9100     IsUpper -   if IsUpper=True, then factorization is performed on  upper
                triangle. Another triangle is ignored (it may contain some
9102                 data, but it is not changed).
9103 
9104 
9105 OUTPUT PARAMETERS:
9106     A       -   the result of factorization, stored in SKS. If IsUpper=True,
9107                 then the upper  triangle  contains  matrix  U,  such  that
9108                 A = U^T*U. Lower triangle is not changed.
9109                 Similarly, if IsUpper = False. In this case L is returned,
9110                 and we have A = L*(L^T).
9111                 Note that THIS function does not  perform  permutation  of
9112                 rows to reduce bandwidth.
9113 
9114 RESULT:
9115     If  the  matrix  is  positive-definite,  the  function  returns  True.
9116     Otherwise, the function returns False. Contents of A is not determined
9117     in such case.
9118 
9119 NOTE: for  performance  reasons  this  function  does NOT check that input
9120       matrix  includes  only  finite  values. It is your responsibility to
9121       make sure that there are no infinite or NAN values in the matrix.
9122 
9123   -- ALGLIB routine --
9124      16.01.2014
9125      Bochkanov Sergey
9126 *************************************************************************/
sparsecholeskyskyline(const sparsematrix & a,const ae_int_t n,const bool isupper,const xparams _xparams)9127 bool sparsecholeskyskyline(const sparsematrix &a, const ae_int_t n, const bool isupper, const xparams _xparams)
9128 {
9129     jmp_buf _break_jump;
9130     alglib_impl::ae_state _alglib_env_state;
9131     alglib_impl::ae_state_init(&_alglib_env_state);
9132     if( setjmp(_break_jump) )
9133     {
9134 #if !defined(AE_NO_EXCEPTIONS)
9135         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9136 #else
9137         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9138         return 0;
9139 #endif
9140     }
9141     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9142     if( _xparams.flags!=0x0 )
9143         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9144     ae_bool result = alglib_impl::sparsecholeskyskyline(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
9145     alglib_impl::ae_state_clear(&_alglib_env_state);
9146     return *(reinterpret_cast<bool*>(&result));
9147 }
9148 
9149 /*************************************************************************
9150 Sparse Cholesky decomposition for a matrix  stored  in  any sparse storage,
9151 without rows/cols permutation.
9152 
9153 This function is the most convenient (less parameters to specify), although
9154 less efficient, version of sparse Cholesky.
9155 
9156 Internally it:
9157 * calls SparseCholeskyAnalyze()  function  to  perform  symbolic  analysis
9158   phase with no permutation being configured.
9159 * calls SparseCholeskyFactorize() function to perform numerical  phase  of
9160   the factorization
9161 
9162 Following alternatives may result in better performance:
9163 * using SparseCholeskyP(), which selects best  pivoting  available,  which
9164   almost always results in improved sparsity and cache locality
9165 * using  SparseCholeskyAnalyze() and SparseCholeskyFactorize()   functions
9166   directly,  which  may  improve  performance of repetitive factorizations
9167   with same sparsity patterns.
9168 
9169 The latter also allows one to perform  LDLT  factorization  of  indefinite
9170 matrix (one with strictly diagonal D, which is known  to  be  stable  only
9171 in few special cases, like quasi-definite matrices).
9172 
9173 INPUT PARAMETERS:
9174     A       -   a square NxN sparse matrix, stored in any storage format.
9175     IsUpper -   if IsUpper=True, then factorization is performed on  upper
9176                 triangle.  Another triangle is ignored on  input,  dropped
9177                 on output. Similarly, if IsUpper=False, the lower triangle
9178                 is processed.
9179 
9180 OUTPUT PARAMETERS:
9181     A       -   the result of factorization, stored in CRS format:
9182                 * if IsUpper=True, then the upper triangle contains matrix
9183                   U such  that  A = U^T*U and the lower triangle is empty.
9184                 * similarly, if IsUpper=False, then lower triangular L  is
9185                   returned and we have A = L*(L^T).
9186                 Note that THIS function does not  perform  permutation  of
9187                 the rows to reduce fill-in.
9188 
9189 RESULT:
9190     If  the  matrix  is  positive-definite,  the  function  returns  True.
9191     Otherwise, the function returns False.  Contents  of  A  is  undefined
9192     in such case.
9193 
9194 NOTE: for  performance  reasons  this  function  does NOT check that input
9195       matrix  includes  only  finite  values. It is your responsibility to
9196       make sure that there are no infinite or NAN values in the matrix.
9197 
9198   -- ALGLIB routine --
9199      16.09.2020
9200      Bochkanov Sergey
9201 *************************************************************************/
sparsecholesky(const sparsematrix & a,const bool isupper,const xparams _xparams)9202 bool sparsecholesky(const sparsematrix &a, const bool isupper, const xparams _xparams)
9203 {
9204     jmp_buf _break_jump;
9205     alglib_impl::ae_state _alglib_env_state;
9206     alglib_impl::ae_state_init(&_alglib_env_state);
9207     if( setjmp(_break_jump) )
9208     {
9209 #if !defined(AE_NO_EXCEPTIONS)
9210         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9211 #else
9212         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9213         return 0;
9214 #endif
9215     }
9216     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9217     if( _xparams.flags!=0x0 )
9218         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9219     ae_bool result = alglib_impl::sparsecholesky(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, &_alglib_env_state);
9220     alglib_impl::ae_state_clear(&_alglib_env_state);
9221     return *(reinterpret_cast<bool*>(&result));
9222 }
9223 
9224 /*************************************************************************
9225 Sparse Cholesky decomposition for a matrix  stored  in  any sparse storage
9226 format, with performance-enhancing permutation of rows/cols.
9227 
The present version is configured to perform a supernodal permutation with a
sparsity-reducing (fill-in reducing) ordering.
9230 
9231 This function is a wrapper around generic sparse  decomposition  functions
9232 that internally:
9233 * calls SparseCholeskyAnalyze()  function  to  perform  symbolic  analysis
9234   phase with best available permutation being configured.
9235 * calls SparseCholeskyFactorize() function to perform numerical  phase  of
9236   the factorization.
9237 
9238 NOTE: using  SparseCholeskyAnalyze() and SparseCholeskyFactorize() directly
9239       may improve  performance  of  repetitive  factorizations  with  same
9240       sparsity patterns. It also allows one to perform  LDLT factorization
9241       of  indefinite  matrix  -  a factorization with strictly diagonal D,
9242       which  is  known to be stable only in few special cases, like quasi-
9243       definite matrices.
9244 
9245 INPUT PARAMETERS:
9246     A       -   a square NxN sparse matrix, stored in any storage format.
9247     IsUpper -   if IsUpper=True, then factorization is performed on  upper
9248                 triangle.  Another triangle is ignored on  input,  dropped
9249                 on output. Similarly, if IsUpper=False, the lower triangle
9250                 is processed.
9251 
9252 OUTPUT PARAMETERS:
9253     A       -   the result of factorization, stored in CRS format:
9254                 * if IsUpper=True, then the upper triangle contains matrix
9255                   U such  that  A = U^T*U and the lower triangle is empty.
9256                 * similarly, if IsUpper=False, then lower triangular L  is
9257                   returned and we have A = L*(L^T).
9258     P       -   a row/column permutation, a product of P0*P1*...*Pk, k=N-1,
9259                 with Pi being permutation of rows/cols I and P[I]
9260 
9261 RESULT:
9262     If  the  matrix  is  positive-definite,  the  function  returns  True.
9263     Otherwise, the function returns False.  Contents  of  A  is  undefined
9264     in such case.
9265 
9266 NOTE: for  performance  reasons  this  function  does NOT check that input
9267       matrix  includes  only  finite  values. It is your responsibility to
9268       make sure that there are no infinite or NAN values in the matrix.
9269 
9270   -- ALGLIB routine --
9271      16.09.2020
9272      Bochkanov Sergey
9273 *************************************************************************/
sparsecholeskyp(const sparsematrix & a,const bool isupper,integer_1d_array & p,const xparams _xparams)9274 bool sparsecholeskyp(const sparsematrix &a, const bool isupper, integer_1d_array &p, const xparams _xparams)
9275 {
9276     jmp_buf _break_jump;
9277     alglib_impl::ae_state _alglib_env_state;
9278     alglib_impl::ae_state_init(&_alglib_env_state);
9279     if( setjmp(_break_jump) )
9280     {
9281 #if !defined(AE_NO_EXCEPTIONS)
9282         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9283 #else
9284         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9285         return 0;
9286 #endif
9287     }
9288     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9289     if( _xparams.flags!=0x0 )
9290         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9291     ae_bool result = alglib_impl::sparsecholeskyp(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(p.c_ptr()), &_alglib_env_state);
9292     alglib_impl::ae_state_clear(&_alglib_env_state);
9293     return *(reinterpret_cast<bool*>(&result));
9294 }
9295 
9296 /*************************************************************************
9297 Sparse Cholesky/LDLT decomposition: symbolic analysis phase.
9298 
9299 This function is a part of the 'expert' sparse Cholesky API:
9300 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
9301   matrix to be factorized into internal storage
9302 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
9303   with lower bounds on pivot magnitudes and additional overflow safeguards
9304 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
9305   precomputed symbolic analysis and internally stored matrix - and outputs
9306   result
9307 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
9308   pattern into internal storage so  one  may  reuse  previously  allocated
9309   temporaries and previously performed symbolic analysis
9310 
9311 This specific function performs preliminary analysis of the  Cholesky/LDLT
9312 factorization. It allows to choose  different  permutation  types  and  to
9313 choose between classic Cholesky and  indefinite  LDLT  factorization  (the
9314 latter is computed with strictly diagonal D, i.e.  without  Bunch-Kauffman
9315 pivoting).
9316 
9317 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
9318       matrices. However, numerical stability is guaranteed ONLY for a class
9319       of quasi-definite matrices.
9320 
9321 NOTE: all internal processing is performed with lower triangular  matrices
9322       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
9323       triangular storage means  that  one  format  conversion  and/or  one
9324       transposition will be performed  internally  for  the  analysis  and
9325       factorization phases. Thus, highest  performance  is  achieved  when
9326       input is a lower triangular CRS matrix.
9327 
9328 INPUT PARAMETERS:
9329     A           -   sparse square matrix in any sparse storage format.
9330     IsUpper     -   whether upper or lower  triangle  is  decomposed  (the
9331                     other one is ignored).
9332     FactType    -   factorization type:
9333                     * 0 for traditional Cholesky of SPD matrix
9334                     * 1 for LDLT decomposition with strictly  diagonal  D,
9335                         which may have non-positive entries.
9336     PermType    -   permutation type:
9337                     *-1 for absence of permutation
9338                     * 0 for best fill-in reducing  permutation  available,
9339                         which is 3 in the current version
9340                     * 1 for supernodal ordering (improves locality and
9341                       performance, does NOT change fill-in factor)
9342                     * 2 for original AMD ordering
9343                     * 3 for  improved  AMD  (approximate  minimum  degree)
9344                         ordering with better  handling  of  matrices  with
9345                         dense rows/columns
9346 
9347 OUTPUT PARAMETERS:
9348     Analysis    -   contains:
9349                     * symbolic analysis of the matrix structure which will
9350                       be used later to guide numerical factorization.
9351                     * specific numeric values loaded into internal  memory
9352                       waiting for the factorization to be performed
9353 
9354 This function fails if and only if the matrix A is symbolically degenerate
9355 i.e. has diagonal element which is exactly zero. In  such  case  False  is
9356 returned, contents of Analysis object is undefined.
9357 
9358   -- ALGLIB routine --
9359      20.09.2020
9360      Bochkanov Sergey
9361 *************************************************************************/
sparsecholeskyanalyze(const sparsematrix & a,const bool isupper,const ae_int_t facttype,const ae_int_t permtype,sparsedecompositionanalysis & analysis,const xparams _xparams)9362 bool sparsecholeskyanalyze(const sparsematrix &a, const bool isupper, const ae_int_t facttype, const ae_int_t permtype, sparsedecompositionanalysis &analysis, const xparams _xparams)
9363 {
9364     jmp_buf _break_jump;
9365     alglib_impl::ae_state _alglib_env_state;
9366     alglib_impl::ae_state_init(&_alglib_env_state);
9367     if( setjmp(_break_jump) )
9368     {
9369 #if !defined(AE_NO_EXCEPTIONS)
9370         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9371 #else
9372         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9373         return 0;
9374 #endif
9375     }
9376     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9377     if( _xparams.flags!=0x0 )
9378         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9379     ae_bool result = alglib_impl::sparsecholeskyanalyze(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, facttype, permtype, const_cast<alglib_impl::sparsedecompositionanalysis*>(analysis.c_ptr()), &_alglib_env_state);
9380     alglib_impl::ae_state_clear(&_alglib_env_state);
9381     return *(reinterpret_cast<bool*>(&result));
9382 }
9383 
9384 /*************************************************************************
9385 Sparse Cholesky decomposition: numerical analysis phase.
9386 
9387 This function is a part of the 'expert' sparse Cholesky API:
9388 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
9389   matrix to be factorized into internal storage
9390 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
9391   with lower bounds on pivot magnitudes and additional overflow safeguards
9392 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
9393   precomputed symbolic analysis and internally stored matrix - and outputs
9394   result
9395 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
9396   pattern into internal storage so  one  may  reuse  previously  allocated
9397   temporaries and previously performed symbolic analysis
9398 
9399 Depending on settings specified during SparseCholeskyAnalyze() call it may
9400 produce classic Cholesky or L*D*LT  decomposition  (with strictly diagonal
9401 D), without permutation or with performance-enhancing permutation P.
9402 
9403 NOTE: all internal processing is performed with lower triangular  matrices
9404       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
9405       triangular storage means  that  one  format  conversion  and/or  one
9406       transposition will be performed  internally  for  the  analysis  and
9407       factorization phases. Thus, highest  performance  is  achieved  when
9408       input is a lower triangular CRS matrix, and lower triangular  output
9409       is requested.
9410 
9411 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
9412       matrices. However, numerical stability is guaranteed ONLY for a class
9413       of quasi-definite matrices.
9414 
9415 INPUT PARAMETERS:
9416     Analysis    -   prior analysis with internally stored matrix which will
9417                     be factorized
9418     NeedUpper   -   whether upper triangular or lower triangular output is
9419                     needed
9420 
9421 OUTPUT PARAMETERS:
9422     A           -   Cholesky decomposition of A stored in lower triangular
9423                     CRS format, i.e. A=L*L' (or upper triangular CRS, with
9424                     A=U'*U, depending on NeedUpper parameter).
9425     D           -   array[N], diagonal factor. If no diagonal  factor  was
9426                     required during analysis  phase,  still  returned  but
9427                     filled with 1's
9428     P           -   array[N], pivots. Permutation matrix P is a product of
9429                     P(0)*P(1)*...*P(N-1), where P(i) is a  permutation  of
9430                     row/col I and P[I] (with P[I]>=I).
9431                     If no permutation was requested during analysis phase,
9432                     still returned but filled with identity permutation.
9433 
9434 The function returns True  when  factorization  resulted  in nondegenerate
9435 matrix. False is returned when factorization fails (Cholesky factorization
9436 of indefinite matrix) or LDLT factorization has exactly zero  elements  at
9437 the diagonal. In the latter case contents of A, D and P is undefined.
9438 
9439 The analysis object is not changed during  the  factorization.  Subsequent
9440 calls to SparseCholeskyFactorize() will result in same factorization being
9441 performed one more time.
9442 
9443   -- ALGLIB routine --
9444      20.09.2020
9445      Bochkanov Sergey
9446 *************************************************************************/
sparsecholeskyfactorize(const sparsedecompositionanalysis & analysis,const bool needupper,sparsematrix & a,real_1d_array & d,integer_1d_array & p,const xparams _xparams)9447 bool sparsecholeskyfactorize(const sparsedecompositionanalysis &analysis, const bool needupper, sparsematrix &a, real_1d_array &d, integer_1d_array &p, const xparams _xparams)
9448 {
9449     jmp_buf _break_jump;
9450     alglib_impl::ae_state _alglib_env_state;
9451     alglib_impl::ae_state_init(&_alglib_env_state);
9452     if( setjmp(_break_jump) )
9453     {
9454 #if !defined(AE_NO_EXCEPTIONS)
9455         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9456 #else
9457         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9458         return 0;
9459 #endif
9460     }
9461     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9462     if( _xparams.flags!=0x0 )
9463         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9464     ae_bool result = alglib_impl::sparsecholeskyfactorize(const_cast<alglib_impl::sparsedecompositionanalysis*>(analysis.c_ptr()), needupper, const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(p.c_ptr()), &_alglib_env_state);
9465     alglib_impl::ae_state_clear(&_alglib_env_state);
9466     return *(reinterpret_cast<bool*>(&result));
9467 }
9468 
9469 /*************************************************************************
9470 Sparse  Cholesky  decomposition:  update  internally  stored  matrix  with
9471 another one with exactly same sparsity pattern.
9472 
9473 This function is a part of the 'expert' sparse Cholesky API:
9474 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
9475   matrix to be factorized into internal storage
9476 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
9477   with lower bounds on pivot magnitudes and additional overflow safeguards
9478 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
9479   precomputed symbolic analysis and internally stored matrix - and outputs
9480   result
9481 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
9482   pattern into internal storage so  one  may  reuse  previously  allocated
9483   temporaries and previously performed symbolic analysis
9484 
9485 This specific function replaces internally stored  numerical  values  with
9486 ones from another sparse matrix (but having exactly same sparsity  pattern
9487 as one that was used for initial SparseCholeskyAnalyze() call).
9488 
9489 NOTE: all internal processing is performed with lower triangular  matrices
9490       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
9491       triangular storage means  that  one  format  conversion  and/or  one
9492       transposition will be performed  internally  for  the  analysis  and
9493       factorization phases. Thus, highest  performance  is  achieved  when
9494       input is a lower triangular CRS matrix.
9495 
9496 INPUT PARAMETERS:
9497     Analysis    -   analysis object
9498     A           -   sparse square matrix in any sparse storage format.  It
9499                     MUST have exactly same sparsity pattern as that of the
9500                     matrix that was passed to SparseCholeskyAnalyze().
9501                     Any difference (missing elements or additional elements)
9502                     may result in unpredictable and undefined  behavior  -
9503                     an algorithm may fail due to memory access violation.
9504     IsUpper     -   whether upper or lower  triangle  is  decomposed  (the
9505                     other one is ignored).
9506 
9507 OUTPUT PARAMETERS:
9508     Analysis    -   contains:
9509                     * symbolic analysis of the matrix structure which will
9510                       be used later to guide numerical factorization.
9511                     * specific numeric values loaded into internal  memory
9512                       waiting for the factorization to be performed
9513 
9514   -- ALGLIB routine --
9515      20.09.2020
9516      Bochkanov Sergey
9517 *************************************************************************/
sparsecholeskyreload(const sparsedecompositionanalysis & analysis,const sparsematrix & a,const bool isupper,const xparams _xparams)9518 void sparsecholeskyreload(const sparsedecompositionanalysis &analysis, const sparsematrix &a, const bool isupper, const xparams _xparams)
9519 {
9520     jmp_buf _break_jump;
9521     alglib_impl::ae_state _alglib_env_state;
9522     alglib_impl::ae_state_init(&_alglib_env_state);
9523     if( setjmp(_break_jump) )
9524     {
9525 #if !defined(AE_NO_EXCEPTIONS)
9526         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9527 #else
9528         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9529         return;
9530 #endif
9531     }
9532     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9533     if( _xparams.flags!=0x0 )
9534         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9535     alglib_impl::sparsecholeskyreload(const_cast<alglib_impl::sparsedecompositionanalysis*>(analysis.c_ptr()), const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, &_alglib_env_state);
9536     alglib_impl::ae_state_clear(&_alglib_env_state);
9537     return;
9538 }
9539 #endif
9540 
9541 #if defined(AE_COMPILE_BDSVD) || !defined(AE_PARTIAL_BUILD)
9542 /*************************************************************************
9543 Singular value decomposition of a bidiagonal matrix (extended algorithm)
9544 
9545 COMMERCIAL EDITION OF ALGLIB:
9546 
9547   ! Commercial version of ALGLIB includes one  important  improvement   of
9548   ! this function, which can be used from C++ and C#:
9549   ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
9550   !
9551   ! Intel MKL gives approximately constant  (with  respect  to  number  of
9552   ! worker threads) acceleration factor which depends on CPU  being  used,
9553   ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
9554   ! comparison.
9555   !
9556   ! Generally, commercial ALGLIB is several times faster than  open-source
9557   ! generic C edition, and many times faster than open-source C# edition.
9558   !
9559   ! Multithreaded acceleration is NOT supported for this function.
9560   !
9561   ! We recommend you to read 'Working with commercial version' section  of
9562   ! ALGLIB Reference Manual in order to find out how to  use  performance-
9563   ! related features provided by commercial edition of ALGLIB.
9564 
9565 The algorithm performs the singular value decomposition  of  a  bidiagonal
9566 matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and  P -
9567 orthogonal matrices, S - diagonal matrix with non-negative elements on the
9568 main diagonal, in descending order.
9569 
9570 The  algorithm  finds  singular  values.  In  addition,  the algorithm can
9571 calculate  matrices  Q  and P (more precisely, not the matrices, but their
9572 product  with  given  matrices U and VT - U*Q and (P^T)*VT)).  Of  course,
9573 matrices U and VT can be of any type, including identity. Furthermore, the
9574 algorithm can calculate Q'*C (this product is calculated more  effectively
9575 than U*Q,  because  this calculation operates with rows instead  of matrix
9576 columns).
9577 
9578 The feature of the algorithm is its ability to find  all  singular  values
9579 including those which are arbitrarily close to 0  with  relative  accuracy
9580 close to  machine precision. If the parameter IsFractionalAccuracyRequired
9581 is set to True, all singular values will have high relative accuracy close
9582 to machine precision. If the parameter is set to False, only  the  biggest
9583 singular value will have relative accuracy  close  to  machine  precision.
9584 The absolute error of other singular values is equal to the absolute error
9585 of the biggest singular value.
9586 
9587 Input parameters:
9588     D       -   main diagonal of matrix B.
9589                 Array whose index ranges within [0..N-1].
9590     E       -   superdiagonal (or subdiagonal) of matrix B.
9591                 Array whose index ranges within [0..N-2].
9592     N       -   size of matrix B.
9593     IsUpper -   True, if the matrix is upper bidiagonal.
9594     IsFractionalAccuracyRequired -
9595                 THIS PARAMETER IS IGNORED SINCE ALGLIB 3.5.0
9596                 SINGULAR VALUES ARE ALWAYS SEARCHED WITH HIGH ACCURACY.
9597     U       -   matrix to be multiplied by Q.
9598                 Array whose indexes range within [0..NRU-1, 0..N-1].
9599                 The matrix can be bigger, in that case only the  submatrix
9600                 [0..NRU-1, 0..N-1] will be multiplied by Q.
9601     NRU     -   number of rows in matrix U.
9602     C       -   matrix to be multiplied by Q'.
9603                 Array whose indexes range within [0..N-1, 0..NCC-1].
9604                 The matrix can be bigger, in that case only the  submatrix
9605                 [0..N-1, 0..NCC-1] will be multiplied by Q'.
9606     NCC     -   number of columns in matrix C.
9607     VT      -   matrix to be multiplied by P^T.
9608                 Array whose indexes range within [0..N-1, 0..NCVT-1].
9609                 The matrix can be bigger, in that case only the  submatrix
9610                 [0..N-1, 0..NCVT-1] will be multiplied by P^T.
9611     NCVT    -   number of columns in matrix VT.
9612 
9613 Output parameters:
9614     D       -   singular values of matrix B in descending order.
9615     U       -   if NRU>0, contains matrix U*Q.
9616     VT      -   if NCVT>0, contains matrix (P^T)*VT.
9617     C       -   if NCC>0, contains matrix Q'*C.
9618 
9619 Result:
9620     True, if the algorithm has converged.
9621     False, if the algorithm hasn't converged (rare case).
9622 
9623 NOTE: multiplication U*Q is performed by means of transposition to internal
9624       buffer, multiplication and backward transposition. It helps to avoid
9625       costly columnwise operations and speed-up algorithm.
9626 
9627 Additional information:
9628     The type of convergence is controlled by the internal  parameter  TOL.
9629     If the parameter is greater than 0, the singular values will have
9630     relative accuracy TOL. If TOL<0, the singular values will have
9631     absolute accuracy ABS(TOL)*norm(B).
9632     By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon,
9633     where Epsilon is the machine precision. It is not  recommended  to  use
9634     TOL less than 10*Epsilon since this will  considerably  slow  down  the
9635     algorithm and may not lead to error decreasing.
9636 
9637 History:
9638     * 31 March, 2007.
9639         changed MAXITR from 6 to 12.
9640 
9641   -- LAPACK routine (version 3.0) --
9642      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
9643      Courant Institute, Argonne National Lab, and Rice University
9644      October 31, 1999.
9645 *************************************************************************/
rmatrixbdsvd(real_1d_array & d,const real_1d_array & e,const ae_int_t n,const bool isupper,const bool isfractionalaccuracyrequired,real_2d_array & u,const ae_int_t nru,real_2d_array & c,const ae_int_t ncc,real_2d_array & vt,const ae_int_t ncvt,const xparams _xparams)9646 bool rmatrixbdsvd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const bool isupper, const bool isfractionalaccuracyrequired, real_2d_array &u, const ae_int_t nru, real_2d_array &c, const ae_int_t ncc, real_2d_array &vt, const ae_int_t ncvt, const xparams _xparams)
9647 {
9648     jmp_buf _break_jump;
9649     alglib_impl::ae_state _alglib_env_state;
9650     alglib_impl::ae_state_init(&_alglib_env_state);
9651     if( setjmp(_break_jump) )
9652     {
9653 #if !defined(AE_NO_EXCEPTIONS)
9654         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9655 #else
9656         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9657         return 0;
9658 #endif
9659     }
9660     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9661     if( _xparams.flags!=0x0 )
9662         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9663     ae_bool result = alglib_impl::rmatrixbdsvd(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, isupper, isfractionalaccuracyrequired, const_cast<alglib_impl::ae_matrix*>(u.c_ptr()), nru, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ncc, const_cast<alglib_impl::ae_matrix*>(vt.c_ptr()), ncvt, &_alglib_env_state);
9664     alglib_impl::ae_state_clear(&_alglib_env_state);
9665     return *(reinterpret_cast<bool*>(&result));
9666 }
9667 #endif
9668 
9669 #if defined(AE_COMPILE_SVD) || !defined(AE_PARTIAL_BUILD)
9670 /*************************************************************************
9671 Singular value decomposition of a rectangular matrix.
9672 
9673   ! COMMERCIAL EDITION OF ALGLIB:
9674   !
9675   ! Commercial Edition of ALGLIB includes following important improvements
9676   ! of this function:
9677   ! * high-performance native backend with same C# interface (C# version)
9678   ! * hardware vendor (Intel) implementations of linear algebra primitives
9679   !   (C++ and C# versions, x86/x64 platform)
9680   !
9681   ! We recommend you to read 'Working with commercial version' section  of
9682   ! ALGLIB Reference Manual in order to find out how to  use  performance-
9683   ! related features provided by commercial edition of ALGLIB.
9684 
9685 The algorithm calculates the singular value decomposition of a matrix of
9686 size MxN: A = U * S * V^T
9687 
9688 The algorithm finds the singular values and, optionally, matrices U and V^T.
9689 The algorithm can find both first min(M,N) columns of matrix U and rows of
9690 matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM
9691 and NxN respectively).
9692 
9693 Take into account that the subroutine does not return matrix V but V^T.
9694 
9695 Input parameters:
9696     A           -   matrix to be decomposed.
9697                     Array whose indexes range within [0..M-1, 0..N-1].
9698     M           -   number of rows in matrix A.
9699     N           -   number of columns in matrix A.
9700     UNeeded     -   0, 1 or 2. See the description of the parameter U.
9701     VTNeeded    -   0, 1 or 2. See the description of the parameter VT.
9702     AdditionalMemory -
9703                     If the parameter:
9704                      * equals 0, the algorithm doesn't use additional
9705                        memory (lower requirements, lower performance).
9706                      * equals 1, the algorithm uses additional
9707                        memory of size min(M,N)*min(M,N) of real numbers.
9708                        It often speeds up the algorithm.
9709                      * equals 2, the algorithm uses additional
9710                        memory of size M*min(M,N) of real numbers.
9711                        It allows to get a maximum performance.
9712                     The recommended value of the parameter is 2.
9713 
9714 Output parameters:
9715     W           -   contains singular values in descending order.
9716     U           -   if UNeeded=0, U isn't changed, the left singular vectors
9717                     are not calculated.
9718                     if Uneeded=1, U contains left singular vectors (first
9719                     min(M,N) columns of matrix U). Array whose indexes range
9720                     within [0..M-1, 0..Min(M,N)-1].
9721                     if UNeeded=2, U contains matrix U wholly. Array whose
9722                     indexes range within [0..M-1, 0..M-1].
9723     VT          -   if VTNeeded=0, VT isn't changed, the right singular vectors
9724                     are not calculated.
9725                     if VTNeeded=1, VT contains right singular vectors (first
9726                     min(M,N) rows of matrix V^T). Array whose indexes range
9727                     within [0..min(M,N)-1, 0..N-1].
9728                     if VTNeeded=2, VT contains matrix V^T wholly. Array whose
9729                     indexes range within [0..N-1, 0..N-1].
9730 
9731   -- ALGLIB --
9732      Copyright 2005 by Bochkanov Sergey
9733 *************************************************************************/
rmatrixsvd(const real_2d_array & a,const ae_int_t m,const ae_int_t n,const ae_int_t uneeded,const ae_int_t vtneeded,const ae_int_t additionalmemory,real_1d_array & w,real_2d_array & u,real_2d_array & vt,const xparams _xparams)9734 bool rmatrixsvd(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const ae_int_t uneeded, const ae_int_t vtneeded, const ae_int_t additionalmemory, real_1d_array &w, real_2d_array &u, real_2d_array &vt, const xparams _xparams)
9735 {
9736     jmp_buf _break_jump;
9737     alglib_impl::ae_state _alglib_env_state;
9738     alglib_impl::ae_state_init(&_alglib_env_state);
9739     if( setjmp(_break_jump) )
9740     {
9741 #if !defined(AE_NO_EXCEPTIONS)
9742         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9743 #else
9744         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9745         return 0;
9746 #endif
9747     }
9748     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9749     if( _xparams.flags!=0x0 )
9750         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9751     ae_bool result = alglib_impl::rmatrixsvd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, uneeded, vtneeded, additionalmemory, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(u.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vt.c_ptr()), &_alglib_env_state);
9752     alglib_impl::ae_state_clear(&_alglib_env_state);
9753     return *(reinterpret_cast<bool*>(&result));
9754 }
9755 #endif
9756 
9757 #if defined(AE_COMPILE_RCOND) || !defined(AE_PARTIAL_BUILD)
9758 /*************************************************************************
9759 Estimate of a matrix condition number (1-norm)
9760 
9761 The algorithm calculates a lower bound of the condition number. In this case,
9762 the algorithm does not return a lower bound of the condition number, but an
9763 inverse number (to avoid an overflow in case of a singular matrix).
9764 
9765 Input parameters:
9766     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
9767     N   -   size of matrix A.
9768 
9769 Result: 1/LowerBound(cond(A))
9770 
9771 NOTE:
9772     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9773     0.0 is returned in such cases.
9774 *************************************************************************/
rmatrixrcond1(const real_2d_array & a,const ae_int_t n,const xparams _xparams)9775 double rmatrixrcond1(const real_2d_array &a, const ae_int_t n, const xparams _xparams)
9776 {
9777     jmp_buf _break_jump;
9778     alglib_impl::ae_state _alglib_env_state;
9779     alglib_impl::ae_state_init(&_alglib_env_state);
9780     if( setjmp(_break_jump) )
9781     {
9782 #if !defined(AE_NO_EXCEPTIONS)
9783         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9784 #else
9785         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9786         return 0;
9787 #endif
9788     }
9789     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9790     if( _xparams.flags!=0x0 )
9791         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9792     double result = alglib_impl::rmatrixrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
9793     alglib_impl::ae_state_clear(&_alglib_env_state);
9794     return *(reinterpret_cast<double*>(&result));
9795 }
9796 
9797 /*************************************************************************
9798 Estimate of a matrix condition number (infinity-norm).
9799 
9800 The algorithm calculates a lower bound of the condition number. In this case,
9801 the algorithm does not return a lower bound of the condition number, but an
9802 inverse number (to avoid an overflow in case of a singular matrix).
9803 
9804 Input parameters:
9805     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
9806     N   -   size of matrix A.
9807 
9808 Result: 1/LowerBound(cond(A))
9809 
9810 NOTE:
9811     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9812     0.0 is returned in such cases.
9813 *************************************************************************/
rmatrixrcondinf(const real_2d_array & a,const ae_int_t n,const xparams _xparams)9814 double rmatrixrcondinf(const real_2d_array &a, const ae_int_t n, const xparams _xparams)
9815 {
9816     jmp_buf _break_jump;
9817     alglib_impl::ae_state _alglib_env_state;
9818     alglib_impl::ae_state_init(&_alglib_env_state);
9819     if( setjmp(_break_jump) )
9820     {
9821 #if !defined(AE_NO_EXCEPTIONS)
9822         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9823 #else
9824         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9825         return 0;
9826 #endif
9827     }
9828     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9829     if( _xparams.flags!=0x0 )
9830         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9831     double result = alglib_impl::rmatrixrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
9832     alglib_impl::ae_state_clear(&_alglib_env_state);
9833     return *(reinterpret_cast<double*>(&result));
9834 }
9835 
9836 /*************************************************************************
9837 Condition number estimate of a symmetric positive definite matrix.
9838 
9839 The algorithm calculates a lower bound of the condition number. In this case,
9840 the algorithm does not return a lower bound of the condition number, but an
9841 inverse number (to avoid an overflow in case of a singular matrix).
9842 
9843 It should be noted that 1-norm and inf-norm of condition numbers of symmetric
9844 matrices are equal, so the algorithm doesn't take into account the
9845 differences between these types of norms.
9846 
9847 Input parameters:
9848     A       -   symmetric positive definite matrix which is given by its
9849                 upper or lower triangle depending on the value of
9850                 IsUpper. Array with elements [0..N-1, 0..N-1].
9851     N       -   size of matrix A.
9852     IsUpper -   storage format.
9853 
9854 Result:
9855     1/LowerBound(cond(A)), if matrix A is positive definite,
9856    -1, if matrix A is not positive definite, and its condition number
9857     could not be found by this algorithm.
9858 
9859 NOTE:
9860     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9861     0.0 is returned in such cases.
9862 *************************************************************************/
spdmatrixrcond(const real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)9863 double spdmatrixrcond(const real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
9864 {
9865     jmp_buf _break_jump;
9866     alglib_impl::ae_state _alglib_env_state;
9867     alglib_impl::ae_state_init(&_alglib_env_state);
9868     if( setjmp(_break_jump) )
9869     {
9870 #if !defined(AE_NO_EXCEPTIONS)
9871         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9872 #else
9873         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9874         return 0;
9875 #endif
9876     }
9877     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9878     if( _xparams.flags!=0x0 )
9879         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9880     double result = alglib_impl::spdmatrixrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
9881     alglib_impl::ae_state_clear(&_alglib_env_state);
9882     return *(reinterpret_cast<double*>(&result));
9883 }
9884 
9885 /*************************************************************************
9886 Triangular matrix: estimate of a condition number (1-norm)
9887 
9888 The algorithm calculates a lower bound of the condition number. In this case,
9889 the algorithm does not return a lower bound of the condition number, but an
9890 inverse number (to avoid an overflow in case of a singular matrix).
9891 
9892 Input parameters:
9893     A       -   matrix. Array[0..N-1, 0..N-1].
9894     N       -   size of A.
9895     IsUpper -   True, if the matrix is upper triangular.
9896     IsUnit  -   True, if the matrix has a unit diagonal.
9897 
9898 Result: 1/LowerBound(cond(A))
9899 
9900 NOTE:
9901     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9902     0.0 is returned in such cases.
9903 *************************************************************************/
rmatrixtrrcond1(const real_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,const xparams _xparams)9904 double rmatrixtrrcond1(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, const xparams _xparams)
9905 {
9906     jmp_buf _break_jump;
9907     alglib_impl::ae_state _alglib_env_state;
9908     alglib_impl::ae_state_init(&_alglib_env_state);
9909     if( setjmp(_break_jump) )
9910     {
9911 #if !defined(AE_NO_EXCEPTIONS)
9912         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9913 #else
9914         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9915         return 0;
9916 #endif
9917     }
9918     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9919     if( _xparams.flags!=0x0 )
9920         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9921     double result = alglib_impl::rmatrixtrrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
9922     alglib_impl::ae_state_clear(&_alglib_env_state);
9923     return *(reinterpret_cast<double*>(&result));
9924 }
9925 
9926 /*************************************************************************
9927 Triangular matrix: estimate of a matrix condition number (infinity-norm).
9928 
9929 The algorithm calculates a lower bound of the condition number. In this case,
9930 the algorithm does not return a lower bound of the condition number, but an
9931 inverse number (to avoid an overflow in case of a singular matrix).
9932 
9933 Input parameters:
9934     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
9935     N   -   size of matrix A.
9936     IsUpper -   True, if the matrix is upper triangular.
9937     IsUnit  -   True, if the matrix has a unit diagonal.
9938 
9939 Result: 1/LowerBound(cond(A))
9940 
9941 NOTE:
9942     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9943     0.0 is returned in such cases.
9944 *************************************************************************/
rmatrixtrrcondinf(const real_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,const xparams _xparams)9945 double rmatrixtrrcondinf(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, const xparams _xparams)
9946 {
9947     jmp_buf _break_jump;
9948     alglib_impl::ae_state _alglib_env_state;
9949     alglib_impl::ae_state_init(&_alglib_env_state);
9950     if( setjmp(_break_jump) )
9951     {
9952 #if !defined(AE_NO_EXCEPTIONS)
9953         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9954 #else
9955         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9956         return 0;
9957 #endif
9958     }
9959     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9960     if( _xparams.flags!=0x0 )
9961         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9962     double result = alglib_impl::rmatrixtrrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
9963     alglib_impl::ae_state_clear(&_alglib_env_state);
9964     return *(reinterpret_cast<double*>(&result));
9965 }
9966 
9967 /*************************************************************************
9968 Condition number estimate of a Hermitian positive definite matrix.
9969 
9970 The algorithm calculates a lower bound of the condition number. In this case,
9971 the algorithm does not return a lower bound of the condition number, but an
9972 inverse number (to avoid an overflow in case of a singular matrix).
9973 
9974 It should be noted that 1-norm and inf-norm of condition numbers of symmetric
9975 matrices are equal, so the algorithm doesn't take into account the
9976 differences between these types of norms.
9977 
9978 Input parameters:
9979     A       -   Hermitian positive definite matrix which is given by its
9980                 upper or lower triangle depending on the value of
9981                 IsUpper. Array with elements [0..N-1, 0..N-1].
9982     N       -   size of matrix A.
9983     IsUpper -   storage format.
9984 
9985 Result:
9986     1/LowerBound(cond(A)), if matrix A is positive definite,
9987    -1, if matrix A is not positive definite, and its condition number
9988     could not be found by this algorithm.
9989 
9990 NOTE:
9991     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9992     0.0 is returned in such cases.
9993 *************************************************************************/
hpdmatrixrcond(const complex_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)9994 double hpdmatrixrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
9995 {
9996     jmp_buf _break_jump;
9997     alglib_impl::ae_state _alglib_env_state;
9998     alglib_impl::ae_state_init(&_alglib_env_state);
9999     if( setjmp(_break_jump) )
10000     {
10001 #if !defined(AE_NO_EXCEPTIONS)
10002         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10003 #else
10004         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10005         return 0;
10006 #endif
10007     }
10008     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10009     if( _xparams.flags!=0x0 )
10010         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10011     double result = alglib_impl::hpdmatrixrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
10012     alglib_impl::ae_state_clear(&_alglib_env_state);
10013     return *(reinterpret_cast<double*>(&result));
10014 }
10015 
10016 /*************************************************************************
10017 Estimate of a matrix condition number (1-norm)
10018 
10019 The algorithm calculates a lower bound of the condition number. In this case,
10020 the algorithm does not return a lower bound of the condition number, but an
10021 inverse number (to avoid an overflow in case of a singular matrix).
10022 
10023 Input parameters:
10024     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
10025     N   -   size of matrix A.
10026 
10027 Result: 1/LowerBound(cond(A))
10028 
10029 NOTE:
10030     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10031     0.0 is returned in such cases.
10032 *************************************************************************/
cmatrixrcond1(const complex_2d_array & a,const ae_int_t n,const xparams _xparams)10033 double cmatrixrcond1(const complex_2d_array &a, const ae_int_t n, const xparams _xparams)
10034 {
10035     jmp_buf _break_jump;
10036     alglib_impl::ae_state _alglib_env_state;
10037     alglib_impl::ae_state_init(&_alglib_env_state);
10038     if( setjmp(_break_jump) )
10039     {
10040 #if !defined(AE_NO_EXCEPTIONS)
10041         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10042 #else
10043         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10044         return 0;
10045 #endif
10046     }
10047     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10048     if( _xparams.flags!=0x0 )
10049         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10050     double result = alglib_impl::cmatrixrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
10051     alglib_impl::ae_state_clear(&_alglib_env_state);
10052     return *(reinterpret_cast<double*>(&result));
10053 }
10054 
10055 /*************************************************************************
10056 Estimate of a matrix condition number (infinity-norm).
10057 
10058 The algorithm calculates a lower bound of the condition number. In this case,
10059 the algorithm does not return a lower bound of the condition number, but an
10060 inverse number (to avoid an overflow in case of a singular matrix).
10061 
10062 Input parameters:
10063     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
10064     N   -   size of matrix A.
10065 
10066 Result: 1/LowerBound(cond(A))
10067 
10068 NOTE:
10069     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10070     0.0 is returned in such cases.
10071 *************************************************************************/
cmatrixrcondinf(const complex_2d_array & a,const ae_int_t n,const xparams _xparams)10072 double cmatrixrcondinf(const complex_2d_array &a, const ae_int_t n, const xparams _xparams)
10073 {
10074     jmp_buf _break_jump;
10075     alglib_impl::ae_state _alglib_env_state;
10076     alglib_impl::ae_state_init(&_alglib_env_state);
10077     if( setjmp(_break_jump) )
10078     {
10079 #if !defined(AE_NO_EXCEPTIONS)
10080         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10081 #else
10082         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10083         return 0;
10084 #endif
10085     }
10086     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10087     if( _xparams.flags!=0x0 )
10088         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10089     double result = alglib_impl::cmatrixrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
10090     alglib_impl::ae_state_clear(&_alglib_env_state);
10091     return *(reinterpret_cast<double*>(&result));
10092 }
10093 
10094 /*************************************************************************
10095 Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
10096 
10097 The algorithm calculates a lower bound of the condition number. In this case,
10098 the algorithm does not return a lower bound of the condition number, but an
10099 inverse number (to avoid an overflow in case of a singular matrix).
10100 
10101 Input parameters:
10102     LUA         -   LU decomposition of a matrix in compact form. Output of
10103                     the RMatrixLU subroutine.
10104     N           -   size of matrix A.
10105 
10106 Result: 1/LowerBound(cond(A))
10107 
10108 NOTE:
10109     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10110     0.0 is returned in such cases.
10111 *************************************************************************/
rmatrixlurcond1(const real_2d_array & lua,const ae_int_t n,const xparams _xparams)10112 double rmatrixlurcond1(const real_2d_array &lua, const ae_int_t n, const xparams _xparams)
10113 {
10114     jmp_buf _break_jump;
10115     alglib_impl::ae_state _alglib_env_state;
10116     alglib_impl::ae_state_init(&_alglib_env_state);
10117     if( setjmp(_break_jump) )
10118     {
10119 #if !defined(AE_NO_EXCEPTIONS)
10120         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10121 #else
10122         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10123         return 0;
10124 #endif
10125     }
10126     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10127     if( _xparams.flags!=0x0 )
10128         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10129     double result = alglib_impl::rmatrixlurcond1(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
10130     alglib_impl::ae_state_clear(&_alglib_env_state);
10131     return *(reinterpret_cast<double*>(&result));
10132 }
10133 
10134 /*************************************************************************
10135 Estimate of the condition number of a matrix given by its LU decomposition
10136 (infinity norm).
10137 
10138 The algorithm calculates a lower bound of the condition number. In this case,
10139 the algorithm does not return a lower bound of the condition number, but an
10140 inverse number (to avoid an overflow in case of a singular matrix).
10141 
10142 Input parameters:
10143     LUA     -   LU decomposition of a matrix in compact form. Output of
10144                 the RMatrixLU subroutine.
10145     N       -   size of matrix A.
10146 
10147 Result: 1/LowerBound(cond(A))
10148 
10149 NOTE:
10150     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10151     0.0 is returned in such cases.
10152 *************************************************************************/
rmatrixlurcondinf(const real_2d_array & lua,const ae_int_t n,const xparams _xparams)10153 double rmatrixlurcondinf(const real_2d_array &lua, const ae_int_t n, const xparams _xparams)
10154 {
10155     jmp_buf _break_jump;
10156     alglib_impl::ae_state _alglib_env_state;
10157     alglib_impl::ae_state_init(&_alglib_env_state);
10158     if( setjmp(_break_jump) )
10159     {
10160 #if !defined(AE_NO_EXCEPTIONS)
10161         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10162 #else
10163         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10164         return 0;
10165 #endif
10166     }
10167     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10168     if( _xparams.flags!=0x0 )
10169         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10170     double result = alglib_impl::rmatrixlurcondinf(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
10171     alglib_impl::ae_state_clear(&_alglib_env_state);
10172     return *(reinterpret_cast<double*>(&result));
10173 }
10174 
10175 /*************************************************************************
10176 Condition number estimate of a symmetric positive definite matrix given by
10177 Cholesky decomposition.
10178 
10179 The algorithm calculates a lower bound of the condition number. In this
10180 case, the algorithm does not return a lower bound of the condition number,
10181 but an inverse number (to avoid an overflow in case of a singular matrix).
10182 
10183 It should be noted that 1-norm and inf-norm condition numbers of symmetric
10184 matrices are equal, so the algorithm doesn't take into account the
10185 differences between these types of norms.
10186 
10187 Input parameters:
10188     CD  - Cholesky decomposition of matrix A,
10189           output of SMatrixCholesky subroutine.
10190     N   - size of matrix A.
10191 
10192 Result: 1/LowerBound(cond(A))
10193 
10194 NOTE:
10195     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10196     0.0 is returned in such cases.
10197 *************************************************************************/
spdmatrixcholeskyrcond(const real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)10198 double spdmatrixcholeskyrcond(const real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
10199 {
10200     jmp_buf _break_jump;
10201     alglib_impl::ae_state _alglib_env_state;
10202     alglib_impl::ae_state_init(&_alglib_env_state);
10203     if( setjmp(_break_jump) )
10204     {
10205 #if !defined(AE_NO_EXCEPTIONS)
10206         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10207 #else
10208         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10209         return 0;
10210 #endif
10211     }
10212     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10213     if( _xparams.flags!=0x0 )
10214         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10215     double result = alglib_impl::spdmatrixcholeskyrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
10216     alglib_impl::ae_state_clear(&_alglib_env_state);
10217     return *(reinterpret_cast<double*>(&result));
10218 }
10219 
10220 /*************************************************************************
10221 Condition number estimate of a Hermitian positive definite matrix given by
10222 Cholesky decomposition.
10223 
10224 The algorithm calculates a lower bound of the condition number. In this
10225 case, the algorithm does not return a lower bound of the condition number,
10226 but an inverse number (to avoid an overflow in case of a singular matrix).
10227 
10228 It should be noted that 1-norm and inf-norm condition numbers of symmetric
10229 matrices are equal, so the algorithm doesn't take into account the
10230 differences between these types of norms.
10231 
10232 Input parameters:
    CD  - Cholesky decomposition of matrix A,
          output of HPDMatrixCholesky subroutine.
10235     N   - size of matrix A.
10236 
10237 Result: 1/LowerBound(cond(A))
10238 
10239 NOTE:
10240     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10241     0.0 is returned in such cases.
10242 *************************************************************************/
hpdmatrixcholeskyrcond(const complex_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)10243 double hpdmatrixcholeskyrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
10244 {
10245     jmp_buf _break_jump;
10246     alglib_impl::ae_state _alglib_env_state;
10247     alglib_impl::ae_state_init(&_alglib_env_state);
10248     if( setjmp(_break_jump) )
10249     {
10250 #if !defined(AE_NO_EXCEPTIONS)
10251         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10252 #else
10253         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10254         return 0;
10255 #endif
10256     }
10257     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10258     if( _xparams.flags!=0x0 )
10259         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10260     double result = alglib_impl::hpdmatrixcholeskyrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
10261     alglib_impl::ae_state_clear(&_alglib_env_state);
10262     return *(reinterpret_cast<double*>(&result));
10263 }
10264 
10265 /*************************************************************************
10266 Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
10267 
10268 The algorithm calculates a lower bound of the condition number. In this case,
10269 the algorithm does not return a lower bound of the condition number, but an
10270 inverse number (to avoid an overflow in case of a singular matrix).
10271 
10272 Input parameters:
10273     LUA         -   LU decomposition of a matrix in compact form. Output of
10274                     the CMatrixLU subroutine.
10275     N           -   size of matrix A.
10276 
10277 Result: 1/LowerBound(cond(A))
10278 
10279 NOTE:
10280     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10281     0.0 is returned in such cases.
10282 *************************************************************************/
cmatrixlurcond1(const complex_2d_array & lua,const ae_int_t n,const xparams _xparams)10283 double cmatrixlurcond1(const complex_2d_array &lua, const ae_int_t n, const xparams _xparams)
10284 {
10285     jmp_buf _break_jump;
10286     alglib_impl::ae_state _alglib_env_state;
10287     alglib_impl::ae_state_init(&_alglib_env_state);
10288     if( setjmp(_break_jump) )
10289     {
10290 #if !defined(AE_NO_EXCEPTIONS)
10291         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10292 #else
10293         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10294         return 0;
10295 #endif
10296     }
10297     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10298     if( _xparams.flags!=0x0 )
10299         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10300     double result = alglib_impl::cmatrixlurcond1(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
10301     alglib_impl::ae_state_clear(&_alglib_env_state);
10302     return *(reinterpret_cast<double*>(&result));
10303 }
10304 
10305 /*************************************************************************
10306 Estimate of the condition number of a matrix given by its LU decomposition
10307 (infinity norm).
10308 
10309 The algorithm calculates a lower bound of the condition number. In this case,
10310 the algorithm does not return a lower bound of the condition number, but an
10311 inverse number (to avoid an overflow in case of a singular matrix).
10312 
10313 Input parameters:
10314     LUA     -   LU decomposition of a matrix in compact form. Output of
10315                 the CMatrixLU subroutine.
10316     N       -   size of matrix A.
10317 
10318 Result: 1/LowerBound(cond(A))
10319 
10320 NOTE:
10321     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10322     0.0 is returned in such cases.
10323 *************************************************************************/
cmatrixlurcondinf(const complex_2d_array & lua,const ae_int_t n,const xparams _xparams)10324 double cmatrixlurcondinf(const complex_2d_array &lua, const ae_int_t n, const xparams _xparams)
10325 {
10326     jmp_buf _break_jump;
10327     alglib_impl::ae_state _alglib_env_state;
10328     alglib_impl::ae_state_init(&_alglib_env_state);
10329     if( setjmp(_break_jump) )
10330     {
10331 #if !defined(AE_NO_EXCEPTIONS)
10332         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10333 #else
10334         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10335         return 0;
10336 #endif
10337     }
10338     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10339     if( _xparams.flags!=0x0 )
10340         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10341     double result = alglib_impl::cmatrixlurcondinf(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
10342     alglib_impl::ae_state_clear(&_alglib_env_state);
10343     return *(reinterpret_cast<double*>(&result));
10344 }
10345 
10346 /*************************************************************************
10347 Triangular matrix: estimate of a condition number (1-norm)
10348 
10349 The algorithm calculates a lower bound of the condition number. In this case,
10350 the algorithm does not return a lower bound of the condition number, but an
10351 inverse number (to avoid an overflow in case of a singular matrix).
10352 
10353 Input parameters:
10354     A       -   matrix. Array[0..N-1, 0..N-1].
10355     N       -   size of A.
10356     IsUpper -   True, if the matrix is upper triangular.
10357     IsUnit  -   True, if the matrix has a unit diagonal.
10358 
10359 Result: 1/LowerBound(cond(A))
10360 
10361 NOTE:
10362     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10363     0.0 is returned in such cases.
10364 *************************************************************************/
cmatrixtrrcond1(const complex_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,const xparams _xparams)10365 double cmatrixtrrcond1(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, const xparams _xparams)
10366 {
10367     jmp_buf _break_jump;
10368     alglib_impl::ae_state _alglib_env_state;
10369     alglib_impl::ae_state_init(&_alglib_env_state);
10370     if( setjmp(_break_jump) )
10371     {
10372 #if !defined(AE_NO_EXCEPTIONS)
10373         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10374 #else
10375         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10376         return 0;
10377 #endif
10378     }
10379     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10380     if( _xparams.flags!=0x0 )
10381         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10382     double result = alglib_impl::cmatrixtrrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
10383     alglib_impl::ae_state_clear(&_alglib_env_state);
10384     return *(reinterpret_cast<double*>(&result));
10385 }
10386 
10387 /*************************************************************************
10388 Triangular matrix: estimate of a matrix condition number (infinity-norm).
10389 
10390 The algorithm calculates a lower bound of the condition number. In this case,
10391 the algorithm does not return a lower bound of the condition number, but an
10392 inverse number (to avoid an overflow in case of a singular matrix).
10393 
10394 Input parameters:
10395     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
10396     N   -   size of matrix A.
10397     IsUpper -   True, if the matrix is upper triangular.
10398     IsUnit  -   True, if the matrix has a unit diagonal.
10399 
10400 Result: 1/LowerBound(cond(A))
10401 
10402 NOTE:
10403     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10404     0.0 is returned in such cases.
10405 *************************************************************************/
cmatrixtrrcondinf(const complex_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,const xparams _xparams)10406 double cmatrixtrrcondinf(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, const xparams _xparams)
10407 {
10408     jmp_buf _break_jump;
10409     alglib_impl::ae_state _alglib_env_state;
10410     alglib_impl::ae_state_init(&_alglib_env_state);
10411     if( setjmp(_break_jump) )
10412     {
10413 #if !defined(AE_NO_EXCEPTIONS)
10414         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10415 #else
10416         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10417         return 0;
10418 #endif
10419     }
10420     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10421     if( _xparams.flags!=0x0 )
10422         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10423     double result = alglib_impl::cmatrixtrrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
10424     alglib_impl::ae_state_clear(&_alglib_env_state);
10425     return *(reinterpret_cast<double*>(&result));
10426 }
10427 #endif
10428 
10429 #if defined(AE_COMPILE_FBLS) || !defined(AE_PARTIAL_BUILD)
10430 
10431 #endif
10432 
10433 #if defined(AE_COMPILE_NORMESTIMATOR) || !defined(AE_PARTIAL_BUILD)
10434 /*************************************************************************
10435 This object stores state of the iterative norm estimation algorithm.
10436 
10437 You should use ALGLIB functions to work with this object.
10438 *************************************************************************/
_normestimatorstate_owner::_normestimatorstate_owner()
{
    // Default constructor: allocates and zero-initializes the wrapped
    // alglib_impl::normestimatorstate structure.
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path: the ALGLIB core longjmp()s here if allocation or
        // initialization fails; free any partially constructed state.
        if( p_struct!=NULL )
        {
            alglib_impl::_normestimatorstate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        // Exception-free build: record the error and leave the object empty.
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;   // must be NULL before ae_malloc so the error path above is safe
    p_struct = (alglib_impl::normestimatorstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::normestimatorstate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::normestimatorstate));
    alglib_impl::_normestimatorstate_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
10467 
_normestimatorstate_owner::_normestimatorstate_owner(const _normestimatorstate_owner &rhs)
{
    // Copy constructor: deep-copies the wrapped implementation structure
    // from rhs (rhs must already be initialized).
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path: the ALGLIB core longjmp()s here on failure;
        // free any partially constructed state.
        if( p_struct!=NULL )
        {
            alglib_impl::_normestimatorstate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        // Exception-free build: record the error and leave the object empty.
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;   // must be NULL before the assert/alloc so the error path above is safe
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: normestimatorstate copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::normestimatorstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::normestimatorstate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::normestimatorstate));
    alglib_impl::_normestimatorstate_init_copy(p_struct, const_cast<alglib_impl::normestimatorstate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
10497 
/*************************************************************************
Assignment: destroys the current C-core contents in place and deep-copies RHS
into the same allocation.
*************************************************************************/
_normestimatorstate_owner& _normestimatorstate_owner::operator=(const _normestimatorstate_owner &rhs)
{
    // Self-assignment is a no-op.
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    // Recovery point for longjmp()-style error reporting from the C core.
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: normestimatorstate assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: normestimatorstate assignment constructor failure (source is not initialized)", &_state);
    // Destroy-then-copy in place: the allocation made at construction time is reused.
    alglib_impl::_normestimatorstate_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::normestimatorstate));
    // NOTE(review): if _init_copy longjmp()s here, p_struct is left pointing at a
    // zeroed (destroyed) struct which the destructor will destroy again -- verify
    // that _normestimatorstate_destroy tolerates a zero-initialized struct.
    alglib_impl::_normestimatorstate_init_copy(p_struct, const_cast<alglib_impl::normestimatorstate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
10524 
~_normestimatorstate_owner()10525 _normestimatorstate_owner::~_normestimatorstate_owner()
10526 {
10527     if( p_struct!=NULL )
10528     {
10529         alglib_impl::_normestimatorstate_destroy(p_struct);
10530         ae_free(p_struct);
10531     }
10532 }
10533 
// Returns the underlying C-core structure so wrapper functions can pass it
// to the alglib_impl layer. May be NULL if construction failed in
// AE_NO_EXCEPTIONS mode.
alglib_impl::normestimatorstate* _normestimatorstate_owner::c_ptr()
{
    return p_struct;
}
10538 
c_ptr() const10539 alglib_impl::normestimatorstate* _normestimatorstate_owner::c_ptr() const
10540 {
10541     return const_cast<alglib_impl::normestimatorstate*>(p_struct);
10542 }
// Default constructor: delegates to the owner base, which allocates and
// initializes the underlying C-core structure.
normestimatorstate::normestimatorstate() : _normestimatorstate_owner()
{
}
10546 
// Copy constructor: the owner base performs the deep copy of the C-core state.
normestimatorstate::normestimatorstate(const normestimatorstate &rhs):_normestimatorstate_owner(rhs)
{
}
10550 
// Assignment: delegate the deep copy to the owner base, guarding against
// self-assignment.
normestimatorstate& normestimatorstate::operator=(const normestimatorstate &rhs)
{
    if( this!=&rhs )
        _normestimatorstate_owner::operator=(rhs);
    return *this;
}
10558 
// Destructor: the owner base releases the C-core structure.
normestimatorstate::~normestimatorstate()
{
}
10562 
10563 /*************************************************************************
10564 This procedure initializes matrix norm estimator.
10565 
10566 USAGE:
10567 1. User initializes algorithm state with NormEstimatorCreate() call
10568 2. User calls NormEstimatorEstimateSparse() (or NormEstimatorIteration())
10569 3. User calls NormEstimatorResults() to get solution.
10570 
10571 INPUT PARAMETERS:
10572     M       -   number of rows in the matrix being estimated, M>0
10573     N       -   number of columns in the matrix being estimated, N>0
10574     NStart  -   number of random starting vectors
10575                 recommended value - at least 5.
10576     NIts    -   number of iterations to do with best starting vector
10577                 recommended value - at least 5.
10578 
10579 OUTPUT PARAMETERS:
10580     State   -   structure which stores algorithm state
10581 
10582 
10583 NOTE: this algorithm is effectively deterministic, i.e. it always  returns
10584 same result when repeatedly called for the same matrix. In fact, algorithm
10585 uses randomized starting vectors, but internal  random  numbers  generator
always generates the same sequence of random values (this is a feature, not
a bug).
10588 
10589 Algorithm can be made non-deterministic with NormEstimatorSetSeed(0) call.
10590 
10591   -- ALGLIB --
10592      Copyright 06.12.2011 by Bochkanov Sergey
10593 *************************************************************************/
normestimatorcreate(const ae_int_t m,const ae_int_t n,const ae_int_t nstart,const ae_int_t nits,normestimatorstate & state,const xparams _xparams)10594 void normestimatorcreate(const ae_int_t m, const ae_int_t n, const ae_int_t nstart, const ae_int_t nits, normestimatorstate &state, const xparams _xparams)
10595 {
10596     jmp_buf _break_jump;
10597     alglib_impl::ae_state _alglib_env_state;
10598     alglib_impl::ae_state_init(&_alglib_env_state);
10599     if( setjmp(_break_jump) )
10600     {
10601 #if !defined(AE_NO_EXCEPTIONS)
10602         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10603 #else
10604         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10605         return;
10606 #endif
10607     }
10608     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10609     if( _xparams.flags!=0x0 )
10610         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10611     alglib_impl::normestimatorcreate(m, n, nstart, nits, const_cast<alglib_impl::normestimatorstate*>(state.c_ptr()), &_alglib_env_state);
10612     alglib_impl::ae_state_clear(&_alglib_env_state);
10613     return;
10614 }
10615 
10616 /*************************************************************************
10617 This function changes seed value used by algorithm. In some cases we  need
10618 deterministic processing, i.e. subsequent calls must return equal results,
10619 in other cases we need non-deterministic algorithm which returns different
10620 results for the same matrix on every pass.
10621 
10622 Setting zero seed will lead to non-deterministic algorithm, while non-zero
10623 value will make our algorithm deterministic.
10624 
10625 INPUT PARAMETERS:
10626     State       -   norm estimator state, must be initialized with a  call
10627                     to NormEstimatorCreate()
10628     SeedVal     -   seed value, >=0. Zero value = non-deterministic algo.
10629 
10630   -- ALGLIB --
10631      Copyright 06.12.2011 by Bochkanov Sergey
10632 *************************************************************************/
normestimatorsetseed(const normestimatorstate & state,const ae_int_t seedval,const xparams _xparams)10633 void normestimatorsetseed(const normestimatorstate &state, const ae_int_t seedval, const xparams _xparams)
10634 {
10635     jmp_buf _break_jump;
10636     alglib_impl::ae_state _alglib_env_state;
10637     alglib_impl::ae_state_init(&_alglib_env_state);
10638     if( setjmp(_break_jump) )
10639     {
10640 #if !defined(AE_NO_EXCEPTIONS)
10641         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10642 #else
10643         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10644         return;
10645 #endif
10646     }
10647     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10648     if( _xparams.flags!=0x0 )
10649         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10650     alglib_impl::normestimatorsetseed(const_cast<alglib_impl::normestimatorstate*>(state.c_ptr()), seedval, &_alglib_env_state);
10651     alglib_impl::ae_state_clear(&_alglib_env_state);
10652     return;
10653 }
10654 
10655 /*************************************************************************
10656 This function estimates norm of the sparse M*N matrix A.
10657 
10658 INPUT PARAMETERS:
10659     State       -   norm estimator state, must be initialized with a  call
10660                     to NormEstimatorCreate()
10661     A           -   sparse M*N matrix, must be converted to CRS format
10662                     prior to calling this function.
10663 
10664 After this function  is  over  you can call NormEstimatorResults() to get
10665 estimate of the norm(A).
10666 
10667   -- ALGLIB --
10668      Copyright 06.12.2011 by Bochkanov Sergey
10669 *************************************************************************/
normestimatorestimatesparse(const normestimatorstate & state,const sparsematrix & a,const xparams _xparams)10670 void normestimatorestimatesparse(const normestimatorstate &state, const sparsematrix &a, const xparams _xparams)
10671 {
10672     jmp_buf _break_jump;
10673     alglib_impl::ae_state _alglib_env_state;
10674     alglib_impl::ae_state_init(&_alglib_env_state);
10675     if( setjmp(_break_jump) )
10676     {
10677 #if !defined(AE_NO_EXCEPTIONS)
10678         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10679 #else
10680         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10681         return;
10682 #endif
10683     }
10684     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10685     if( _xparams.flags!=0x0 )
10686         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10687     alglib_impl::normestimatorestimatesparse(const_cast<alglib_impl::normestimatorstate*>(state.c_ptr()), const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), &_alglib_env_state);
10688     alglib_impl::ae_state_clear(&_alglib_env_state);
10689     return;
10690 }
10691 
10692 /*************************************************************************
10693 Matrix norm estimation results
10694 
10695 INPUT PARAMETERS:
10696     State   -   algorithm state
10697 
10698 OUTPUT PARAMETERS:
10699     Nrm     -   estimate of the matrix norm, Nrm>=0
10700 
10701   -- ALGLIB --
10702      Copyright 06.12.2011 by Bochkanov Sergey
10703 *************************************************************************/
normestimatorresults(const normestimatorstate & state,double & nrm,const xparams _xparams)10704 void normestimatorresults(const normestimatorstate &state, double &nrm, const xparams _xparams)
10705 {
10706     jmp_buf _break_jump;
10707     alglib_impl::ae_state _alglib_env_state;
10708     alglib_impl::ae_state_init(&_alglib_env_state);
10709     if( setjmp(_break_jump) )
10710     {
10711 #if !defined(AE_NO_EXCEPTIONS)
10712         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10713 #else
10714         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10715         return;
10716 #endif
10717     }
10718     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10719     if( _xparams.flags!=0x0 )
10720         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10721     alglib_impl::normestimatorresults(const_cast<alglib_impl::normestimatorstate*>(state.c_ptr()), &nrm, &_alglib_env_state);
10722     alglib_impl::ae_state_clear(&_alglib_env_state);
10723     return;
10724 }
10725 #endif
10726 
10727 #if defined(AE_COMPILE_MATINV) || !defined(AE_PARTIAL_BUILD)
10728 /*************************************************************************
10729 Matrix inverse report:
10730 * R1    reciprocal of condition number in 1-norm
10731 * RInf  reciprocal of condition number in inf-norm
10732 *************************************************************************/
/*************************************************************************
Default constructor: allocates and zero-initializes the C-core matinvreport.
*************************************************************************/
_matinvreport_owner::_matinvreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    // The ALGLIB C core reports errors by longjmp()-ing back to this frame,
    // so the recovery point must be established before any work is done.
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path: release any partially-constructed object, then either
        // throw (default build) or set the error flag (AE_NO_EXCEPTIONS build).
        if( p_struct!=NULL )
        {
            alglib_impl::_matinvreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct must be NULL before allocation so the error path is safe if
    // ae_malloc() longjmp()s.
    p_struct = NULL;
    p_struct = (alglib_impl::matinvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::matinvreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::matinvreport));
    alglib_impl::_matinvreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
10761 
/*************************************************************************
Copy constructor: allocates a fresh C-core matinvreport and deep-copies the
contents of RHS into it.
*************************************************************************/
_matinvreport_owner::_matinvreport_owner(const _matinvreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    // Recovery point for longjmp()-style error reporting from the C core.
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path: release the partially-constructed object before reporting.
        if( p_struct!=NULL )
        {
            alglib_impl::_matinvreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct must be NULL before allocation so the error path above is safe.
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: matinvreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::matinvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::matinvreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::matinvreport));
    alglib_impl::_matinvreport_init_copy(p_struct, const_cast<alglib_impl::matinvreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
10791 
/*************************************************************************
Assignment: destroys the current C-core contents in place and deep-copies RHS
into the same allocation.
*************************************************************************/
_matinvreport_owner& _matinvreport_owner::operator=(const _matinvreport_owner &rhs)
{
    // Self-assignment is a no-op.
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    // Recovery point for longjmp()-style error reporting from the C core.
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: matinvreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: matinvreport assignment constructor failure (source is not initialized)", &_state);
    // Destroy-then-copy in place: the allocation made at construction time is reused.
    alglib_impl::_matinvreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::matinvreport));
    // NOTE(review): if _init_copy longjmp()s here, p_struct points at a zeroed
    // (destroyed) struct which the destructor will destroy again -- verify the
    // core tolerates a zero-initialized struct.
    alglib_impl::_matinvreport_init_copy(p_struct, const_cast<alglib_impl::matinvreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
10818 
~_matinvreport_owner()10819 _matinvreport_owner::~_matinvreport_owner()
10820 {
10821     if( p_struct!=NULL )
10822     {
10823         alglib_impl::_matinvreport_destroy(p_struct);
10824         ae_free(p_struct);
10825     }
10826 }
10827 
// Returns the underlying C-core structure so wrapper functions can pass it
// to the alglib_impl layer.
alglib_impl::matinvreport* _matinvreport_owner::c_ptr()
{
    return p_struct;
}
10832 
c_ptr() const10833 alglib_impl::matinvreport* _matinvreport_owner::c_ptr() const
10834 {
10835     return const_cast<alglib_impl::matinvreport*>(p_struct);
10836 }
// Default constructor. The owner base allocates the C-core matinvreport;
// the reference members r1/rinf alias fields of that structure so callers
// can read condition-number estimates directly.
// NOTE(review): in AE_NO_EXCEPTIONS builds a failed base construction leaves
// p_struct NULL, making p_struct->r1 here a null dereference -- confirm this
// matches upstream ALGLIB behavior.
matinvreport::matinvreport() : _matinvreport_owner() ,r1(p_struct->r1),rinf(p_struct->rinf)
{
}
10840 
// Copy constructor: the owner base deep-copies the C-core report; r1/rinf are
// rebound to the fields of this object's own copy.
matinvreport::matinvreport(const matinvreport &rhs):_matinvreport_owner(rhs) ,r1(p_struct->r1),rinf(p_struct->rinf)
{
}
10844 
// Assignment: delegate the deep copy to the owner base, guarding against
// self-assignment. The r1/rinf reference members keep aliasing this object's
// own C-core structure, whose contents are overwritten by the base.
matinvreport& matinvreport::operator=(const matinvreport &rhs)
{
    if( this!=&rhs )
        _matinvreport_owner::operator=(rhs);
    return *this;
}
10852 
// Destructor: the owner base releases the C-core structure.
matinvreport::~matinvreport()
{
}
10856 
10857 /*************************************************************************
10858 Inversion of a matrix given by its LU decomposition.
10859 
10860 INPUT PARAMETERS:
10861     A       -   LU decomposition of the matrix
10862                 (output of RMatrixLU subroutine).
10863     Pivots  -   table of permutations
10864                 (the output of RMatrixLU subroutine).
10865     N       -   size of matrix A (optional) :
10866                 * if given, only principal NxN submatrix is processed  and
10867                   overwritten. other elements are unchanged.
10868                 * if not given,  size  is  automatically  determined  from
10869                   matrix size (A must be square matrix)
10870 
10871 OUTPUT PARAMETERS:
10872     Info    -   return code:
10873                 * -3    A is singular, or VERY close to singular.
10874                         it is filled by zeros in such cases.
10875                 *  1    task is solved (but matrix A may be ill-conditioned,
10876                         check R1/RInf parameters for condition numbers).
10877     Rep     -   solver report, see below for more info
10878     A       -   inverse of matrix A.
10879                 Array whose indexes range within [0..N-1, 0..N-1].
10880 
10881 SOLVER REPORT
10882 
10883 Subroutine sets following fields of the Rep structure:
10884 * R1        reciprocal of condition number: 1/cond(A), 1-norm.
10885 * RInf      reciprocal of condition number: 1/cond(A), inf-norm.
10886 
10887   ! FREE EDITION OF ALGLIB:
10888   !
10889   ! Free Edition of ALGLIB supports following important features for  this
10890   ! function:
10891   ! * C++ version: x64 SIMD support using C++ intrinsics
10892   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
10893   !
10894   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
10895   ! Reference Manual in order  to  find  out  how to activate SIMD support
10896   ! in ALGLIB.
10897 
10898   ! COMMERCIAL EDITION OF ALGLIB:
10899   !
10900   ! Commercial Edition of ALGLIB includes following important improvements
10901   ! of this function:
10902   ! * high-performance native backend with same C# interface (C# version)
10903   ! * multithreading support (C++ and C# versions)
10904   ! * hardware vendor (Intel) implementations of linear algebra primitives
10905   !   (C++ and C# versions, x86/x64 platform)
10906   !
10907   ! We recommend you to read 'Working with commercial version' section  of
10908   ! ALGLIB Reference Manual in order to find out how to  use  performance-
10909   ! related features provided by commercial edition of ALGLIB.
10910 
10911   -- ALGLIB routine --
10912      05.02.2010
10913      Bochkanov Sergey
10914 *************************************************************************/
rmatrixluinverse(real_2d_array & a,const integer_1d_array & pivots,const ae_int_t n,ae_int_t & info,matinvreport & rep,const xparams _xparams)10915 void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep, const xparams _xparams)
10916 {
10917     jmp_buf _break_jump;
10918     alglib_impl::ae_state _alglib_env_state;
10919     alglib_impl::ae_state_init(&_alglib_env_state);
10920     if( setjmp(_break_jump) )
10921     {
10922 #if !defined(AE_NO_EXCEPTIONS)
10923         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10924 #else
10925         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10926         return;
10927 #endif
10928     }
10929     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10930     if( _xparams.flags!=0x0 )
10931         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10932     alglib_impl::rmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
10933     alglib_impl::ae_state_clear(&_alglib_env_state);
10934     return;
10935 }
10936 
10937 /*************************************************************************
10938 Inversion of a matrix given by its LU decomposition.
10939 
10940 INPUT PARAMETERS:
10941     A       -   LU decomposition of the matrix
10942                 (output of RMatrixLU subroutine).
10943     Pivots  -   table of permutations
10944                 (the output of RMatrixLU subroutine).
10945     N       -   size of matrix A (optional) :
10946                 * if given, only principal NxN submatrix is processed  and
10947                   overwritten. other elements are unchanged.
10948                 * if not given,  size  is  automatically  determined  from
10949                   matrix size (A must be square matrix)
10950 
10951 OUTPUT PARAMETERS:
10952     Info    -   return code:
10953                 * -3    A is singular, or VERY close to singular.
10954                         it is filled by zeros in such cases.
10955                 *  1    task is solved (but matrix A may be ill-conditioned,
10956                         check R1/RInf parameters for condition numbers).
10957     Rep     -   solver report, see below for more info
10958     A       -   inverse of matrix A.
10959                 Array whose indexes range within [0..N-1, 0..N-1].
10960 
10961 SOLVER REPORT
10962 
10963 Subroutine sets following fields of the Rep structure:
10964 * R1        reciprocal of condition number: 1/cond(A), 1-norm.
10965 * RInf      reciprocal of condition number: 1/cond(A), inf-norm.
10966 
10967   ! FREE EDITION OF ALGLIB:
10968   !
10969   ! Free Edition of ALGLIB supports following important features for  this
10970   ! function:
10971   ! * C++ version: x64 SIMD support using C++ intrinsics
10972   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
10973   !
10974   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
10975   ! Reference Manual in order  to  find  out  how to activate SIMD support
10976   ! in ALGLIB.
10977 
10978   ! COMMERCIAL EDITION OF ALGLIB:
10979   !
10980   ! Commercial Edition of ALGLIB includes following important improvements
10981   ! of this function:
10982   ! * high-performance native backend with same C# interface (C# version)
10983   ! * multithreading support (C++ and C# versions)
10984   ! * hardware vendor (Intel) implementations of linear algebra primitives
10985   !   (C++ and C# versions, x86/x64 platform)
10986   !
10987   ! We recommend you to read 'Working with commercial version' section  of
10988   ! ALGLIB Reference Manual in order to find out how to  use  performance-
10989   ! related features provided by commercial edition of ALGLIB.
10990 
10991   -- ALGLIB routine --
10992      05.02.2010
10993      Bochkanov Sergey
10994 *************************************************************************/
10995 #if !defined(AE_NO_EXCEPTIONS)
rmatrixluinverse(real_2d_array & a,const integer_1d_array & pivots,ae_int_t & info,matinvreport & rep,const xparams _xparams)10996 void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep, const xparams _xparams)
10997 {
10998     jmp_buf _break_jump;
10999     alglib_impl::ae_state _alglib_env_state;
11000     ae_int_t n;
11001     if( (a.cols()!=a.rows()) || (a.cols()!=pivots.length()))
11002         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixluinverse': looks like one of arguments has wrong size");
11003     n = a.cols();
11004     alglib_impl::ae_state_init(&_alglib_env_state);
11005     if( setjmp(_break_jump) )
11006         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11007     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11008     if( _xparams.flags!=0x0 )
11009         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11010     alglib_impl::rmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11011 
11012     alglib_impl::ae_state_clear(&_alglib_env_state);
11013     return;
11014 }
11015 #endif
11016 
11017 /*************************************************************************
11018 Inversion of a general matrix.
11019 
11020 Input parameters:
11021     A       -   matrix.
11022     N       -   size of matrix A (optional) :
11023                 * if given, only principal NxN submatrix is processed  and
11024                   overwritten. other elements are unchanged.
11025                 * if not given,  size  is  automatically  determined  from
11026                   matrix size (A must be square matrix)
11027 
11028 Output parameters:
11029     Info    -   return code, same as in RMatrixLUInverse
11030     Rep     -   solver report, same as in RMatrixLUInverse
11031     A       -   inverse of matrix A, same as in RMatrixLUInverse
11032 
11033 Result:
11034     True, if the matrix is not singular.
11035     False, if the matrix is singular.
11036 
11037   ! FREE EDITION OF ALGLIB:
11038   !
11039   ! Free Edition of ALGLIB supports following important features for  this
11040   ! function:
11041   ! * C++ version: x64 SIMD support using C++ intrinsics
11042   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11043   !
11044   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11045   ! Reference Manual in order  to  find  out  how to activate SIMD support
11046   ! in ALGLIB.
11047 
11048   ! COMMERCIAL EDITION OF ALGLIB:
11049   !
11050   ! Commercial Edition of ALGLIB includes following important improvements
11051   ! of this function:
11052   ! * high-performance native backend with same C# interface (C# version)
11053   ! * multithreading support (C++ and C# versions)
11054   ! * hardware vendor (Intel) implementations of linear algebra primitives
11055   !   (C++ and C# versions, x86/x64 platform)
11056   !
11057   ! We recommend you to read 'Working with commercial version' section  of
11058   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11059   ! related features provided by commercial edition of ALGLIB.
11060 
11061   -- ALGLIB --
11062      Copyright 2005-2010 by Bochkanov Sergey
11063 *************************************************************************/
rmatrixinverse(real_2d_array & a,const ae_int_t n,ae_int_t & info,matinvreport & rep,const xparams _xparams)11064 void rmatrixinverse(real_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11065 {
11066     jmp_buf _break_jump;
11067     alglib_impl::ae_state _alglib_env_state;
11068     alglib_impl::ae_state_init(&_alglib_env_state);
11069     if( setjmp(_break_jump) )
11070     {
11071 #if !defined(AE_NO_EXCEPTIONS)
11072         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11073 #else
11074         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11075         return;
11076 #endif
11077     }
11078     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11079     if( _xparams.flags!=0x0 )
11080         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11081     alglib_impl::rmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11082     alglib_impl::ae_state_clear(&_alglib_env_state);
11083     return;
11084 }
11085 
11086 /*************************************************************************
11087 Inversion of a general matrix.
11088 
11089 Input parameters:
11090     A       -   matrix.
11091     N       -   size of matrix A (optional) :
11092                 * if given, only principal NxN submatrix is processed  and
11093                   overwritten. other elements are unchanged.
11094                 * if not given,  size  is  automatically  determined  from
11095                   matrix size (A must be square matrix)
11096 
11097 Output parameters:
11098     Info    -   return code, same as in RMatrixLUInverse
11099     Rep     -   solver report, same as in RMatrixLUInverse
11100     A       -   inverse of matrix A, same as in RMatrixLUInverse
11101 
11102 Result:
11103     True, if the matrix is not singular.
11104     False, if the matrix is singular.
11105 
11106   ! FREE EDITION OF ALGLIB:
11107   !
11108   ! Free Edition of ALGLIB supports following important features for  this
11109   ! function:
11110   ! * C++ version: x64 SIMD support using C++ intrinsics
11111   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11112   !
11113   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11114   ! Reference Manual in order  to  find  out  how to activate SIMD support
11115   ! in ALGLIB.
11116 
11117   ! COMMERCIAL EDITION OF ALGLIB:
11118   !
11119   ! Commercial Edition of ALGLIB includes following important improvements
11120   ! of this function:
11121   ! * high-performance native backend with same C# interface (C# version)
11122   ! * multithreading support (C++ and C# versions)
11123   ! * hardware vendor (Intel) implementations of linear algebra primitives
11124   !   (C++ and C# versions, x86/x64 platform)
11125   !
11126   ! We recommend you to read 'Working with commercial version' section  of
11127   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11128   ! related features provided by commercial edition of ALGLIB.
11129 
11130   -- ALGLIB --
11131      Copyright 2005-2010 by Bochkanov Sergey
11132 *************************************************************************/
11133 #if !defined(AE_NO_EXCEPTIONS)
rmatrixinverse(real_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11134 void rmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11135 {
11136     jmp_buf _break_jump;
11137     alglib_impl::ae_state _alglib_env_state;
11138     ae_int_t n;
11139     if( (a.cols()!=a.rows()))
11140         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixinverse': looks like one of arguments has wrong size");
11141     n = a.cols();
11142     alglib_impl::ae_state_init(&_alglib_env_state);
11143     if( setjmp(_break_jump) )
11144         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11145     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11146     if( _xparams.flags!=0x0 )
11147         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11148     alglib_impl::rmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11149 
11150     alglib_impl::ae_state_clear(&_alglib_env_state);
11151     return;
11152 }
11153 #endif
11154 
11155 /*************************************************************************
11156 Inversion of a matrix given by its LU decomposition.
11157 
11158 INPUT PARAMETERS:
11159     A       -   LU decomposition of the matrix
11160                 (output of CMatrixLU subroutine).
11161     Pivots  -   table of permutations
11162                 (the output of CMatrixLU subroutine).
11163     N       -   size of matrix A (optional) :
11164                 * if given, only principal NxN submatrix is processed  and
11165                   overwritten. other elements are unchanged.
11166                 * if not given,  size  is  automatically  determined  from
11167                   matrix size (A must be square matrix)
11168 
11169 OUTPUT PARAMETERS:
11170     Info    -   return code, same as in RMatrixLUInverse
11171     Rep     -   solver report, same as in RMatrixLUInverse
11172     A       -   inverse of matrix A, same as in RMatrixLUInverse
11173 
11174   ! FREE EDITION OF ALGLIB:
11175   !
11176   ! Free Edition of ALGLIB supports following important features for  this
11177   ! function:
11178   ! * C++ version: x64 SIMD support using C++ intrinsics
11179   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11180   !
11181   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11182   ! Reference Manual in order  to  find  out  how to activate SIMD support
11183   ! in ALGLIB.
11184 
11185   ! COMMERCIAL EDITION OF ALGLIB:
11186   !
11187   ! Commercial Edition of ALGLIB includes following important improvements
11188   ! of this function:
11189   ! * high-performance native backend with same C# interface (C# version)
11190   ! * multithreading support (C++ and C# versions)
11191   ! * hardware vendor (Intel) implementations of linear algebra primitives
11192   !   (C++ and C# versions, x86/x64 platform)
11193   !
11194   ! We recommend you to read 'Working with commercial version' section  of
11195   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11196   ! related features provided by commercial edition of ALGLIB.
11197 
11198   -- ALGLIB routine --
11199      05.02.2010
11200      Bochkanov Sergey
11201 *************************************************************************/
cmatrixluinverse(complex_2d_array & a,const integer_1d_array & pivots,const ae_int_t n,ae_int_t & info,matinvreport & rep,const xparams _xparams)11202 void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11203 {
11204     jmp_buf _break_jump;
11205     alglib_impl::ae_state _alglib_env_state;
11206     alglib_impl::ae_state_init(&_alglib_env_state);
11207     if( setjmp(_break_jump) )
11208     {
11209 #if !defined(AE_NO_EXCEPTIONS)
11210         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11211 #else
11212         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11213         return;
11214 #endif
11215     }
11216     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11217     if( _xparams.flags!=0x0 )
11218         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11219     alglib_impl::cmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11220     alglib_impl::ae_state_clear(&_alglib_env_state);
11221     return;
11222 }
11223 
11224 /*************************************************************************
11225 Inversion of a matrix given by its LU decomposition.
11226 
11227 INPUT PARAMETERS:
11228     A       -   LU decomposition of the matrix
11229                 (output of CMatrixLU subroutine).
11230     Pivots  -   table of permutations
11231                 (the output of CMatrixLU subroutine).
11232     N       -   size of matrix A (optional) :
11233                 * if given, only principal NxN submatrix is processed  and
11234                   overwritten. other elements are unchanged.
11235                 * if not given,  size  is  automatically  determined  from
11236                   matrix size (A must be square matrix)
11237 
11238 OUTPUT PARAMETERS:
11239     Info    -   return code, same as in RMatrixLUInverse
11240     Rep     -   solver report, same as in RMatrixLUInverse
11241     A       -   inverse of matrix A, same as in RMatrixLUInverse
11242 
11243   ! FREE EDITION OF ALGLIB:
11244   !
11245   ! Free Edition of ALGLIB supports following important features for  this
11246   ! function:
11247   ! * C++ version: x64 SIMD support using C++ intrinsics
11248   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11249   !
11250   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11251   ! Reference Manual in order  to  find  out  how to activate SIMD support
11252   ! in ALGLIB.
11253 
11254   ! COMMERCIAL EDITION OF ALGLIB:
11255   !
11256   ! Commercial Edition of ALGLIB includes following important improvements
11257   ! of this function:
11258   ! * high-performance native backend with same C# interface (C# version)
11259   ! * multithreading support (C++ and C# versions)
11260   ! * hardware vendor (Intel) implementations of linear algebra primitives
11261   !   (C++ and C# versions, x86/x64 platform)
11262   !
11263   ! We recommend you to read 'Working with commercial version' section  of
11264   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11265   ! related features provided by commercial edition of ALGLIB.
11266 
11267   -- ALGLIB routine --
11268      05.02.2010
11269      Bochkanov Sergey
11270 *************************************************************************/
11271 #if !defined(AE_NO_EXCEPTIONS)
cmatrixluinverse(complex_2d_array & a,const integer_1d_array & pivots,ae_int_t & info,matinvreport & rep,const xparams _xparams)11272 void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11273 {
11274     jmp_buf _break_jump;
11275     alglib_impl::ae_state _alglib_env_state;
11276     ae_int_t n;
11277     if( (a.cols()!=a.rows()) || (a.cols()!=pivots.length()))
11278         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixluinverse': looks like one of arguments has wrong size");
11279     n = a.cols();
11280     alglib_impl::ae_state_init(&_alglib_env_state);
11281     if( setjmp(_break_jump) )
11282         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11283     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11284     if( _xparams.flags!=0x0 )
11285         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11286     alglib_impl::cmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11287 
11288     alglib_impl::ae_state_clear(&_alglib_env_state);
11289     return;
11290 }
11291 #endif
11292 
11293 /*************************************************************************
11294 Inversion of a general matrix.
11295 
11296 Input parameters:
11297     A       -   matrix
11298     N       -   size of matrix A (optional) :
11299                 * if given, only principal NxN submatrix is processed  and
11300                   overwritten. other elements are unchanged.
11301                 * if not given,  size  is  automatically  determined  from
11302                   matrix size (A must be square matrix)
11303 
11304 Output parameters:
11305     Info    -   return code, same as in RMatrixLUInverse
11306     Rep     -   solver report, same as in RMatrixLUInverse
11307     A       -   inverse of matrix A, same as in RMatrixLUInverse
11308 
11309   ! FREE EDITION OF ALGLIB:
11310   !
11311   ! Free Edition of ALGLIB supports following important features for  this
11312   ! function:
11313   ! * C++ version: x64 SIMD support using C++ intrinsics
11314   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11315   !
11316   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11317   ! Reference Manual in order  to  find  out  how to activate SIMD support
11318   ! in ALGLIB.
11319 
11320   ! COMMERCIAL EDITION OF ALGLIB:
11321   !
11322   ! Commercial Edition of ALGLIB includes following important improvements
11323   ! of this function:
11324   ! * high-performance native backend with same C# interface (C# version)
11325   ! * multithreading support (C++ and C# versions)
11326   ! * hardware vendor (Intel) implementations of linear algebra primitives
11327   !   (C++ and C# versions, x86/x64 platform)
11328   !
11329   ! We recommend you to read 'Working with commercial version' section  of
11330   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11331   ! related features provided by commercial edition of ALGLIB.
11332 
11333   -- ALGLIB --
11334      Copyright 2005 by Bochkanov Sergey
11335 *************************************************************************/
cmatrixinverse(complex_2d_array & a,const ae_int_t n,ae_int_t & info,matinvreport & rep,const xparams _xparams)11336 void cmatrixinverse(complex_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11337 {
11338     jmp_buf _break_jump;
11339     alglib_impl::ae_state _alglib_env_state;
11340     alglib_impl::ae_state_init(&_alglib_env_state);
11341     if( setjmp(_break_jump) )
11342     {
11343 #if !defined(AE_NO_EXCEPTIONS)
11344         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11345 #else
11346         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11347         return;
11348 #endif
11349     }
11350     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11351     if( _xparams.flags!=0x0 )
11352         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11353     alglib_impl::cmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11354     alglib_impl::ae_state_clear(&_alglib_env_state);
11355     return;
11356 }
11357 
11358 /*************************************************************************
11359 Inversion of a general matrix.
11360 
11361 Input parameters:
11362     A       -   matrix
11363     N       -   size of matrix A (optional) :
11364                 * if given, only principal NxN submatrix is processed  and
11365                   overwritten. other elements are unchanged.
11366                 * if not given,  size  is  automatically  determined  from
11367                   matrix size (A must be square matrix)
11368 
11369 Output parameters:
11370     Info    -   return code, same as in RMatrixLUInverse
11371     Rep     -   solver report, same as in RMatrixLUInverse
11372     A       -   inverse of matrix A, same as in RMatrixLUInverse
11373 
11374   ! FREE EDITION OF ALGLIB:
11375   !
11376   ! Free Edition of ALGLIB supports following important features for  this
11377   ! function:
11378   ! * C++ version: x64 SIMD support using C++ intrinsics
11379   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11380   !
11381   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11382   ! Reference Manual in order  to  find  out  how to activate SIMD support
11383   ! in ALGLIB.
11384 
11385   ! COMMERCIAL EDITION OF ALGLIB:
11386   !
11387   ! Commercial Edition of ALGLIB includes following important improvements
11388   ! of this function:
11389   ! * high-performance native backend with same C# interface (C# version)
11390   ! * multithreading support (C++ and C# versions)
11391   ! * hardware vendor (Intel) implementations of linear algebra primitives
11392   !   (C++ and C# versions, x86/x64 platform)
11393   !
11394   ! We recommend you to read 'Working with commercial version' section  of
11395   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11396   ! related features provided by commercial edition of ALGLIB.
11397 
11398   -- ALGLIB --
11399      Copyright 2005 by Bochkanov Sergey
11400 *************************************************************************/
11401 #if !defined(AE_NO_EXCEPTIONS)
cmatrixinverse(complex_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11402 void cmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11403 {
11404     jmp_buf _break_jump;
11405     alglib_impl::ae_state _alglib_env_state;
11406     ae_int_t n;
11407     if( (a.cols()!=a.rows()))
11408         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixinverse': looks like one of arguments has wrong size");
11409     n = a.cols();
11410     alglib_impl::ae_state_init(&_alglib_env_state);
11411     if( setjmp(_break_jump) )
11412         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11413     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11414     if( _xparams.flags!=0x0 )
11415         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11416     alglib_impl::cmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11417 
11418     alglib_impl::ae_state_clear(&_alglib_env_state);
11419     return;
11420 }
11421 #endif
11422 
11423 /*************************************************************************
11424 Inversion of a symmetric positive definite matrix which is given
11425 by Cholesky decomposition.
11426 
11427 Input parameters:
11428     A       -   Cholesky decomposition of the matrix to be inverted:
11429                 A=U'*U or A = L*L'.
11430                 Output of  SPDMatrixCholesky subroutine.
11431     N       -   size of matrix A (optional) :
11432                 * if given, only principal NxN submatrix is processed  and
11433                   overwritten. other elements are unchanged.
11434                 * if not given,  size  is  automatically  determined  from
11435                   matrix size (A must be square matrix)
11436     IsUpper -   storage type (optional):
11437                 * if True, symmetric  matrix  A  is  given  by  its  upper
11438                   triangle, and the lower triangle isn't  used/changed  by
11439                   function
11440                 * if False,  symmetric matrix  A  is  given  by  its lower
11441                   triangle, and the  upper triangle isn't used/changed  by
11442                   function
11443                 * if not given, lower half is used.
11444 
11445 Output parameters:
11446     Info    -   return code, same as in RMatrixLUInverse
11447     Rep     -   solver report, same as in RMatrixLUInverse
11448     A       -   inverse of matrix A, same as in RMatrixLUInverse
11449 
11450   ! FREE EDITION OF ALGLIB:
11451   !
11452   ! Free Edition of ALGLIB supports following important features for  this
11453   ! function:
11454   ! * C++ version: x64 SIMD support using C++ intrinsics
11455   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11456   !
11457   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11458   ! Reference Manual in order  to  find  out  how to activate SIMD support
11459   ! in ALGLIB.
11460 
11461   ! COMMERCIAL EDITION OF ALGLIB:
11462   !
11463   ! Commercial Edition of ALGLIB includes following important improvements
11464   ! of this function:
11465   ! * high-performance native backend with same C# interface (C# version)
11466   ! * multithreading support (C++ and C# versions)
11467   ! * hardware vendor (Intel) implementations of linear algebra primitives
11468   !   (C++ and C# versions, x86/x64 platform)
11469   !
11470   ! We recommend you to read 'Working with commercial version' section  of
11471   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11472   ! related features provided by commercial edition of ALGLIB.
11473 
11474   -- ALGLIB routine --
11475      10.02.2010
11476      Bochkanov Sergey
11477 *************************************************************************/
spdmatrixcholeskyinverse(real_2d_array & a,const ae_int_t n,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)11478 void spdmatrixcholeskyinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11479 {
11480     jmp_buf _break_jump;
11481     alglib_impl::ae_state _alglib_env_state;
11482     alglib_impl::ae_state_init(&_alglib_env_state);
11483     if( setjmp(_break_jump) )
11484     {
11485 #if !defined(AE_NO_EXCEPTIONS)
11486         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11487 #else
11488         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11489         return;
11490 #endif
11491     }
11492     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11493     if( _xparams.flags!=0x0 )
11494         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11495     alglib_impl::spdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11496     alglib_impl::ae_state_clear(&_alglib_env_state);
11497     return;
11498 }
11499 
11500 /*************************************************************************
11501 Inversion of a symmetric positive definite matrix which is given
11502 by Cholesky decomposition.
11503 
11504 Input parameters:
11505     A       -   Cholesky decomposition of the matrix to be inverted:
11506                 A=U'*U or A = L*L'.
11507                 Output of  SPDMatrixCholesky subroutine.
11508     N       -   size of matrix A (optional) :
11509                 * if given, only principal NxN submatrix is processed  and
11510                   overwritten. other elements are unchanged.
11511                 * if not given,  size  is  automatically  determined  from
11512                   matrix size (A must be square matrix)
11513     IsUpper -   storage type (optional):
11514                 * if True, symmetric  matrix  A  is  given  by  its  upper
11515                   triangle, and the lower triangle isn't  used/changed  by
11516                   function
11517                 * if False,  symmetric matrix  A  is  given  by  its lower
11518                   triangle, and the  upper triangle isn't used/changed  by
11519                   function
11520                 * if not given, lower half is used.
11521 
11522 Output parameters:
11523     Info    -   return code, same as in RMatrixLUInverse
11524     Rep     -   solver report, same as in RMatrixLUInverse
11525     A       -   inverse of matrix A, same as in RMatrixLUInverse
11526 
11527   ! FREE EDITION OF ALGLIB:
11528   !
11529   ! Free Edition of ALGLIB supports following important features for  this
11530   ! function:
11531   ! * C++ version: x64 SIMD support using C++ intrinsics
11532   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11533   !
11534   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11535   ! Reference Manual in order  to  find  out  how to activate SIMD support
11536   ! in ALGLIB.
11537 
11538   ! COMMERCIAL EDITION OF ALGLIB:
11539   !
11540   ! Commercial Edition of ALGLIB includes following important improvements
11541   ! of this function:
11542   ! * high-performance native backend with same C# interface (C# version)
11543   ! * multithreading support (C++ and C# versions)
11544   ! * hardware vendor (Intel) implementations of linear algebra primitives
11545   !   (C++ and C# versions, x86/x64 platform)
11546   !
11547   ! We recommend you to read 'Working with commercial version' section  of
11548   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11549   ! related features provided by commercial edition of ALGLIB.
11550 
11551   -- ALGLIB routine --
11552      10.02.2010
11553      Bochkanov Sergey
11554 *************************************************************************/
11555 #if !defined(AE_NO_EXCEPTIONS)
spdmatrixcholeskyinverse(real_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11556 void spdmatrixcholeskyinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11557 {
11558     jmp_buf _break_jump;
11559     alglib_impl::ae_state _alglib_env_state;
11560     ae_int_t n;
11561     bool isupper;
11562     if( (a.cols()!=a.rows()))
11563         _ALGLIB_CPP_EXCEPTION("Error while calling 'spdmatrixcholeskyinverse': looks like one of arguments has wrong size");
11564     n = a.cols();
11565     isupper = false;
11566     alglib_impl::ae_state_init(&_alglib_env_state);
11567     if( setjmp(_break_jump) )
11568         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11569     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11570     if( _xparams.flags!=0x0 )
11571         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11572     alglib_impl::spdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11573 
11574     alglib_impl::ae_state_clear(&_alglib_env_state);
11575     return;
11576 }
11577 #endif
11578 
11579 /*************************************************************************
11580 Inversion of a symmetric positive definite matrix.
11581 
11582 Given an upper or lower triangle of a symmetric positive definite matrix,
11583 the algorithm generates matrix A^-1 and saves the upper or lower triangle
11584 depending on the input.
11585 
11586 Input parameters:
11587     A       -   matrix to be inverted (upper or lower triangle).
11588                 Array with elements [0..N-1,0..N-1].
11589     N       -   size of matrix A (optional) :
11590                 * if given, only principal NxN submatrix is processed  and
11591                   overwritten. other elements are unchanged.
11592                 * if not given,  size  is  automatically  determined  from
11593                   matrix size (A must be square matrix)
11594     IsUpper -   storage type (optional):
11595                 * if True, symmetric  matrix  A  is  given  by  its  upper
11596                   triangle, and the lower triangle isn't  used/changed  by
11597                   function
11598                 * if False,  symmetric matrix  A  is  given  by  its lower
11599                   triangle, and the  upper triangle isn't used/changed  by
11600                   function
11601                 * if not given,  both lower and upper  triangles  must  be
11602                   filled.
11603 
11604 Output parameters:
11605     Info    -   return code, same as in RMatrixLUInverse
11606     Rep     -   solver report, same as in RMatrixLUInverse
11607     A       -   inverse of matrix A, same as in RMatrixLUInverse
11608 
11609   ! FREE EDITION OF ALGLIB:
11610   !
11611   ! Free Edition of ALGLIB supports following important features for  this
11612   ! function:
11613   ! * C++ version: x64 SIMD support using C++ intrinsics
11614   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11615   !
11616   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11617   ! Reference Manual in order  to  find  out  how to activate SIMD support
11618   ! in ALGLIB.
11619 
11620   ! COMMERCIAL EDITION OF ALGLIB:
11621   !
11622   ! Commercial Edition of ALGLIB includes following important improvements
11623   ! of this function:
11624   ! * high-performance native backend with same C# interface (C# version)
11625   ! * multithreading support (C++ and C# versions)
11626   ! * hardware vendor (Intel) implementations of linear algebra primitives
11627   !   (C++ and C# versions, x86/x64 platform)
11628   !
11629   ! We recommend you to read 'Working with commercial version' section  of
11630   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11631   ! related features provided by commercial edition of ALGLIB.
11632 
11633   -- ALGLIB routine --
11634      10.02.2010
11635      Bochkanov Sergey
11636 *************************************************************************/
spdmatrixinverse(real_2d_array & a,const ae_int_t n,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)11637 void spdmatrixinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11638 {
11639     jmp_buf _break_jump;
11640     alglib_impl::ae_state _alglib_env_state;
11641     alglib_impl::ae_state_init(&_alglib_env_state);
11642     if( setjmp(_break_jump) )
11643     {
11644 #if !defined(AE_NO_EXCEPTIONS)
11645         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11646 #else
11647         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11648         return;
11649 #endif
11650     }
11651     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11652     if( _xparams.flags!=0x0 )
11653         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11654     alglib_impl::spdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11655     alglib_impl::ae_state_clear(&_alglib_env_state);
11656     return;
11657 }
11658 
11659 /*************************************************************************
11660 Inversion of a symmetric positive definite matrix.
11661 
11662 Given an upper or lower triangle of a symmetric positive definite matrix,
11663 the algorithm generates matrix A^-1 and saves the upper or lower triangle
11664 depending on the input.
11665 
11666 Input parameters:
11667     A       -   matrix to be inverted (upper or lower triangle).
11668                 Array with elements [0..N-1,0..N-1].
11669     N       -   size of matrix A (optional) :
11670                 * if given, only principal NxN submatrix is processed  and
11671                   overwritten. other elements are unchanged.
11672                 * if not given,  size  is  automatically  determined  from
11673                   matrix size (A must be square matrix)
11674     IsUpper -   storage type (optional):
11675                 * if True, symmetric  matrix  A  is  given  by  its  upper
11676                   triangle, and the lower triangle isn't  used/changed  by
11677                   function
11678                 * if False,  symmetric matrix  A  is  given  by  its lower
11679                   triangle, and the  upper triangle isn't used/changed  by
11680                   function
11681                 * if not given,  both lower and upper  triangles  must  be
11682                   filled.
11683 
11684 Output parameters:
11685     Info    -   return code, same as in RMatrixLUInverse
11686     Rep     -   solver report, same as in RMatrixLUInverse
11687     A       -   inverse of matrix A, same as in RMatrixLUInverse
11688 
11689   ! FREE EDITION OF ALGLIB:
11690   !
11691   ! Free Edition of ALGLIB supports following important features for  this
11692   ! function:
11693   ! * C++ version: x64 SIMD support using C++ intrinsics
11694   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11695   !
11696   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11697   ! Reference Manual in order  to  find  out  how to activate SIMD support
11698   ! in ALGLIB.
11699 
11700   ! COMMERCIAL EDITION OF ALGLIB:
11701   !
11702   ! Commercial Edition of ALGLIB includes following important improvements
11703   ! of this function:
11704   ! * high-performance native backend with same C# interface (C# version)
11705   ! * multithreading support (C++ and C# versions)
11706   ! * hardware vendor (Intel) implementations of linear algebra primitives
11707   !   (C++ and C# versions, x86/x64 platform)
11708   !
11709   ! We recommend you to read 'Working with commercial version' section  of
11710   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11711   ! related features provided by commercial edition of ALGLIB.
11712 
11713   -- ALGLIB routine --
11714      10.02.2010
11715      Bochkanov Sergey
11716 *************************************************************************/
11717 #if !defined(AE_NO_EXCEPTIONS)
spdmatrixinverse(real_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11718 void spdmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11719 {
11720     jmp_buf _break_jump;
11721     alglib_impl::ae_state _alglib_env_state;
11722     ae_int_t n;
11723     bool isupper;
11724     if( (a.cols()!=a.rows()))
11725         _ALGLIB_CPP_EXCEPTION("Error while calling 'spdmatrixinverse': looks like one of arguments has wrong size");
11726     if( !alglib_impl::ae_is_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
11727         _ALGLIB_CPP_EXCEPTION("'a' parameter is not symmetric matrix");
11728     n = a.cols();
11729     isupper = false;
11730     alglib_impl::ae_state_init(&_alglib_env_state);
11731     if( setjmp(_break_jump) )
11732         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11733     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11734     if( _xparams.flags!=0x0 )
11735         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11736     alglib_impl::spdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11737         if( !alglib_impl::ae_force_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
11738             _ALGLIB_CPP_EXCEPTION("Internal error while forcing symmetricity of 'a' parameter");
11739     alglib_impl::ae_state_clear(&_alglib_env_state);
11740     return;
11741 }
11742 #endif
11743 
11744 /*************************************************************************
11745 Inversion of a Hermitian positive definite matrix which is given
11746 by Cholesky decomposition.
11747 
11748 Input parameters:
11749     A       -   Cholesky decomposition of the matrix to be inverted:
11750                 A=U'*U or A = L*L'.
11751                 Output of  HPDMatrixCholesky subroutine.
11752     N       -   size of matrix A (optional) :
11753                 * if given, only principal NxN submatrix is processed  and
11754                   overwritten. other elements are unchanged.
11755                 * if not given,  size  is  automatically  determined  from
11756                   matrix size (A must be square matrix)
11757     IsUpper -   storage type (optional):
11758                 * if True, symmetric  matrix  A  is  given  by  its  upper
11759                   triangle, and the lower triangle isn't  used/changed  by
11760                   function
11761                 * if False,  symmetric matrix  A  is  given  by  its lower
11762                   triangle, and the  upper triangle isn't used/changed  by
11763                   function
11764                 * if not given, lower half is used.
11765 
11766 Output parameters:
11767     Info    -   return code, same as in RMatrixLUInverse
11768     Rep     -   solver report, same as in RMatrixLUInverse
11769     A       -   inverse of matrix A, same as in RMatrixLUInverse
11770 
11771   ! FREE EDITION OF ALGLIB:
11772   !
11773   ! Free Edition of ALGLIB supports following important features for  this
11774   ! function:
11775   ! * C++ version: x64 SIMD support using C++ intrinsics
11776   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11777   !
11778   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11779   ! Reference Manual in order  to  find  out  how to activate SIMD support
11780   ! in ALGLIB.
11781 
11782   ! COMMERCIAL EDITION OF ALGLIB:
11783   !
11784   ! Commercial Edition of ALGLIB includes following important improvements
11785   ! of this function:
11786   ! * high-performance native backend with same C# interface (C# version)
11787   ! * multithreading support (C++ and C# versions)
11788   ! * hardware vendor (Intel) implementations of linear algebra primitives
11789   !   (C++ and C# versions, x86/x64 platform)
11790   !
11791   ! We recommend you to read 'Working with commercial version' section  of
11792   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11793   ! related features provided by commercial edition of ALGLIB.
11794 
11795   -- ALGLIB routine --
11796      10.02.2010
11797      Bochkanov Sergey
11798 *************************************************************************/
hpdmatrixcholeskyinverse(complex_2d_array & a,const ae_int_t n,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)11799 void hpdmatrixcholeskyinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11800 {
11801     jmp_buf _break_jump;
11802     alglib_impl::ae_state _alglib_env_state;
11803     alglib_impl::ae_state_init(&_alglib_env_state);
11804     if( setjmp(_break_jump) )
11805     {
11806 #if !defined(AE_NO_EXCEPTIONS)
11807         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11808 #else
11809         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11810         return;
11811 #endif
11812     }
11813     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11814     if( _xparams.flags!=0x0 )
11815         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11816     alglib_impl::hpdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11817     alglib_impl::ae_state_clear(&_alglib_env_state);
11818     return;
11819 }
11820 
11821 /*************************************************************************
11822 Inversion of a Hermitian positive definite matrix which is given
11823 by Cholesky decomposition.
11824 
11825 Input parameters:
11826     A       -   Cholesky decomposition of the matrix to be inverted:
11827                 A=U'*U or A = L*L'.
11828                 Output of  HPDMatrixCholesky subroutine.
11829     N       -   size of matrix A (optional) :
11830                 * if given, only principal NxN submatrix is processed  and
11831                   overwritten. other elements are unchanged.
11832                 * if not given,  size  is  automatically  determined  from
11833                   matrix size (A must be square matrix)
11834     IsUpper -   storage type (optional):
11835                 * if True, symmetric  matrix  A  is  given  by  its  upper
11836                   triangle, and the lower triangle isn't  used/changed  by
11837                   function
11838                 * if False,  symmetric matrix  A  is  given  by  its lower
11839                   triangle, and the  upper triangle isn't used/changed  by
11840                   function
11841                 * if not given, lower half is used.
11842 
11843 Output parameters:
11844     Info    -   return code, same as in RMatrixLUInverse
11845     Rep     -   solver report, same as in RMatrixLUInverse
11846     A       -   inverse of matrix A, same as in RMatrixLUInverse
11847 
11848   ! FREE EDITION OF ALGLIB:
11849   !
11850   ! Free Edition of ALGLIB supports following important features for  this
11851   ! function:
11852   ! * C++ version: x64 SIMD support using C++ intrinsics
11853   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11854   !
11855   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11856   ! Reference Manual in order  to  find  out  how to activate SIMD support
11857   ! in ALGLIB.
11858 
11859   ! COMMERCIAL EDITION OF ALGLIB:
11860   !
11861   ! Commercial Edition of ALGLIB includes following important improvements
11862   ! of this function:
11863   ! * high-performance native backend with same C# interface (C# version)
11864   ! * multithreading support (C++ and C# versions)
11865   ! * hardware vendor (Intel) implementations of linear algebra primitives
11866   !   (C++ and C# versions, x86/x64 platform)
11867   !
11868   ! We recommend you to read 'Working with commercial version' section  of
11869   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11870   ! related features provided by commercial edition of ALGLIB.
11871 
11872   -- ALGLIB routine --
11873      10.02.2010
11874      Bochkanov Sergey
11875 *************************************************************************/
11876 #if !defined(AE_NO_EXCEPTIONS)
hpdmatrixcholeskyinverse(complex_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11877 void hpdmatrixcholeskyinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11878 {
11879     jmp_buf _break_jump;
11880     alglib_impl::ae_state _alglib_env_state;
11881     ae_int_t n;
11882     bool isupper;
11883     if( (a.cols()!=a.rows()))
11884         _ALGLIB_CPP_EXCEPTION("Error while calling 'hpdmatrixcholeskyinverse': looks like one of arguments has wrong size");
11885     n = a.cols();
11886     isupper = false;
11887     alglib_impl::ae_state_init(&_alglib_env_state);
11888     if( setjmp(_break_jump) )
11889         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11890     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11891     if( _xparams.flags!=0x0 )
11892         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11893     alglib_impl::hpdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11894 
11895     alglib_impl::ae_state_clear(&_alglib_env_state);
11896     return;
11897 }
11898 #endif
11899 
11900 /*************************************************************************
11901 Inversion of a Hermitian positive definite matrix.
11902 
11903 Given an upper or lower triangle of a Hermitian positive definite matrix,
11904 the algorithm generates matrix A^-1 and saves the upper or lower triangle
11905 depending on the input.
11906 
11907 Input parameters:
11908     A       -   matrix to be inverted (upper or lower triangle).
11909                 Array with elements [0..N-1,0..N-1].
11910     N       -   size of matrix A (optional) :
11911                 * if given, only principal NxN submatrix is processed  and
11912                   overwritten. other elements are unchanged.
11913                 * if not given,  size  is  automatically  determined  from
11914                   matrix size (A must be square matrix)
11915     IsUpper -   storage type (optional):
11916                 * if True, symmetric  matrix  A  is  given  by  its  upper
11917                   triangle, and the lower triangle isn't  used/changed  by
11918                   function
11919                 * if False,  symmetric matrix  A  is  given  by  its lower
11920                   triangle, and the  upper triangle isn't used/changed  by
11921                   function
11922                 * if not given,  both lower and upper  triangles  must  be
11923                   filled.
11924 
11925 Output parameters:
11926     Info    -   return code, same as in RMatrixLUInverse
11927     Rep     -   solver report, same as in RMatrixLUInverse
11928     A       -   inverse of matrix A, same as in RMatrixLUInverse
11929 
11930   ! FREE EDITION OF ALGLIB:
11931   !
11932   ! Free Edition of ALGLIB supports following important features for  this
11933   ! function:
11934   ! * C++ version: x64 SIMD support using C++ intrinsics
11935   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11936   !
11937   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11938   ! Reference Manual in order  to  find  out  how to activate SIMD support
11939   ! in ALGLIB.
11940 
11941   ! COMMERCIAL EDITION OF ALGLIB:
11942   !
11943   ! Commercial Edition of ALGLIB includes following important improvements
11944   ! of this function:
11945   ! * high-performance native backend with same C# interface (C# version)
11946   ! * multithreading support (C++ and C# versions)
11947   ! * hardware vendor (Intel) implementations of linear algebra primitives
11948   !   (C++ and C# versions, x86/x64 platform)
11949   !
11950   ! We recommend you to read 'Working with commercial version' section  of
11951   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11952   ! related features provided by commercial edition of ALGLIB.
11953 
11954   -- ALGLIB routine --
11955      10.02.2010
11956      Bochkanov Sergey
11957 *************************************************************************/
hpdmatrixinverse(complex_2d_array & a,const ae_int_t n,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)11958 void hpdmatrixinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11959 {
11960     jmp_buf _break_jump;
11961     alglib_impl::ae_state _alglib_env_state;
11962     alglib_impl::ae_state_init(&_alglib_env_state);
11963     if( setjmp(_break_jump) )
11964     {
11965 #if !defined(AE_NO_EXCEPTIONS)
11966         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11967 #else
11968         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11969         return;
11970 #endif
11971     }
11972     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11973     if( _xparams.flags!=0x0 )
11974         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11975     alglib_impl::hpdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11976     alglib_impl::ae_state_clear(&_alglib_env_state);
11977     return;
11978 }
11979 
11980 /*************************************************************************
11981 Inversion of a Hermitian positive definite matrix.
11982 
11983 Given an upper or lower triangle of a Hermitian positive definite matrix,
11984 the algorithm generates matrix A^-1 and saves the upper or lower triangle
11985 depending on the input.
11986 
11987 Input parameters:
11988     A       -   matrix to be inverted (upper or lower triangle).
11989                 Array with elements [0..N-1,0..N-1].
11990     N       -   size of matrix A (optional) :
11991                 * if given, only principal NxN submatrix is processed  and
11992                   overwritten. other elements are unchanged.
11993                 * if not given,  size  is  automatically  determined  from
11994                   matrix size (A must be square matrix)
11995     IsUpper -   storage type (optional):
11996                 * if True, symmetric  matrix  A  is  given  by  its  upper
11997                   triangle, and the lower triangle isn't  used/changed  by
11998                   function
11999                 * if False,  symmetric matrix  A  is  given  by  its lower
12000                   triangle, and the  upper triangle isn't used/changed  by
12001                   function
12002                 * if not given,  both lower and upper  triangles  must  be
12003                   filled.
12004 
12005 Output parameters:
12006     Info    -   return code, same as in RMatrixLUInverse
12007     Rep     -   solver report, same as in RMatrixLUInverse
12008     A       -   inverse of matrix A, same as in RMatrixLUInverse
12009 
12010   ! FREE EDITION OF ALGLIB:
12011   !
12012   ! Free Edition of ALGLIB supports following important features for  this
12013   ! function:
12014   ! * C++ version: x64 SIMD support using C++ intrinsics
12015   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12016   !
12017   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12018   ! Reference Manual in order  to  find  out  how to activate SIMD support
12019   ! in ALGLIB.
12020 
12021   ! COMMERCIAL EDITION OF ALGLIB:
12022   !
12023   ! Commercial Edition of ALGLIB includes following important improvements
12024   ! of this function:
12025   ! * high-performance native backend with same C# interface (C# version)
12026   ! * multithreading support (C++ and C# versions)
12027   ! * hardware vendor (Intel) implementations of linear algebra primitives
12028   !   (C++ and C# versions, x86/x64 platform)
12029   !
12030   ! We recommend you to read 'Working with commercial version' section  of
12031   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12032   ! related features provided by commercial edition of ALGLIB.
12033 
12034   -- ALGLIB routine --
12035      10.02.2010
12036      Bochkanov Sergey
12037 *************************************************************************/
12038 #if !defined(AE_NO_EXCEPTIONS)
hpdmatrixinverse(complex_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)12039 void hpdmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12040 {
12041     jmp_buf _break_jump;
12042     alglib_impl::ae_state _alglib_env_state;
12043     ae_int_t n;
12044     bool isupper;
12045     if( (a.cols()!=a.rows()))
12046         _ALGLIB_CPP_EXCEPTION("Error while calling 'hpdmatrixinverse': looks like one of arguments has wrong size");
12047     if( !alglib_impl::ae_is_hermitian(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
12048         _ALGLIB_CPP_EXCEPTION("'a' parameter is not Hermitian matrix");
12049     n = a.cols();
12050     isupper = false;
12051     alglib_impl::ae_state_init(&_alglib_env_state);
12052     if( setjmp(_break_jump) )
12053         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12054     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12055     if( _xparams.flags!=0x0 )
12056         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12057     alglib_impl::hpdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12058         if( !alglib_impl::ae_force_hermitian(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
12059             _ALGLIB_CPP_EXCEPTION("Internal error while forcing Hermitian properties of 'a' parameter");
12060     alglib_impl::ae_state_clear(&_alglib_env_state);
12061     return;
12062 }
12063 #endif
12064 
12065 /*************************************************************************
12066 Triangular matrix inverse (real)
12067 
12068 The subroutine inverts the following types of matrices:
12069     * upper triangular
12070     * upper triangular with unit diagonal
12071     * lower triangular
12072     * lower triangular with unit diagonal
12073 
12074 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
12075 also be upper (lower) triangular, and after the end of the algorithm,  the
12076 inverse matrix replaces the source matrix. The elements  below (above) the
12077 main diagonal are not changed by the algorithm.
12078 
12079 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
12080 diagonal, and the diagonal elements are not passed to the algorithm.
12081 
12082 Input parameters:
12083     A       -   matrix, array[0..N-1, 0..N-1].
12084     N       -   size of matrix A (optional) :
12085                 * if given, only principal NxN submatrix is processed  and
12086                   overwritten. other elements are unchanged.
12087                 * if not given,  size  is  automatically  determined  from
12088                   matrix size (A must be square matrix)
12089     IsUpper -   True, if the matrix is upper triangular.
12090     IsUnit  -   diagonal type (optional):
12091                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
12092                 * if False, matrix diagonal is arbitrary
12093                 * if not given, False is assumed
12094 
12095 Output parameters:
12096     Info    -   same as for RMatrixLUInverse
12097     Rep     -   same as for RMatrixLUInverse
12098     A       -   same as for RMatrixLUInverse.
12099 
12100   ! FREE EDITION OF ALGLIB:
12101   !
12102   ! Free Edition of ALGLIB supports following important features for  this
12103   ! function:
12104   ! * C++ version: x64 SIMD support using C++ intrinsics
12105   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12106   !
12107   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12108   ! Reference Manual in order  to  find  out  how to activate SIMD support
12109   ! in ALGLIB.
12110 
12111   ! COMMERCIAL EDITION OF ALGLIB:
12112   !
12113   ! Commercial Edition of ALGLIB includes following important improvements
12114   ! of this function:
12115   ! * high-performance native backend with same C# interface (C# version)
12116   ! * multithreading support (C++ and C# versions)
12117   ! * hardware vendor (Intel) implementations of linear algebra primitives
12118   !   (C++ and C# versions, x86/x64 platform)
12119   !
12120   ! We recommend you to read 'Working with commercial version' section  of
12121   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12122   ! related features provided by commercial edition of ALGLIB.
12123 
12124   -- ALGLIB --
12125      Copyright 05.02.2010 by Bochkanov Sergey
12126 *************************************************************************/
rmatrixtrinverse(real_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,ae_int_t & info,matinvreport & rep,const xparams _xparams)12127 void rmatrixtrinverse(real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12128 {
12129     jmp_buf _break_jump;
12130     alglib_impl::ae_state _alglib_env_state;
12131     alglib_impl::ae_state_init(&_alglib_env_state);
12132     if( setjmp(_break_jump) )
12133     {
12134 #if !defined(AE_NO_EXCEPTIONS)
12135         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12136 #else
12137         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12138         return;
12139 #endif
12140     }
12141     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12142     if( _xparams.flags!=0x0 )
12143         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12144     alglib_impl::rmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12145     alglib_impl::ae_state_clear(&_alglib_env_state);
12146     return;
12147 }
12148 
12149 /*************************************************************************
12150 Triangular matrix inverse (real)
12151 
12152 The subroutine inverts the following types of matrices:
12153     * upper triangular
12154     * upper triangular with unit diagonal
12155     * lower triangular
12156     * lower triangular with unit diagonal
12157 
12158 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
12159 also be upper (lower) triangular, and after the end of the algorithm,  the
12160 inverse matrix replaces the source matrix. The elements  below (above) the
12161 main diagonal are not changed by the algorithm.
12162 
12163 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
12164 diagonal, and the diagonal elements are not passed to the algorithm.
12165 
12166 Input parameters:
12167     A       -   matrix, array[0..N-1, 0..N-1].
12168     N       -   size of matrix A (optional) :
12169                 * if given, only principal NxN submatrix is processed  and
12170                   overwritten. other elements are unchanged.
12171                 * if not given,  size  is  automatically  determined  from
12172                   matrix size (A must be square matrix)
12173     IsUpper -   True, if the matrix is upper triangular.
12174     IsUnit  -   diagonal type (optional):
12175                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
12176                 * if False, matrix diagonal is arbitrary
12177                 * if not given, False is assumed
12178 
12179 Output parameters:
12180     Info    -   same as for RMatrixLUInverse
12181     Rep     -   same as for RMatrixLUInverse
12182     A       -   same as for RMatrixLUInverse.
12183 
12184   ! FREE EDITION OF ALGLIB:
12185   !
12186   ! Free Edition of ALGLIB supports following important features for  this
12187   ! function:
12188   ! * C++ version: x64 SIMD support using C++ intrinsics
12189   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12190   !
12191   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12192   ! Reference Manual in order  to  find  out  how to activate SIMD support
12193   ! in ALGLIB.
12194 
12195   ! COMMERCIAL EDITION OF ALGLIB:
12196   !
12197   ! Commercial Edition of ALGLIB includes following important improvements
12198   ! of this function:
12199   ! * high-performance native backend with same C# interface (C# version)
12200   ! * multithreading support (C++ and C# versions)
12201   ! * hardware vendor (Intel) implementations of linear algebra primitives
12202   !   (C++ and C# versions, x86/x64 platform)
12203   !
12204   ! We recommend you to read 'Working with commercial version' section  of
12205   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12206   ! related features provided by commercial edition of ALGLIB.
12207 
12208   -- ALGLIB --
12209      Copyright 05.02.2010 by Bochkanov Sergey
12210 *************************************************************************/
12211 #if !defined(AE_NO_EXCEPTIONS)
rmatrixtrinverse(real_2d_array & a,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)12212 void rmatrixtrinverse(real_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12213 {
12214     jmp_buf _break_jump;
12215     alglib_impl::ae_state _alglib_env_state;
12216     ae_int_t n;
12217     bool isunit;
12218     if( (a.cols()!=a.rows()))
12219         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixtrinverse': looks like one of arguments has wrong size");
12220     n = a.cols();
12221     isunit = false;
12222     alglib_impl::ae_state_init(&_alglib_env_state);
12223     if( setjmp(_break_jump) )
12224         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12225     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12226     if( _xparams.flags!=0x0 )
12227         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12228     alglib_impl::rmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12229 
12230     alglib_impl::ae_state_clear(&_alglib_env_state);
12231     return;
12232 }
12233 #endif
12234 
12235 /*************************************************************************
12236 Triangular matrix inverse (complex)
12237 
12238 The subroutine inverts the following types of matrices:
12239     * upper triangular
12240     * upper triangular with unit diagonal
12241     * lower triangular
12242     * lower triangular with unit diagonal
12243 
12244 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
12245 also be upper (lower) triangular, and after the end of the algorithm,  the
12246 inverse matrix replaces the source matrix. The elements  below (above) the
12247 main diagonal are not changed by the algorithm.
12248 
12249 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
12250 diagonal, and the diagonal elements are not passed to the algorithm.
12251 
12252 Input parameters:
12253     A       -   matrix, array[0..N-1, 0..N-1].
12254     N       -   size of matrix A (optional) :
12255                 * if given, only principal NxN submatrix is processed  and
12256                   overwritten. other elements are unchanged.
12257                 * if not given,  size  is  automatically  determined  from
12258                   matrix size (A must be square matrix)
12259     IsUpper -   True, if the matrix is upper triangular.
12260     IsUnit  -   diagonal type (optional):
12261                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
12262                 * if False, matrix diagonal is arbitrary
12263                 * if not given, False is assumed
12264 
12265 Output parameters:
12266     Info    -   same as for RMatrixLUInverse
12267     Rep     -   same as for RMatrixLUInverse
12268     A       -   same as for RMatrixLUInverse.
12269 
12270   ! FREE EDITION OF ALGLIB:
12271   !
12272   ! Free Edition of ALGLIB supports following important features for  this
12273   ! function:
12274   ! * C++ version: x64 SIMD support using C++ intrinsics
12275   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12276   !
12277   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12278   ! Reference Manual in order  to  find  out  how to activate SIMD support
12279   ! in ALGLIB.
12280 
12281   ! COMMERCIAL EDITION OF ALGLIB:
12282   !
12283   ! Commercial Edition of ALGLIB includes following important improvements
12284   ! of this function:
12285   ! * high-performance native backend with same C# interface (C# version)
12286   ! * multithreading support (C++ and C# versions)
12287   ! * hardware vendor (Intel) implementations of linear algebra primitives
12288   !   (C++ and C# versions, x86/x64 platform)
12289   !
12290   ! We recommend you to read 'Working with commercial version' section  of
12291   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12292   ! related features provided by commercial edition of ALGLIB.
12293 
12294   -- ALGLIB --
12295      Copyright 05.02.2010 by Bochkanov Sergey
12296 *************************************************************************/
cmatrixtrinverse(complex_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,ae_int_t & info,matinvreport & rep,const xparams _xparams)12297 void cmatrixtrinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12298 {
12299     jmp_buf _break_jump;
12300     alglib_impl::ae_state _alglib_env_state;
12301     alglib_impl::ae_state_init(&_alglib_env_state);
12302     if( setjmp(_break_jump) )
12303     {
12304 #if !defined(AE_NO_EXCEPTIONS)
12305         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12306 #else
12307         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12308         return;
12309 #endif
12310     }
12311     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12312     if( _xparams.flags!=0x0 )
12313         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12314     alglib_impl::cmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12315     alglib_impl::ae_state_clear(&_alglib_env_state);
12316     return;
12317 }
12318 
12319 /*************************************************************************
12320 Triangular matrix inverse (complex)
12321 
12322 The subroutine inverts the following types of matrices:
12323     * upper triangular
12324     * upper triangular with unit diagonal
12325     * lower triangular
12326     * lower triangular with unit diagonal
12327 
12328 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
12329 also be upper (lower) triangular, and after the end of the algorithm,  the
12330 inverse matrix replaces the source matrix. The elements  below (above) the
12331 main diagonal are not changed by the algorithm.
12332 
12333 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
12334 diagonal, and the diagonal elements are not passed to the algorithm.
12335 
12336 Input parameters:
12337     A       -   matrix, array[0..N-1, 0..N-1].
12338     N       -   size of matrix A (optional) :
12339                 * if given, only principal NxN submatrix is processed  and
12340                   overwritten. other elements are unchanged.
12341                 * if not given,  size  is  automatically  determined  from
12342                   matrix size (A must be square matrix)
12343     IsUpper -   True, if the matrix is upper triangular.
12344     IsUnit  -   diagonal type (optional):
12345                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
12346                 * if False, matrix diagonal is arbitrary
12347                 * if not given, False is assumed
12348 
12349 Output parameters:
12350     Info    -   same as for RMatrixLUInverse
12351     Rep     -   same as for RMatrixLUInverse
12352     A       -   same as for RMatrixLUInverse.
12353 
12354   ! FREE EDITION OF ALGLIB:
12355   !
12356   ! Free Edition of ALGLIB supports following important features for  this
12357   ! function:
12358   ! * C++ version: x64 SIMD support using C++ intrinsics
12359   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12360   !
12361   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12362   ! Reference Manual in order  to  find  out  how to activate SIMD support
12363   ! in ALGLIB.
12364 
12365   ! COMMERCIAL EDITION OF ALGLIB:
12366   !
12367   ! Commercial Edition of ALGLIB includes following important improvements
12368   ! of this function:
12369   ! * high-performance native backend with same C# interface (C# version)
12370   ! * multithreading support (C++ and C# versions)
12371   ! * hardware vendor (Intel) implementations of linear algebra primitives
12372   !   (C++ and C# versions, x86/x64 platform)
12373   !
12374   ! We recommend you to read 'Working with commercial version' section  of
12375   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12376   ! related features provided by commercial edition of ALGLIB.
12377 
12378   -- ALGLIB --
12379      Copyright 05.02.2010 by Bochkanov Sergey
12380 *************************************************************************/
12381 #if !defined(AE_NO_EXCEPTIONS)
cmatrixtrinverse(complex_2d_array & a,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)12382 void cmatrixtrinverse(complex_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12383 {
12384     jmp_buf _break_jump;
12385     alglib_impl::ae_state _alglib_env_state;
12386     ae_int_t n;
12387     bool isunit;
12388     if( (a.cols()!=a.rows()))
12389         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixtrinverse': looks like one of arguments has wrong size");
12390     n = a.cols();
12391     isunit = false;
12392     alglib_impl::ae_state_init(&_alglib_env_state);
12393     if( setjmp(_break_jump) )
12394         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12395     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12396     if( _xparams.flags!=0x0 )
12397         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12398     alglib_impl::cmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12399 
12400     alglib_impl::ae_state_clear(&_alglib_env_state);
12401     return;
12402 }
12403 #endif
12404 #endif
12405 
12406 #if defined(AE_COMPILE_INVERSEUPDATE) || !defined(AE_PARTIAL_BUILD)
12407 /*************************************************************************
12408 Inverse matrix update by the Sherman-Morrison formula
12409 
12410 The algorithm updates matrix A^-1 when adding a number to an element
12411 of matrix A.
12412 
12413 Input parameters:
12414     InvA    -   inverse of matrix A.
12415                 Array whose indexes range within [0..N-1, 0..N-1].
12416     N       -   size of matrix A.
12417     UpdRow  -   row where the element to be updated is stored.
12418     UpdColumn - column where the element to be updated is stored.
12419     UpdVal  -   a number to be added to the element.
12420 
12421 
12422 Output parameters:
12423     InvA    -   inverse of modified matrix A.
12424 
12425   -- ALGLIB --
12426      Copyright 2005 by Bochkanov Sergey
12427 *************************************************************************/
rmatrixinvupdatesimple(real_2d_array & inva,const ae_int_t n,const ae_int_t updrow,const ae_int_t updcolumn,const double updval,const xparams _xparams)12428 void rmatrixinvupdatesimple(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const ae_int_t updcolumn, const double updval, const xparams _xparams)
12429 {
12430     jmp_buf _break_jump;
12431     alglib_impl::ae_state _alglib_env_state;
12432     alglib_impl::ae_state_init(&_alglib_env_state);
12433     if( setjmp(_break_jump) )
12434     {
12435 #if !defined(AE_NO_EXCEPTIONS)
12436         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12437 #else
12438         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12439         return;
12440 #endif
12441     }
12442     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12443     if( _xparams.flags!=0x0 )
12444         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12445     alglib_impl::rmatrixinvupdatesimple(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updrow, updcolumn, updval, &_alglib_env_state);
12446     alglib_impl::ae_state_clear(&_alglib_env_state);
12447     return;
12448 }
12449 
12450 /*************************************************************************
12451 Inverse matrix update by the Sherman-Morrison formula
12452 
12453 The algorithm updates matrix A^-1 when adding a vector to a row
12454 of matrix A.
12455 
12456 Input parameters:
12457     InvA    -   inverse of matrix A.
12458                 Array whose indexes range within [0..N-1, 0..N-1].
12459     N       -   size of matrix A.
12460     UpdRow  -   the row of A whose vector V was added.
12461                 0 <= Row <= N-1
12462     V       -   the vector to be added to a row.
12463                 Array whose index ranges within [0..N-1].
12464 
12465 Output parameters:
12466     InvA    -   inverse of modified matrix A.
12467 
12468   -- ALGLIB --
12469      Copyright 2005 by Bochkanov Sergey
12470 *************************************************************************/
rmatrixinvupdaterow(real_2d_array & inva,const ae_int_t n,const ae_int_t updrow,const real_1d_array & v,const xparams _xparams)12471 void rmatrixinvupdaterow(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const real_1d_array &v, const xparams _xparams)
12472 {
12473     jmp_buf _break_jump;
12474     alglib_impl::ae_state _alglib_env_state;
12475     alglib_impl::ae_state_init(&_alglib_env_state);
12476     if( setjmp(_break_jump) )
12477     {
12478 #if !defined(AE_NO_EXCEPTIONS)
12479         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12480 #else
12481         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12482         return;
12483 #endif
12484     }
12485     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12486     if( _xparams.flags!=0x0 )
12487         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12488     alglib_impl::rmatrixinvupdaterow(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updrow, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &_alglib_env_state);
12489     alglib_impl::ae_state_clear(&_alglib_env_state);
12490     return;
12491 }
12492 
12493 /*************************************************************************
12494 Inverse matrix update by the Sherman-Morrison formula
12495 
12496 The algorithm updates matrix A^-1 when adding a vector to a column
12497 of matrix A.
12498 
12499 Input parameters:
12500     InvA        -   inverse of matrix A.
12501                     Array whose indexes range within [0..N-1, 0..N-1].
12502     N           -   size of matrix A.
12503     UpdColumn   -   the column of A whose vector U was added.
12504                     0 <= UpdColumn <= N-1
12505     U           -   the vector to be added to a column.
12506                     Array whose index ranges within [0..N-1].
12507 
12508 Output parameters:
12509     InvA        -   inverse of modified matrix A.
12510 
12511   -- ALGLIB --
12512      Copyright 2005 by Bochkanov Sergey
12513 *************************************************************************/
rmatrixinvupdatecolumn(real_2d_array & inva,const ae_int_t n,const ae_int_t updcolumn,const real_1d_array & u,const xparams _xparams)12514 void rmatrixinvupdatecolumn(real_2d_array &inva, const ae_int_t n, const ae_int_t updcolumn, const real_1d_array &u, const xparams _xparams)
12515 {
12516     jmp_buf _break_jump;
12517     alglib_impl::ae_state _alglib_env_state;
12518     alglib_impl::ae_state_init(&_alglib_env_state);
12519     if( setjmp(_break_jump) )
12520     {
12521 #if !defined(AE_NO_EXCEPTIONS)
12522         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12523 #else
12524         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12525         return;
12526 #endif
12527     }
12528     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12529     if( _xparams.flags!=0x0 )
12530         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12531     alglib_impl::rmatrixinvupdatecolumn(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updcolumn, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), &_alglib_env_state);
12532     alglib_impl::ae_state_clear(&_alglib_env_state);
12533     return;
12534 }
12535 
12536 /*************************************************************************
12537 Inverse matrix update by the Sherman-Morrison formula
12538 
12539 The algorithm computes the inverse of matrix A+u*v' by using the given matrix
12540 A^-1 and the vectors u and v.
12541 
12542 Input parameters:
12543     InvA    -   inverse of matrix A.
12544                 Array whose indexes range within [0..N-1, 0..N-1].
12545     N       -   size of matrix A.
12546     U       -   the vector modifying the matrix.
12547                 Array whose index ranges within [0..N-1].
12548     V       -   the vector modifying the matrix.
12549                 Array whose index ranges within [0..N-1].
12550 
12551 Output parameters:
12552     InvA - inverse of matrix A + u*v'.
12553 
12554   -- ALGLIB --
12555      Copyright 2005 by Bochkanov Sergey
12556 *************************************************************************/
rmatrixinvupdateuv(real_2d_array & inva,const ae_int_t n,const real_1d_array & u,const real_1d_array & v,const xparams _xparams)12557 void rmatrixinvupdateuv(real_2d_array &inva, const ae_int_t n, const real_1d_array &u, const real_1d_array &v, const xparams _xparams)
12558 {
12559     jmp_buf _break_jump;
12560     alglib_impl::ae_state _alglib_env_state;
12561     alglib_impl::ae_state_init(&_alglib_env_state);
12562     if( setjmp(_break_jump) )
12563     {
12564 #if !defined(AE_NO_EXCEPTIONS)
12565         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12566 #else
12567         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12568         return;
12569 #endif
12570     }
12571     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12572     if( _xparams.flags!=0x0 )
12573         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12574     alglib_impl::rmatrixinvupdateuv(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &_alglib_env_state);
12575     alglib_impl::ae_state_clear(&_alglib_env_state);
12576     return;
12577 }
12578 #endif
12579 
12580 #if defined(AE_COMPILE_SCHUR) || !defined(AE_PARTIAL_BUILD)
12581 /*************************************************************************
12582 Subroutine performing the Schur decomposition of a general matrix by using
12583 the QR algorithm with multiple shifts.
12584 
12585 COMMERCIAL EDITION OF ALGLIB:
12586 
12587   ! Commercial version of ALGLIB includes one  important  improvement   of
12588   ! this function, which can be used from C++ and C#:
12589   ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
12590   !
12591   ! Intel MKL gives approximately constant  (with  respect  to  number  of
12592   ! worker threads) acceleration factor which depends on CPU  being  used,
12593   ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
12594   ! comparison.
12595   !
12596   ! Multithreaded acceleration is NOT supported for this function.
12597   !
12598   ! We recommend you to read 'Working with commercial version' section  of
12599   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12600   ! related features provided by commercial edition of ALGLIB.
12601 
12602 The source matrix A is represented as S'*A*S = T, where S is an orthogonal
12603 matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of
12604 sizes 1x1 and 2x2 on the main diagonal).
12605 
12606 Input parameters:
12607     A   -   matrix to be decomposed.
12608             Array whose indexes range within [0..N-1, 0..N-1].
12609     N   -   size of A, N>=0.
12610 
12611 
12612 Output parameters:
12613     A   -   contains matrix T.
12614             Array whose indexes range within [0..N-1, 0..N-1].
12615     S   -   contains Schur vectors.
12616             Array whose indexes range within [0..N-1, 0..N-1].
12617 
12618 Note 1:
12619     The block structure of matrix T can be easily recognized: since all
12620     the elements below the blocks are zeros, the elements a[i+1,i] which
12621     are equal to 0 show the block border.
12622 
12623 Note 2:
12624     The algorithm performance depends on the value of the internal parameter
12625     NS of the InternalSchurDecomposition subroutine which defines the number
12626     of shifts in the QR algorithm (similarly to the block width in block-matrix
12627     algorithms in linear algebra). If you require maximum performance on
12628     your machine, it is recommended to adjust this parameter manually.
12629 
12630 Result:
12631     True,
12632         if the algorithm has converged and parameters A and S contain the result.
12633     False,
12634         if the algorithm has not converged.
12635 
12636 Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library).
12637 *************************************************************************/
rmatrixschur(real_2d_array & a,const ae_int_t n,real_2d_array & s,const xparams _xparams)12638 bool rmatrixschur(real_2d_array &a, const ae_int_t n, real_2d_array &s, const xparams _xparams)
12639 {
12640     jmp_buf _break_jump;
12641     alglib_impl::ae_state _alglib_env_state;
12642     alglib_impl::ae_state_init(&_alglib_env_state);
12643     if( setjmp(_break_jump) )
12644     {
12645 #if !defined(AE_NO_EXCEPTIONS)
12646         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12647 #else
12648         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12649         return 0;
12650 #endif
12651     }
12652     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12653     if( _xparams.flags!=0x0 )
12654         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12655     ae_bool result = alglib_impl::rmatrixschur(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_matrix*>(s.c_ptr()), &_alglib_env_state);
12656     alglib_impl::ae_state_clear(&_alglib_env_state);
12657     return *(reinterpret_cast<bool*>(&result));
12658 }
12659 #endif
12660 
12661 #if defined(AE_COMPILE_SPDGEVD) || !defined(AE_PARTIAL_BUILD)
12662 /*************************************************************************
12663 Algorithm for solving the following generalized symmetric positive-definite
12664 eigenproblem:
12665     A*x = lambda*B*x (1) or
12666     A*B*x = lambda*x (2) or
12667     B*A*x = lambda*x (3).
12668 where A is a symmetric matrix, B - symmetric positive-definite matrix.
12669 The problem is solved by reducing it to an ordinary  symmetric  eigenvalue
12670 problem.
12671 
12672 Input parameters:
12673     A           -   symmetric matrix which is given by its upper or lower
12674                     triangular part.
12675                     Array whose indexes range within [0..N-1, 0..N-1].
12676     N           -   size of matrices A and B.
12677     IsUpperA    -   storage format of matrix A.
12678     B           -   symmetric positive-definite matrix which is given by
12679                     its upper or lower triangular part.
12680                     Array whose indexes range within [0..N-1, 0..N-1].
12681     IsUpperB    -   storage format of matrix B.
12682     ZNeeded     -   if ZNeeded is equal to:
12683                      * 0, the eigenvectors are not returned;
12684                      * 1, the eigenvectors are returned.
12685     ProblemType -   if ProblemType is equal to:
12686                      * 1, the following problem is solved: A*x = lambda*B*x;
12687                      * 2, the following problem is solved: A*B*x = lambda*x;
12688                      * 3, the following problem is solved: B*A*x = lambda*x.
12689 
12690 Output parameters:
12691     D           -   eigenvalues in ascending order.
12692                     Array whose index ranges within [0..N-1].
12693     Z           -   if ZNeeded is equal to:
12694                      * 0, Z hasn't changed;
12695                      * 1, Z contains eigenvectors.
12696                     Array whose indexes range within [0..N-1, 0..N-1].
12697                     The eigenvectors are stored in matrix columns. It should
12698                     be noted that the eigenvectors in such problems do not
12699                     form an orthogonal system.
12700 
12701 Result:
12702     True, if the problem was solved successfully.
12703     False, if the error occurred during the Cholesky decomposition of matrix
12704     B (the matrix isn't positive-definite) or during the work of the iterative
12705     algorithm for solving the symmetric eigenproblem.
12706 
12707 See also the GeneralizedSymmetricDefiniteEVDReduce subroutine.
12708 
12709   -- ALGLIB --
12710      Copyright 1.28.2006 by Bochkanov Sergey
12711 *************************************************************************/
smatrixgevd(const real_2d_array & a,const ae_int_t n,const bool isuppera,const real_2d_array & b,const bool isupperb,const ae_int_t zneeded,const ae_int_t problemtype,real_1d_array & d,real_2d_array & z,const xparams _xparams)12712 bool smatrixgevd(const real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t zneeded, const ae_int_t problemtype, real_1d_array &d, real_2d_array &z, const xparams _xparams)
12713 {
12714     jmp_buf _break_jump;
12715     alglib_impl::ae_state _alglib_env_state;
12716     alglib_impl::ae_state_init(&_alglib_env_state);
12717     if( setjmp(_break_jump) )
12718     {
12719 #if !defined(AE_NO_EXCEPTIONS)
12720         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12721 #else
12722         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12723         return 0;
12724 #endif
12725     }
12726     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12727     if( _xparams.flags!=0x0 )
12728         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12729     ae_bool result = alglib_impl::smatrixgevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isuppera, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), isupperb, zneeded, problemtype, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
12730     alglib_impl::ae_state_clear(&_alglib_env_state);
12731     return *(reinterpret_cast<bool*>(&result));
12732 }
12733 
12734 /*************************************************************************
12735 Algorithm for reduction of the following generalized symmetric positive-
12736 definite eigenvalue problem:
12737     A*x = lambda*B*x (1) or
12738     A*B*x = lambda*x (2) or
12739     B*A*x = lambda*x (3)
12740 to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and
12741 the given problems are the same, and the eigenvectors of the given problem
12742 could be obtained by multiplying the obtained eigenvectors by the
12743 transformation matrix x = R*y).
12744 
12745 Here A is a symmetric matrix, B - symmetric positive-definite matrix.
12746 
12747 Input parameters:
12748     A           -   symmetric matrix which is given by its upper or lower
12749                     triangular part.
12750                     Array whose indexes range within [0..N-1, 0..N-1].
12751     N           -   size of matrices A and B.
12752     IsUpperA    -   storage format of matrix A.
12753     B           -   symmetric positive-definite matrix which is given by
12754                     its upper or lower triangular part.
12755                     Array whose indexes range within [0..N-1, 0..N-1].
12756     IsUpperB    -   storage format of matrix B.
12757     ProblemType -   if ProblemType is equal to:
12758                      * 1, the following problem is solved: A*x = lambda*B*x;
12759                      * 2, the following problem is solved: A*B*x = lambda*x;
12760                      * 3, the following problem is solved: B*A*x = lambda*x.
12761 
12762 Output parameters:
12763     A           -   symmetric matrix which is given by its upper or lower
12764                     triangle depending on IsUpperA. Contains matrix C.
12765                     Array whose indexes range within [0..N-1, 0..N-1].
12766     R           -   upper triangular or low triangular transformation matrix
12767                     which is used to obtain the eigenvectors of a given problem
12768                     as the product of eigenvectors of C (from the right) and
12769                     matrix R (from the left). If the matrix is upper
12770                     triangular, the elements below the main diagonal
12771                     are equal to 0 (and vice versa). Thus, we can perform
12772                     the multiplication without taking into account the
12773                     internal structure (which is an easier though less
12774                     effective way).
12775                     Array whose indexes range within [0..N-1, 0..N-1].
12776     IsUpperR    -   type of matrix R (upper or lower triangular).
12777 
12778 Result:
12779     True, if the problem was reduced successfully.
12780     False, if the error occurred during the Cholesky decomposition of
12781         matrix B (the matrix is not positive-definite).
12782 
12783   -- ALGLIB --
12784      Copyright 1.28.2006 by Bochkanov Sergey
12785 *************************************************************************/
smatrixgevdreduce(real_2d_array & a,const ae_int_t n,const bool isuppera,const real_2d_array & b,const bool isupperb,const ae_int_t problemtype,real_2d_array & r,bool & isupperr,const xparams _xparams)12786 bool smatrixgevdreduce(real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t problemtype, real_2d_array &r, bool &isupperr, const xparams _xparams)
12787 {
12788     jmp_buf _break_jump;
12789     alglib_impl::ae_state _alglib_env_state;
12790     alglib_impl::ae_state_init(&_alglib_env_state);
12791     if( setjmp(_break_jump) )
12792     {
12793 #if !defined(AE_NO_EXCEPTIONS)
12794         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12795 #else
12796         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12797         return 0;
12798 #endif
12799     }
12800     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12801     if( _xparams.flags!=0x0 )
12802         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12803     ae_bool result = alglib_impl::smatrixgevdreduce(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isuppera, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), isupperb, problemtype, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &isupperr, &_alglib_env_state);
12804     alglib_impl::ae_state_clear(&_alglib_env_state);
12805     return *(reinterpret_cast<bool*>(&result));
12806 }
12807 #endif
12808 
12809 #if defined(AE_COMPILE_MATDET) || !defined(AE_PARTIAL_BUILD)
12810 /*************************************************************************
12811 Determinant calculation of the matrix given by its LU decomposition.
12812 
12813 Input parameters:
12814     A       -   LU decomposition of the matrix (output of
12815                 RMatrixLU subroutine).
12816     Pivots  -   table of permutations which were made during
12817                 the LU decomposition.
12818                 Output of RMatrixLU subroutine.
12819     N       -   (optional) size of matrix A:
12820                 * if given, only principal NxN submatrix is processed and
12821                   overwritten. other elements are unchanged.
12822                 * if not given, automatically determined from matrix size
12823                   (A must be square matrix)
12824 
12825 Result: matrix determinant.
12826 
12827   -- ALGLIB --
12828      Copyright 2005 by Bochkanov Sergey
12829 *************************************************************************/
rmatrixludet(const real_2d_array & a,const integer_1d_array & pivots,const ae_int_t n,const xparams _xparams)12830 double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, const xparams _xparams)
12831 {
12832     jmp_buf _break_jump;
12833     alglib_impl::ae_state _alglib_env_state;
12834     alglib_impl::ae_state_init(&_alglib_env_state);
12835     if( setjmp(_break_jump) )
12836     {
12837 #if !defined(AE_NO_EXCEPTIONS)
12838         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12839 #else
12840         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12841         return 0;
12842 #endif
12843     }
12844     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12845     if( _xparams.flags!=0x0 )
12846         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12847     double result = alglib_impl::rmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
12848     alglib_impl::ae_state_clear(&_alglib_env_state);
12849     return *(reinterpret_cast<double*>(&result));
12850 }
12851 
12852 /*************************************************************************
12853 Determinant calculation of the matrix given by its LU decomposition.
12854 
12855 Input parameters:
12856     A       -   LU decomposition of the matrix (output of
12857                 RMatrixLU subroutine).
12858     Pivots  -   table of permutations which were made during
12859                 the LU decomposition.
12860                 Output of RMatrixLU subroutine.
12861     N       -   (optional) size of matrix A:
12862                 * if given, only principal NxN submatrix is processed and
12863                   overwritten. other elements are unchanged.
12864                 * if not given, automatically determined from matrix size
12865                   (A must be square matrix)
12866 
12867 Result: matrix determinant.
12868 
12869   -- ALGLIB --
12870      Copyright 2005 by Bochkanov Sergey
12871 *************************************************************************/
12872 #if !defined(AE_NO_EXCEPTIONS)
rmatrixludet(const real_2d_array & a,const integer_1d_array & pivots,const xparams _xparams)12873 double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots, const xparams _xparams)
12874 {
12875     jmp_buf _break_jump;
12876     alglib_impl::ae_state _alglib_env_state;
12877     ae_int_t n;
12878     if( (a.rows()!=a.cols()) || (a.rows()!=pivots.length()))
12879         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixludet': looks like one of arguments has wrong size");
12880     n = a.rows();
12881     alglib_impl::ae_state_init(&_alglib_env_state);
12882     if( setjmp(_break_jump) )
12883         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12884     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12885     if( _xparams.flags!=0x0 )
12886         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12887     double result = alglib_impl::rmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
12888 
12889     alglib_impl::ae_state_clear(&_alglib_env_state);
12890     return *(reinterpret_cast<double*>(&result));
12891 }
12892 #endif
12893 
12894 /*************************************************************************
12895 Calculation of the determinant of a general matrix
12896 
12897 Input parameters:
12898     A       -   matrix, array[0..N-1, 0..N-1]
12899     N       -   (optional) size of matrix A:
12900                 * if given, only principal NxN submatrix is processed and
12901                   overwritten. other elements are unchanged.
12902                 * if not given, automatically determined from matrix size
12903                   (A must be square matrix)
12904 
12905 Result: determinant of matrix A.
12906 
12907   -- ALGLIB --
12908      Copyright 2005 by Bochkanov Sergey
12909 *************************************************************************/
rmatrixdet(const real_2d_array & a,const ae_int_t n,const xparams _xparams)12910 double rmatrixdet(const real_2d_array &a, const ae_int_t n, const xparams _xparams)
12911 {
12912     jmp_buf _break_jump;
12913     alglib_impl::ae_state _alglib_env_state;
12914     alglib_impl::ae_state_init(&_alglib_env_state);
12915     if( setjmp(_break_jump) )
12916     {
12917 #if !defined(AE_NO_EXCEPTIONS)
12918         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12919 #else
12920         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12921         return 0;
12922 #endif
12923     }
12924     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12925     if( _xparams.flags!=0x0 )
12926         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12927     double result = alglib_impl::rmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
12928     alglib_impl::ae_state_clear(&_alglib_env_state);
12929     return *(reinterpret_cast<double*>(&result));
12930 }
12931 
12932 /*************************************************************************
12933 Calculation of the determinant of a general matrix
12934 
12935 Input parameters:
12936     A       -   matrix, array[0..N-1, 0..N-1]
12937     N       -   (optional) size of matrix A:
12938                 * if given, only principal NxN submatrix is processed and
12939                   overwritten. other elements are unchanged.
12940                 * if not given, automatically determined from matrix size
12941                   (A must be square matrix)
12942 
12943 Result: determinant of matrix A.
12944 
12945   -- ALGLIB --
12946      Copyright 2005 by Bochkanov Sergey
12947 *************************************************************************/
12948 #if !defined(AE_NO_EXCEPTIONS)
rmatrixdet(const real_2d_array & a,const xparams _xparams)12949 double rmatrixdet(const real_2d_array &a, const xparams _xparams)
12950 {
12951     jmp_buf _break_jump;
12952     alglib_impl::ae_state _alglib_env_state;
12953     ae_int_t n;
12954     if( (a.rows()!=a.cols()))
12955         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixdet': looks like one of arguments has wrong size");
12956     n = a.rows();
12957     alglib_impl::ae_state_init(&_alglib_env_state);
12958     if( setjmp(_break_jump) )
12959         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12960     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12961     if( _xparams.flags!=0x0 )
12962         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12963     double result = alglib_impl::rmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
12964 
12965     alglib_impl::ae_state_clear(&_alglib_env_state);
12966     return *(reinterpret_cast<double*>(&result));
12967 }
12968 #endif
12969 
12970 /*************************************************************************
12971 Determinant calculation of the matrix given by its LU decomposition.
12972 
12973 Input parameters:
12974     A       -   LU decomposition of the matrix (output of
12975                 RMatrixLU subroutine).
12976     Pivots  -   table of permutations which were made during
12977                 the LU decomposition.
12978                 Output of RMatrixLU subroutine.
12979     N       -   (optional) size of matrix A:
12980                 * if given, only principal NxN submatrix is processed and
12981                   overwritten. other elements are unchanged.
12982                 * if not given, automatically determined from matrix size
12983                   (A must be square matrix)
12984 
12985 Result: matrix determinant.
12986 
12987   -- ALGLIB --
12988      Copyright 2005 by Bochkanov Sergey
12989 *************************************************************************/
cmatrixludet(const complex_2d_array & a,const integer_1d_array & pivots,const ae_int_t n,const xparams _xparams)12990 alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, const xparams _xparams)
12991 {
12992     jmp_buf _break_jump;
12993     alglib_impl::ae_state _alglib_env_state;
12994     alglib_impl::ae_state_init(&_alglib_env_state);
12995     if( setjmp(_break_jump) )
12996     {
12997 #if !defined(AE_NO_EXCEPTIONS)
12998         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12999 #else
13000         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13001         return 0;
13002 #endif
13003     }
13004     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13005     if( _xparams.flags!=0x0 )
13006         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13007     alglib_impl::ae_complex result = alglib_impl::cmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
13008     alglib_impl::ae_state_clear(&_alglib_env_state);
13009     return *(reinterpret_cast<alglib::complex*>(&result));
13010 }
13011 
13012 /*************************************************************************
13013 Determinant calculation of the matrix given by its LU decomposition.
13014 
13015 Input parameters:
13016     A       -   LU decomposition of the matrix (output of
13017                 RMatrixLU subroutine).
13018     Pivots  -   table of permutations which were made during
13019                 the LU decomposition.
13020                 Output of RMatrixLU subroutine.
13021     N       -   (optional) size of matrix A:
13022                 * if given, only principal NxN submatrix is processed and
13023                   overwritten. other elements are unchanged.
13024                 * if not given, automatically determined from matrix size
13025                   (A must be square matrix)
13026 
13027 Result: matrix determinant.
13028 
13029   -- ALGLIB --
13030      Copyright 2005 by Bochkanov Sergey
13031 *************************************************************************/
13032 #if !defined(AE_NO_EXCEPTIONS)
cmatrixludet(const complex_2d_array & a,const integer_1d_array & pivots,const xparams _xparams)13033 alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots, const xparams _xparams)
13034 {
13035     jmp_buf _break_jump;
13036     alglib_impl::ae_state _alglib_env_state;
13037     ae_int_t n;
13038     if( (a.rows()!=a.cols()) || (a.rows()!=pivots.length()))
13039         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixludet': looks like one of arguments has wrong size");
13040     n = a.rows();
13041     alglib_impl::ae_state_init(&_alglib_env_state);
13042     if( setjmp(_break_jump) )
13043         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13044     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13045     if( _xparams.flags!=0x0 )
13046         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13047     alglib_impl::ae_complex result = alglib_impl::cmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
13048 
13049     alglib_impl::ae_state_clear(&_alglib_env_state);
13050     return *(reinterpret_cast<alglib::complex*>(&result));
13051 }
13052 #endif
13053 
13054 /*************************************************************************
13055 Calculation of the determinant of a general matrix
13056 
13057 Input parameters:
13058     A       -   matrix, array[0..N-1, 0..N-1]
13059     N       -   (optional) size of matrix A:
13060                 * if given, only principal NxN submatrix is processed and
13061                   overwritten. other elements are unchanged.
13062                 * if not given, automatically determined from matrix size
13063                   (A must be square matrix)
13064 
13065 Result: determinant of matrix A.
13066 
13067   -- ALGLIB --
13068      Copyright 2005 by Bochkanov Sergey
13069 *************************************************************************/
cmatrixdet(const complex_2d_array & a,const ae_int_t n,const xparams _xparams)13070 alglib::complex cmatrixdet(const complex_2d_array &a, const ae_int_t n, const xparams _xparams)
13071 {
13072     jmp_buf _break_jump;
13073     alglib_impl::ae_state _alglib_env_state;
13074     alglib_impl::ae_state_init(&_alglib_env_state);
13075     if( setjmp(_break_jump) )
13076     {
13077 #if !defined(AE_NO_EXCEPTIONS)
13078         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13079 #else
13080         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13081         return 0;
13082 #endif
13083     }
13084     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13085     if( _xparams.flags!=0x0 )
13086         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13087     alglib_impl::ae_complex result = alglib_impl::cmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
13088     alglib_impl::ae_state_clear(&_alglib_env_state);
13089     return *(reinterpret_cast<alglib::complex*>(&result));
13090 }
13091 
13092 /*************************************************************************
13093 Calculation of the determinant of a general matrix
13094 
13095 Input parameters:
13096     A       -   matrix, array[0..N-1, 0..N-1]
13097     N       -   (optional) size of matrix A:
13098                 * if given, only principal NxN submatrix is processed and
13099                   overwritten. other elements are unchanged.
13100                 * if not given, automatically determined from matrix size
13101                   (A must be square matrix)
13102 
13103 Result: determinant of matrix A.
13104 
13105   -- ALGLIB --
13106      Copyright 2005 by Bochkanov Sergey
13107 *************************************************************************/
13108 #if !defined(AE_NO_EXCEPTIONS)
cmatrixdet(const complex_2d_array & a,const xparams _xparams)13109 alglib::complex cmatrixdet(const complex_2d_array &a, const xparams _xparams)
13110 {
13111     jmp_buf _break_jump;
13112     alglib_impl::ae_state _alglib_env_state;
13113     ae_int_t n;
13114     if( (a.rows()!=a.cols()))
13115         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixdet': looks like one of arguments has wrong size");
13116     n = a.rows();
13117     alglib_impl::ae_state_init(&_alglib_env_state);
13118     if( setjmp(_break_jump) )
13119         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13120     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13121     if( _xparams.flags!=0x0 )
13122         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13123     alglib_impl::ae_complex result = alglib_impl::cmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
13124 
13125     alglib_impl::ae_state_clear(&_alglib_env_state);
13126     return *(reinterpret_cast<alglib::complex*>(&result));
13127 }
13128 #endif
13129 
13130 /*************************************************************************
13131 Determinant calculation of the matrix given by the Cholesky decomposition.
13132 
13133 Input parameters:
13134     A       -   Cholesky decomposition,
13135                 output of SMatrixCholesky subroutine.
13136     N       -   (optional) size of matrix A:
13137                 * if given, only principal NxN submatrix is processed and
13138                   overwritten. other elements are unchanged.
13139                 * if not given, automatically determined from matrix size
13140                   (A must be square matrix)
13141 
13142 As the determinant is equal to the product of squares of diagonal elements,
13143 it's not necessary to specify which triangle - lower or upper - the matrix
13144 is stored in.
13145 
13146 Result:
13147     matrix determinant.
13148 
13149   -- ALGLIB --
13150      Copyright 2005-2008 by Bochkanov Sergey
13151 *************************************************************************/
spdmatrixcholeskydet(const real_2d_array & a,const ae_int_t n,const xparams _xparams)13152 double spdmatrixcholeskydet(const real_2d_array &a, const ae_int_t n, const xparams _xparams)
13153 {
13154     jmp_buf _break_jump;
13155     alglib_impl::ae_state _alglib_env_state;
13156     alglib_impl::ae_state_init(&_alglib_env_state);
13157     if( setjmp(_break_jump) )
13158     {
13159 #if !defined(AE_NO_EXCEPTIONS)
13160         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13161 #else
13162         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13163         return 0;
13164 #endif
13165     }
13166     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13167     if( _xparams.flags!=0x0 )
13168         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13169     double result = alglib_impl::spdmatrixcholeskydet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
13170     alglib_impl::ae_state_clear(&_alglib_env_state);
13171     return *(reinterpret_cast<double*>(&result));
13172 }
13173 
13174 /*************************************************************************
13175 Determinant calculation of the matrix given by the Cholesky decomposition.
13176 
13177 Input parameters:
13178     A       -   Cholesky decomposition,
13179                 output of SMatrixCholesky subroutine.
13180     N       -   (optional) size of matrix A:
13181                 * if given, only principal NxN submatrix is processed and
13182                   overwritten. other elements are unchanged.
13183                 * if not given, automatically determined from matrix size
13184                   (A must be square matrix)
13185 
13186 As the determinant is equal to the product of squares of diagonal elements,
13187 it's not necessary to specify which triangle - lower or upper - the matrix
13188 is stored in.
13189 
13190 Result:
13191     matrix determinant.
13192 
13193   -- ALGLIB --
13194      Copyright 2005-2008 by Bochkanov Sergey
13195 *************************************************************************/
13196 #if !defined(AE_NO_EXCEPTIONS)
spdmatrixcholeskydet(const real_2d_array & a,const xparams _xparams)13197 double spdmatrixcholeskydet(const real_2d_array &a, const xparams _xparams)
13198 {
13199     jmp_buf _break_jump;
13200     alglib_impl::ae_state _alglib_env_state;
13201     ae_int_t n;
13202     if( (a.rows()!=a.cols()))
13203         _ALGLIB_CPP_EXCEPTION("Error while calling 'spdmatrixcholeskydet': looks like one of arguments has wrong size");
13204     n = a.rows();
13205     alglib_impl::ae_state_init(&_alglib_env_state);
13206     if( setjmp(_break_jump) )
13207         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13208     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13209     if( _xparams.flags!=0x0 )
13210         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13211     double result = alglib_impl::spdmatrixcholeskydet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
13212 
13213     alglib_impl::ae_state_clear(&_alglib_env_state);
13214     return *(reinterpret_cast<double*>(&result));
13215 }
13216 #endif
13217 
13218 /*************************************************************************
13219 Determinant calculation of the symmetric positive definite matrix.
13220 
13221 Input parameters:
13222     A       -   matrix. Array with elements [0..N-1, 0..N-1].
13223     N       -   (optional) size of matrix A:
13224                 * if given, only principal NxN submatrix is processed and
13225                   overwritten. other elements are unchanged.
13226                 * if not given, automatically determined from matrix size
13227                   (A must be square matrix)
13228     IsUpper -   (optional) storage type:
13229                 * if True, symmetric matrix  A  is  given  by  its  upper
13230                   triangle, and the lower triangle isn't used/changed  by
13231                   function
13232                 * if False, symmetric matrix  A  is  given  by  its lower
13233                   triangle, and the upper triangle isn't used/changed  by
13234                   function
13235                 * if not given, both lower and upper  triangles  must  be
13236                   filled.
13237 
13238 Result:
13239     determinant of matrix A.
13240     If matrix A is not positive definite, exception is thrown.
13241 
13242   -- ALGLIB --
13243      Copyright 2005-2008 by Bochkanov Sergey
13244 *************************************************************************/
spdmatrixdet(const real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)13245 double spdmatrixdet(const real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
13246 {
13247     jmp_buf _break_jump;
13248     alglib_impl::ae_state _alglib_env_state;
13249     alglib_impl::ae_state_init(&_alglib_env_state);
13250     if( setjmp(_break_jump) )
13251     {
13252 #if !defined(AE_NO_EXCEPTIONS)
13253         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13254 #else
13255         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13256         return 0;
13257 #endif
13258     }
13259     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13260     if( _xparams.flags!=0x0 )
13261         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13262     double result = alglib_impl::spdmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
13263     alglib_impl::ae_state_clear(&_alglib_env_state);
13264     return *(reinterpret_cast<double*>(&result));
13265 }
13266 
13267 /*************************************************************************
13268 Determinant calculation of the symmetric positive definite matrix.
13269 
13270 Input parameters:
13271     A       -   matrix. Array with elements [0..N-1, 0..N-1].
13272     N       -   (optional) size of matrix A:
13273                 * if given, only principal NxN submatrix is processed and
13274                   overwritten. other elements are unchanged.
13275                 * if not given, automatically determined from matrix size
13276                   (A must be square matrix)
13277     IsUpper -   (optional) storage type:
13278                 * if True, symmetric matrix  A  is  given  by  its  upper
13279                   triangle, and the lower triangle isn't used/changed  by
13280                   function
13281                 * if False, symmetric matrix  A  is  given  by  its lower
13282                   triangle, and the upper triangle isn't used/changed  by
13283                   function
13284                 * if not given, both lower and upper  triangles  must  be
13285                   filled.
13286 
13287 Result:
13288     determinant of matrix A.
13289     If matrix A is not positive definite, exception is thrown.
13290 
13291   -- ALGLIB --
13292      Copyright 2005-2008 by Bochkanov Sergey
13293 *************************************************************************/
13294 #if !defined(AE_NO_EXCEPTIONS)
spdmatrixdet(const real_2d_array & a,const xparams _xparams)13295 double spdmatrixdet(const real_2d_array &a, const xparams _xparams)
13296 {
13297     jmp_buf _break_jump;
13298     alglib_impl::ae_state _alglib_env_state;
13299     ae_int_t n;
13300     bool isupper;
13301     if( (a.rows()!=a.cols()))
13302         _ALGLIB_CPP_EXCEPTION("Error while calling 'spdmatrixdet': looks like one of arguments has wrong size");
13303     if( !alglib_impl::ae_is_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
13304         _ALGLIB_CPP_EXCEPTION("'a' parameter is not symmetric matrix");
13305     n = a.rows();
13306     isupper = false;
13307     alglib_impl::ae_state_init(&_alglib_env_state);
13308     if( setjmp(_break_jump) )
13309         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13310     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13311     if( _xparams.flags!=0x0 )
13312         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13313     double result = alglib_impl::spdmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
13314 
13315     alglib_impl::ae_state_clear(&_alglib_env_state);
13316     return *(reinterpret_cast<double*>(&result));
13317 }
13318 #endif
13319 #endif
13320 }
13321 
13322 /////////////////////////////////////////////////////////////////////////
13323 //
13324 // THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
13325 //
13326 /////////////////////////////////////////////////////////////////////////
13327 namespace alglib_impl
13328 {
13329 #if defined(AE_COMPILE_ABLAS) || !defined(AE_PARTIAL_BUILD)
13330 static ae_int_t ablas_blas2minvendorkernelsize = 8;
13331 static void ablas_ablasinternalsplitlength(ae_int_t n,
13332      ae_int_t nb,
13333      ae_int_t* n1,
13334      ae_int_t* n2,
13335      ae_state *_state);
13336 static void ablas_cmatrixrighttrsm2(ae_int_t m,
13337      ae_int_t n,
13338      /* Complex */ ae_matrix* a,
13339      ae_int_t i1,
13340      ae_int_t j1,
13341      ae_bool isupper,
13342      ae_bool isunit,
13343      ae_int_t optype,
13344      /* Complex */ ae_matrix* x,
13345      ae_int_t i2,
13346      ae_int_t j2,
13347      ae_state *_state);
13348 static void ablas_cmatrixlefttrsm2(ae_int_t m,
13349      ae_int_t n,
13350      /* Complex */ ae_matrix* a,
13351      ae_int_t i1,
13352      ae_int_t j1,
13353      ae_bool isupper,
13354      ae_bool isunit,
13355      ae_int_t optype,
13356      /* Complex */ ae_matrix* x,
13357      ae_int_t i2,
13358      ae_int_t j2,
13359      ae_state *_state);
13360 static void ablas_rmatrixrighttrsm2(ae_int_t m,
13361      ae_int_t n,
13362      /* Real    */ ae_matrix* a,
13363      ae_int_t i1,
13364      ae_int_t j1,
13365      ae_bool isupper,
13366      ae_bool isunit,
13367      ae_int_t optype,
13368      /* Real    */ ae_matrix* x,
13369      ae_int_t i2,
13370      ae_int_t j2,
13371      ae_state *_state);
13372 static void ablas_rmatrixlefttrsm2(ae_int_t m,
13373      ae_int_t n,
13374      /* Real    */ ae_matrix* a,
13375      ae_int_t i1,
13376      ae_int_t j1,
13377      ae_bool isupper,
13378      ae_bool isunit,
13379      ae_int_t optype,
13380      /* Real    */ ae_matrix* x,
13381      ae_int_t i2,
13382      ae_int_t j2,
13383      ae_state *_state);
13384 static void ablas_cmatrixherk2(ae_int_t n,
13385      ae_int_t k,
13386      double alpha,
13387      /* Complex */ ae_matrix* a,
13388      ae_int_t ia,
13389      ae_int_t ja,
13390      ae_int_t optypea,
13391      double beta,
13392      /* Complex */ ae_matrix* c,
13393      ae_int_t ic,
13394      ae_int_t jc,
13395      ae_bool isupper,
13396      ae_state *_state);
13397 static void ablas_rmatrixsyrk2(ae_int_t n,
13398      ae_int_t k,
13399      double alpha,
13400      /* Real    */ ae_matrix* a,
13401      ae_int_t ia,
13402      ae_int_t ja,
13403      ae_int_t optypea,
13404      double beta,
13405      /* Real    */ ae_matrix* c,
13406      ae_int_t ic,
13407      ae_int_t jc,
13408      ae_bool isupper,
13409      ae_state *_state);
13410 static void ablas_cmatrixgemmrec(ae_int_t m,
13411      ae_int_t n,
13412      ae_int_t k,
13413      ae_complex alpha,
13414      /* Complex */ ae_matrix* a,
13415      ae_int_t ia,
13416      ae_int_t ja,
13417      ae_int_t optypea,
13418      /* Complex */ ae_matrix* b,
13419      ae_int_t ib,
13420      ae_int_t jb,
13421      ae_int_t optypeb,
13422      ae_complex beta,
13423      /* Complex */ ae_matrix* c,
13424      ae_int_t ic,
13425      ae_int_t jc,
13426      ae_state *_state);
13427 ae_bool _trypexec_ablas_cmatrixgemmrec(ae_int_t m,
13428     ae_int_t n,
13429     ae_int_t k,
13430     ae_complex alpha,
13431     /* Complex */ ae_matrix* a,
13432     ae_int_t ia,
13433     ae_int_t ja,
13434     ae_int_t optypea,
13435     /* Complex */ ae_matrix* b,
13436     ae_int_t ib,
13437     ae_int_t jb,
13438     ae_int_t optypeb,
13439     ae_complex beta,
13440     /* Complex */ ae_matrix* c,
13441     ae_int_t ic,
13442     ae_int_t jc, ae_state *_state);
13443 static void ablas_rmatrixgemmrec(ae_int_t m,
13444      ae_int_t n,
13445      ae_int_t k,
13446      double alpha,
13447      /* Real    */ ae_matrix* a,
13448      ae_int_t ia,
13449      ae_int_t ja,
13450      ae_int_t optypea,
13451      /* Real    */ ae_matrix* b,
13452      ae_int_t ib,
13453      ae_int_t jb,
13454      ae_int_t optypeb,
13455      double beta,
13456      /* Real    */ ae_matrix* c,
13457      ae_int_t ic,
13458      ae_int_t jc,
13459      ae_state *_state);
13460 ae_bool _trypexec_ablas_rmatrixgemmrec(ae_int_t m,
13461     ae_int_t n,
13462     ae_int_t k,
13463     double alpha,
13464     /* Real    */ ae_matrix* a,
13465     ae_int_t ia,
13466     ae_int_t ja,
13467     ae_int_t optypea,
13468     /* Real    */ ae_matrix* b,
13469     ae_int_t ib,
13470     ae_int_t jb,
13471     ae_int_t optypeb,
13472     double beta,
13473     /* Real    */ ae_matrix* c,
13474     ae_int_t ic,
13475     ae_int_t jc, ae_state *_state);
13476 
13477 
13478 #endif
13479 #if defined(AE_COMPILE_ORTFAC) || !defined(AE_PARTIAL_BUILD)
13480 static void ortfac_cmatrixqrbasecase(/* Complex */ ae_matrix* a,
13481      ae_int_t m,
13482      ae_int_t n,
13483      /* Complex */ ae_vector* work,
13484      /* Complex */ ae_vector* t,
13485      /* Complex */ ae_vector* tau,
13486      ae_state *_state);
13487 static void ortfac_cmatrixlqbasecase(/* Complex */ ae_matrix* a,
13488      ae_int_t m,
13489      ae_int_t n,
13490      /* Complex */ ae_vector* work,
13491      /* Complex */ ae_vector* t,
13492      /* Complex */ ae_vector* tau,
13493      ae_state *_state);
13494 static void ortfac_rmatrixblockreflector(/* Real    */ ae_matrix* a,
13495      /* Real    */ ae_vector* tau,
13496      ae_bool columnwisea,
13497      ae_int_t lengtha,
13498      ae_int_t blocksize,
13499      /* Real    */ ae_matrix* t,
13500      /* Real    */ ae_vector* work,
13501      ae_state *_state);
13502 static void ortfac_cmatrixblockreflector(/* Complex */ ae_matrix* a,
13503      /* Complex */ ae_vector* tau,
13504      ae_bool columnwisea,
13505      ae_int_t lengtha,
13506      ae_int_t blocksize,
13507      /* Complex */ ae_matrix* t,
13508      /* Complex */ ae_vector* work,
13509      ae_state *_state);
13510 
13511 
13512 #endif
13513 #if defined(AE_COMPILE_MATGEN) || !defined(AE_PARTIAL_BUILD)
13514 
13515 
13516 #endif
13517 #if defined(AE_COMPILE_SPARSE) || !defined(AE_PARTIAL_BUILD)
13518 static double sparse_desiredloadfactor = 0.66;
13519 static double sparse_maxloadfactor = 0.75;
13520 static double sparse_growfactor = 2.00;
13521 static ae_int_t sparse_additional = 10;
13522 static ae_int_t sparse_linalgswitch = 16;
13523 static ae_int_t sparse_hash(ae_int_t i,
13524      ae_int_t j,
13525      ae_int_t tabsize,
13526      ae_state *_state);
13527 
13528 
13529 #endif
13530 #if defined(AE_COMPILE_HSSCHUR) || !defined(AE_PARTIAL_BUILD)
13531 static void hsschur_internalauxschur(ae_bool wantt,
13532      ae_bool wantz,
13533      ae_int_t n,
13534      ae_int_t ilo,
13535      ae_int_t ihi,
13536      /* Real    */ ae_matrix* h,
13537      /* Real    */ ae_vector* wr,
13538      /* Real    */ ae_vector* wi,
13539      ae_int_t iloz,
13540      ae_int_t ihiz,
13541      /* Real    */ ae_matrix* z,
13542      /* Real    */ ae_vector* work,
13543      /* Real    */ ae_vector* workv3,
13544      /* Real    */ ae_vector* workc1,
13545      /* Real    */ ae_vector* works1,
13546      ae_int_t* info,
13547      ae_state *_state);
13548 static void hsschur_aux2x2schur(double* a,
13549      double* b,
13550      double* c,
13551      double* d,
13552      double* rt1r,
13553      double* rt1i,
13554      double* rt2r,
13555      double* rt2i,
13556      double* cs,
13557      double* sn,
13558      ae_state *_state);
13559 static double hsschur_extschursign(double a, double b, ae_state *_state);
13560 static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state);
13561 
13562 
13563 #endif
13564 #if defined(AE_COMPILE_EVD) || !defined(AE_PARTIAL_BUILD)
13565 static ae_int_t evd_stepswithintol = 2;
13566 static void evd_clearrfields(eigsubspacestate* state, ae_state *_state);
13567 static ae_bool evd_tridiagonalevd(/* Real    */ ae_vector* d,
13568      /* Real    */ ae_vector* e,
13569      ae_int_t n,
13570      ae_int_t zneeded,
13571      /* Real    */ ae_matrix* z,
13572      ae_state *_state);
13573 static void evd_tdevde2(double a,
13574      double b,
13575      double c,
13576      double* rt1,
13577      double* rt2,
13578      ae_state *_state);
13579 static void evd_tdevdev2(double a,
13580      double b,
13581      double c,
13582      double* rt1,
13583      double* rt2,
13584      double* cs1,
13585      double* sn1,
13586      ae_state *_state);
13587 static double evd_tdevdpythag(double a, double b, ae_state *_state);
13588 static double evd_tdevdextsign(double a, double b, ae_state *_state);
13589 static ae_bool evd_internalbisectioneigenvalues(/* Real    */ ae_vector* d,
13590      /* Real    */ ae_vector* e,
13591      ae_int_t n,
13592      ae_int_t irange,
13593      ae_int_t iorder,
13594      double vl,
13595      double vu,
13596      ae_int_t il,
13597      ae_int_t iu,
13598      double abstol,
13599      /* Real    */ ae_vector* w,
13600      ae_int_t* m,
13601      ae_int_t* nsplit,
13602      /* Integer */ ae_vector* iblock,
13603      /* Integer */ ae_vector* isplit,
13604      ae_int_t* errorcode,
13605      ae_state *_state);
13606 static void evd_internaldstein(ae_int_t n,
13607      /* Real    */ ae_vector* d,
13608      /* Real    */ ae_vector* e,
13609      ae_int_t m,
13610      /* Real    */ ae_vector* w,
13611      /* Integer */ ae_vector* iblock,
13612      /* Integer */ ae_vector* isplit,
13613      /* Real    */ ae_matrix* z,
13614      /* Integer */ ae_vector* ifail,
13615      ae_int_t* info,
13616      ae_state *_state);
13617 static void evd_tdininternaldlagtf(ae_int_t n,
13618      /* Real    */ ae_vector* a,
13619      double lambdav,
13620      /* Real    */ ae_vector* b,
13621      /* Real    */ ae_vector* c,
13622      double tol,
13623      /* Real    */ ae_vector* d,
13624      /* Integer */ ae_vector* iin,
13625      ae_int_t* info,
13626      ae_state *_state);
13627 static void evd_tdininternaldlagts(ae_int_t n,
13628      /* Real    */ ae_vector* a,
13629      /* Real    */ ae_vector* b,
13630      /* Real    */ ae_vector* c,
13631      /* Real    */ ae_vector* d,
13632      /* Integer */ ae_vector* iin,
13633      /* Real    */ ae_vector* y,
13634      double* tol,
13635      ae_int_t* info,
13636      ae_state *_state);
13637 static void evd_internaldlaebz(ae_int_t ijob,
13638      ae_int_t nitmax,
13639      ae_int_t n,
13640      ae_int_t mmax,
13641      ae_int_t minp,
13642      double abstol,
13643      double reltol,
13644      double pivmin,
13645      /* Real    */ ae_vector* d,
13646      /* Real    */ ae_vector* e,
13647      /* Real    */ ae_vector* e2,
13648      /* Integer */ ae_vector* nval,
13649      /* Real    */ ae_matrix* ab,
13650      /* Real    */ ae_vector* c,
13651      ae_int_t* mout,
13652      /* Integer */ ae_matrix* nab,
13653      /* Real    */ ae_vector* work,
13654      /* Integer */ ae_vector* iwork,
13655      ae_int_t* info,
13656      ae_state *_state);
13657 static void evd_rmatrixinternaltrevc(/* Real    */ ae_matrix* t,
13658      ae_int_t n,
13659      ae_int_t side,
13660      ae_int_t howmny,
13661      /* Boolean */ ae_vector* vselect,
13662      /* Real    */ ae_matrix* vl,
13663      /* Real    */ ae_matrix* vr,
13664      ae_int_t* m,
13665      ae_int_t* info,
13666      ae_state *_state);
13667 static void evd_internaltrevc(/* Real    */ ae_matrix* t,
13668      ae_int_t n,
13669      ae_int_t side,
13670      ae_int_t howmny,
13671      /* Boolean */ ae_vector* vselect,
13672      /* Real    */ ae_matrix* vl,
13673      /* Real    */ ae_matrix* vr,
13674      ae_int_t* m,
13675      ae_int_t* info,
13676      ae_state *_state);
13677 static void evd_internalhsevdlaln2(ae_bool ltrans,
13678      ae_int_t na,
13679      ae_int_t nw,
13680      double smin,
13681      double ca,
13682      /* Real    */ ae_matrix* a,
13683      double d1,
13684      double d2,
13685      /* Real    */ ae_matrix* b,
13686      double wr,
13687      double wi,
13688      /* Boolean */ ae_vector* rswap4,
13689      /* Boolean */ ae_vector* zswap4,
13690      /* Integer */ ae_matrix* ipivot44,
13691      /* Real    */ ae_vector* civ4,
13692      /* Real    */ ae_vector* crv4,
13693      /* Real    */ ae_matrix* x,
13694      double* scl,
13695      double* xnorm,
13696      ae_int_t* info,
13697      ae_state *_state);
13698 static void evd_internalhsevdladiv(double a,
13699      double b,
13700      double c,
13701      double d,
13702      double* p,
13703      double* q,
13704      ae_state *_state);
13705 
13706 
13707 #endif
13708 #if defined(AE_COMPILE_DLU) || !defined(AE_PARTIAL_BUILD)
13709 static void dlu_cmatrixlup2(/* Complex */ ae_matrix* a,
13710      ae_int_t offs,
13711      ae_int_t m,
13712      ae_int_t n,
13713      /* Integer */ ae_vector* pivots,
13714      /* Complex */ ae_vector* tmp,
13715      ae_state *_state);
13716 static void dlu_rmatrixlup2(/* Real    */ ae_matrix* a,
13717      ae_int_t offs,
13718      ae_int_t m,
13719      ae_int_t n,
13720      /* Integer */ ae_vector* pivots,
13721      /* Real    */ ae_vector* tmp,
13722      ae_state *_state);
13723 static void dlu_cmatrixplu2(/* Complex */ ae_matrix* a,
13724      ae_int_t offs,
13725      ae_int_t m,
13726      ae_int_t n,
13727      /* Integer */ ae_vector* pivots,
13728      /* Complex */ ae_vector* tmp,
13729      ae_state *_state);
13730 static void dlu_rmatrixplu2(/* Real    */ ae_matrix* a,
13731      ae_int_t offs,
13732      ae_int_t m,
13733      ae_int_t n,
13734      /* Integer */ ae_vector* pivots,
13735      /* Real    */ ae_vector* tmp,
13736      ae_state *_state);
13737 
13738 
13739 #endif
13740 #if defined(AE_COMPILE_SPTRF) || !defined(AE_PARTIAL_BUILD)
13741 static double sptrf_densebnd = 0.10;
13742 static ae_int_t sptrf_slswidth = 8;
13743 static void sptrf_sluv2list1init(ae_int_t n,
13744      sluv2list1matrix* a,
13745      ae_state *_state);
13746 static void sptrf_sluv2list1swap(sluv2list1matrix* a,
13747      ae_int_t i,
13748      ae_int_t j,
13749      ae_state *_state);
13750 static void sptrf_sluv2list1dropsequence(sluv2list1matrix* a,
13751      ae_int_t i,
13752      ae_state *_state);
13753 static void sptrf_sluv2list1appendsequencetomatrix(sluv2list1matrix* a,
13754      ae_int_t src,
13755      ae_bool hasdiagonal,
13756      double d,
13757      ae_int_t nzmax,
13758      sparsematrix* s,
13759      ae_int_t dst,
13760      ae_state *_state);
13761 static void sptrf_sluv2list1pushsparsevector(sluv2list1matrix* a,
13762      /* Integer */ ae_vector* si,
13763      /* Real    */ ae_vector* sv,
13764      ae_int_t nz,
13765      ae_state *_state);
13766 static void sptrf_densetrailinit(sluv2densetrail* d,
13767      ae_int_t n,
13768      ae_state *_state);
13769 static void sptrf_densetrailappendcolumn(sluv2densetrail* d,
13770      /* Real    */ ae_vector* x,
13771      ae_int_t id,
13772      ae_state *_state);
13773 static void sptrf_sparsetrailinit(sparsematrix* s,
13774      sluv2sparsetrail* a,
13775      ae_state *_state);
13776 static ae_bool sptrf_sparsetrailfindpivot(sluv2sparsetrail* a,
13777      ae_int_t pivottype,
13778      ae_int_t* ipiv,
13779      ae_int_t* jpiv,
13780      ae_state *_state);
13781 static void sptrf_sparsetrailpivotout(sluv2sparsetrail* a,
13782      ae_int_t ipiv,
13783      ae_int_t jpiv,
13784      double* uu,
13785      /* Integer */ ae_vector* v0i,
13786      /* Real    */ ae_vector* v0r,
13787      ae_int_t* nz0,
13788      /* Integer */ ae_vector* v1i,
13789      /* Real    */ ae_vector* v1r,
13790      ae_int_t* nz1,
13791      ae_state *_state);
13792 static void sptrf_sparsetraildensify(sluv2sparsetrail* a,
13793      ae_int_t i1,
13794      sluv2list1matrix* bupper,
13795      sluv2densetrail* dtrail,
13796      ae_state *_state);
13797 static void sptrf_sparsetrailupdate(sluv2sparsetrail* a,
13798      /* Integer */ ae_vector* v0i,
13799      /* Real    */ ae_vector* v0r,
13800      ae_int_t nz0,
13801      /* Integer */ ae_vector* v1i,
13802      /* Real    */ ae_vector* v1r,
13803      ae_int_t nz1,
13804      sluv2list1matrix* bupper,
13805      sluv2densetrail* dtrail,
13806      ae_bool densificationsupported,
13807      ae_state *_state);
13808 
13809 
13810 #endif
13811 #if defined(AE_COMPILE_AMDORDERING) || !defined(AE_PARTIAL_BUILD)
13812 static ae_int_t amdordering_knsheadersize = 2;
13813 static ae_int_t amdordering_llmentrysize = 6;
13814 static void amdordering_nsinitemptyslow(ae_int_t n,
13815      amdnset* sa,
13816      ae_state *_state);
13817 static void amdordering_nscopy(amdnset* ssrc,
13818      amdnset* sdst,
13819      ae_state *_state);
13820 static void amdordering_nsaddelement(amdnset* sa,
13821      ae_int_t k,
13822      ae_state *_state);
13823 static void amdordering_nsaddkth(amdnset* sa,
13824      amdknset* src,
13825      ae_int_t k,
13826      ae_state *_state);
13827 static void amdordering_nssubtract1(amdnset* sa,
13828      amdnset* src,
13829      ae_state *_state);
13830 static void amdordering_nssubtractkth(amdnset* sa,
13831      amdknset* src,
13832      ae_int_t k,
13833      ae_state *_state);
13834 static void amdordering_nsclear(amdnset* sa, ae_state *_state);
13835 static ae_int_t amdordering_nscount(amdnset* sa, ae_state *_state);
13836 static ae_int_t amdordering_nscountnotkth(amdnset* sa,
13837      amdknset* src,
13838      ae_int_t k,
13839      ae_state *_state);
13840 static ae_int_t amdordering_nscountandkth(amdnset* sa,
13841      amdknset* src,
13842      ae_int_t k,
13843      ae_state *_state);
13844 static ae_bool amdordering_nsequal(amdnset* s0,
13845      amdnset* s1,
13846      ae_state *_state);
13847 static void amdordering_nsstartenumeration(amdnset* sa, ae_state *_state);
13848 static ae_bool amdordering_nsenumerate(amdnset* sa,
13849      ae_int_t* i,
13850      ae_state *_state);
13851 static void amdordering_knscompressstorage(amdknset* sa, ae_state *_state);
13852 static void amdordering_knsreallocate(amdknset* sa,
13853      ae_int_t setidx,
13854      ae_int_t newallocated,
13855      ae_state *_state);
13856 static void amdordering_knsinit(ae_int_t k,
13857      ae_int_t n,
13858      ae_int_t kprealloc,
13859      amdknset* sa,
13860      ae_state *_state);
13861 static void amdordering_knsinitfroma(sparsematrix* a,
13862      ae_int_t n,
13863      amdknset* sa,
13864      ae_state *_state);
13865 static void amdordering_knsstartenumeration(amdknset* sa,
13866      ae_int_t i,
13867      ae_state *_state);
13868 static ae_bool amdordering_knsenumerate(amdknset* sa,
13869      ae_int_t* i,
13870      ae_state *_state);
13871 static void amdordering_knsdirectaccess(amdknset* sa,
13872      ae_int_t k,
13873      ae_int_t* idxbegin,
13874      ae_int_t* idxend,
13875      ae_state *_state);
13876 static void amdordering_knsaddnewelement(amdknset* sa,
13877      ae_int_t i,
13878      ae_int_t k,
13879      ae_state *_state);
13880 static void amdordering_knssubtract1(amdknset* sa,
13881      ae_int_t i,
13882      amdnset* src,
13883      ae_state *_state);
13884 static void amdordering_knsaddkthdistinct(amdknset* sa,
13885      ae_int_t i,
13886      amdknset* src,
13887      ae_int_t k,
13888      ae_state *_state);
13889 static ae_int_t amdordering_knscountkth(amdknset* s0,
13890      ae_int_t k,
13891      ae_state *_state);
13892 static ae_int_t amdordering_knscountnot(amdknset* s0,
13893      ae_int_t i,
13894      amdnset* s1,
13895      ae_state *_state);
13896 static ae_int_t amdordering_knscountnotkth(amdknset* s0,
13897      ae_int_t i,
13898      amdknset* s1,
13899      ae_int_t k,
13900      ae_state *_state);
13901 static ae_int_t amdordering_knscountandkth(amdknset* s0,
13902      ae_int_t i,
13903      amdknset* s1,
13904      ae_int_t k,
13905      ae_state *_state);
13906 static ae_int_t amdordering_knssumkth(amdknset* s0,
13907      ae_int_t i,
13908      ae_state *_state);
13909 static void amdordering_knsclearkthnoreclaim(amdknset* sa,
13910      ae_int_t k,
13911      ae_state *_state);
13912 static void amdordering_knsclearkthreclaim(amdknset* sa,
13913      ae_int_t k,
13914      ae_state *_state);
13915 static void amdordering_mtxinit(ae_int_t n,
13916      amdllmatrix* a,
13917      ae_state *_state);
13918 static void amdordering_mtxaddcolumnto(amdllmatrix* a,
13919      ae_int_t j,
13920      amdnset* s,
13921      ae_state *_state);
13922 static void amdordering_mtxinsertnewelement(amdllmatrix* a,
13923      ae_int_t i,
13924      ae_int_t j,
13925      ae_state *_state);
13926 static ae_int_t amdordering_mtxcountcolumnnot(amdllmatrix* a,
13927      ae_int_t j,
13928      amdnset* s,
13929      ae_state *_state);
13930 static ae_int_t amdordering_mtxcountcolumn(amdllmatrix* a,
13931      ae_int_t j,
13932      ae_state *_state);
13933 static void amdordering_mtxclearx(amdllmatrix* a,
13934      ae_int_t k,
13935      ae_bool iscol,
13936      ae_state *_state);
13937 static void amdordering_mtxclearcolumn(amdllmatrix* a,
13938      ae_int_t j,
13939      ae_state *_state);
13940 static void amdordering_mtxclearrow(amdllmatrix* a,
13941      ae_int_t j,
13942      ae_state *_state);
13943 static void amdordering_vtxinit(sparsematrix* a,
13944      ae_int_t n,
13945      ae_bool checkexactdegrees,
13946      amdvertexset* s,
13947      ae_state *_state);
13948 static void amdordering_vtxremovevertex(amdvertexset* s,
13949      ae_int_t p,
13950      ae_state *_state);
13951 static ae_int_t amdordering_vtxgetapprox(amdvertexset* s,
13952      ae_int_t p,
13953      ae_state *_state);
13954 static ae_int_t amdordering_vtxgetexact(amdvertexset* s,
13955      ae_int_t p,
13956      ae_state *_state);
13957 static ae_int_t amdordering_vtxgetapproxmindegree(amdvertexset* s,
13958      ae_state *_state);
13959 static void amdordering_vtxupdateapproximatedegree(amdvertexset* s,
13960      ae_int_t p,
13961      ae_int_t dnew,
13962      ae_state *_state);
13963 static void amdordering_vtxupdateexactdegree(amdvertexset* s,
13964      ae_int_t p,
13965      ae_int_t d,
13966      ae_state *_state);
13967 static void amdordering_amdselectpivotelement(amdbuffer* buf,
13968      ae_int_t k,
13969      ae_int_t* p,
13970      ae_int_t* nodesize,
13971      ae_state *_state);
13972 static void amdordering_amdcomputelp(amdbuffer* buf,
13973      ae_int_t p,
13974      ae_state *_state);
13975 static void amdordering_amdmasselimination(amdbuffer* buf,
13976      ae_int_t p,
13977      ae_int_t k,
13978      ae_int_t tau,
13979      ae_state *_state);
13980 static void amdordering_amddetectsupernodes(amdbuffer* buf,
13981      ae_state *_state);
13982 
13983 
13984 #endif
13985 #if defined(AE_COMPILE_SPCHOL) || !defined(AE_PARTIAL_BUILD)
13986 static ae_int_t spchol_maxsupernode = 4;
13987 static double spchol_maxmergeinefficiency = 0.25;
13988 static ae_int_t spchol_smallfakestolerance = 2;
13989 static ae_int_t spchol_maxfastkernel = 4;
13990 static ae_bool spchol_relaxedsupernodes = ae_true;
13991 #ifdef ALGLIB_NO_FAST_KERNELS
13992 static ae_int_t spchol_spsymmgetmaxsimd(ae_state *_state);
13993 #endif
13994 #ifdef ALGLIB_NO_FAST_KERNELS
13995 static void spchol_propagatefwd(/* Real    */ ae_vector* x,
13996      ae_int_t cols0,
13997      ae_int_t blocksize,
13998      /* Integer */ ae_vector* superrowidx,
13999      ae_int_t rbase,
14000      ae_int_t offdiagsize,
14001      /* Real    */ ae_vector* rowstorage,
14002      ae_int_t offss,
14003      ae_int_t sstride,
14004      /* Real    */ ae_vector* simdbuf,
14005      ae_int_t simdwidth,
14006      ae_state *_state);
14007 #endif
14008 static void spchol_generatedbgpermutation(sparsematrix* a,
14009      ae_int_t n,
14010      /* Integer */ ae_vector* perm,
14011      /* Integer */ ae_vector* invperm,
14012      ae_state *_state);
14013 static void spchol_buildunorderedetree(sparsematrix* a,
14014      ae_int_t n,
14015      /* Integer */ ae_vector* parent,
14016      /* Integer */ ae_vector* tabove,
14017      ae_state *_state);
14018 static void spchol_fromparenttochildren(/* Integer */ ae_vector* parent,
14019      ae_int_t n,
14020      /* Integer */ ae_vector* childrenr,
14021      /* Integer */ ae_vector* childreni,
14022      /* Integer */ ae_vector* ttmp0,
14023      ae_state *_state);
14024 static void spchol_buildorderedetree(sparsematrix* a,
14025      ae_int_t n,
14026      /* Integer */ ae_vector* parent,
14027      /* Integer */ ae_vector* supernodalpermutation,
14028      /* Integer */ ae_vector* invsupernodalpermutation,
14029      /* Integer */ ae_vector* trawparentofrawnode,
14030      /* Integer */ ae_vector* trawparentofreorderednode,
14031      /* Integer */ ae_vector* ttmp,
14032      /* Boolean */ ae_vector* tflagarray,
14033      ae_state *_state);
14034 static void spchol_createsupernodalstructure(sparsematrix* at,
14035      /* Integer */ ae_vector* parent,
14036      ae_int_t n,
14037      spcholanalysis* analysis,
14038      /* Integer */ ae_vector* node2supernode,
14039      /* Integer */ ae_vector* tchildrenr,
14040      /* Integer */ ae_vector* tchildreni,
14041      /* Integer */ ae_vector* tparentnodeofsupernode,
14042      /* Integer */ ae_vector* tfakenonzeros,
14043      /* Integer */ ae_vector* ttmp0,
14044      /* Boolean */ ae_vector* tflagarray,
14045      ae_state *_state);
14046 static void spchol_analyzesupernodaldependencies(spcholanalysis* analysis,
14047      sparsematrix* rawa,
14048      /* Integer */ ae_vector* node2supernode,
14049      ae_int_t n,
14050      /* Integer */ ae_vector* ttmp0,
14051      /* Integer */ ae_vector* ttmp1,
14052      /* Boolean */ ae_vector* tflagarray,
14053      ae_state *_state);
14054 static void spchol_loadmatrix(spcholanalysis* analysis,
14055      sparsematrix* at,
14056      ae_state *_state);
14057 static void spchol_extractmatrix(spcholanalysis* analysis,
14058      /* Integer */ ae_vector* offsets,
14059      /* Integer */ ae_vector* strides,
14060      /* Real    */ ae_vector* rowstorage,
14061      /* Real    */ ae_vector* diagd,
14062      ae_int_t n,
14063      sparsematrix* a,
14064      /* Real    */ ae_vector* d,
14065      /* Integer */ ae_vector* p,
14066      /* Integer */ ae_vector* tmpp,
14067      ae_state *_state);
14068 static void spchol_partialcholeskypattern(sparsematrix* a,
14069      ae_int_t head,
14070      ae_int_t tail,
14071      sparsematrix* atail,
14072      /* Integer */ ae_vector* tmpparent,
14073      /* Integer */ ae_vector* tmpchildrenr,
14074      /* Integer */ ae_vector* tmpchildreni,
14075      /* Integer */ ae_vector* tmp1,
14076      /* Boolean */ ae_vector* flagarray,
14077      sparsematrix* tmpbottomt,
14078      sparsematrix* tmpupdatet,
14079      sparsematrix* tmpupdate,
14080      sparsematrix* tmpnewtailt,
14081      ae_state *_state);
14082 static void spchol_topologicalpermutation(sparsematrix* a,
14083      /* Integer */ ae_vector* p,
14084      sparsematrix* b,
14085      ae_state *_state);
14086 static ae_int_t spchol_computenonzeropattern(sparsematrix* wrkat,
14087      ae_int_t columnidx,
14088      ae_int_t n,
14089      /* Integer */ ae_vector* superrowridx,
14090      /* Integer */ ae_vector* superrowidx,
14091      ae_int_t nsuper,
14092      /* Integer */ ae_vector* childrennodesr,
14093      /* Integer */ ae_vector* childrennodesi,
14094      /* Integer */ ae_vector* node2supernode,
14095      /* Boolean */ ae_vector* truearray,
14096      /* Integer */ ae_vector* tmp0,
14097      ae_state *_state);
14098 static ae_int_t spchol_updatesupernode(spcholanalysis* analysis,
14099      ae_int_t sidx,
14100      ae_int_t cols0,
14101      ae_int_t cols1,
14102      ae_int_t offss,
14103      /* Integer */ ae_vector* raw2smap,
14104      ae_int_t uidx,
14105      ae_int_t wrkrow,
14106      /* Real    */ ae_vector* diagd,
14107      ae_int_t offsd,
14108      ae_state *_state);
14109 static ae_bool spchol_factorizesupernode(spcholanalysis* analysis,
14110      ae_int_t sidx,
14111      ae_state *_state);
14112 static ae_int_t spchol_recommendedstridefor(ae_int_t rowsize,
14113      ae_state *_state);
14114 static ae_int_t spchol_alignpositioninarray(ae_int_t offs,
14115      ae_state *_state);
14116 #ifdef ALGLIB_NO_FAST_KERNELS
14117 static ae_bool spchol_updatekernel4444(/* Real    */ ae_vector* rowstorage,
14118      ae_int_t offss,
14119      ae_int_t sheight,
14120      ae_int_t offsu,
14121      ae_int_t uheight,
14122      /* Real    */ ae_vector* diagd,
14123      ae_int_t offsd,
14124      /* Integer */ ae_vector* raw2smap,
14125      /* Integer */ ae_vector* superrowidx,
14126      ae_int_t urbase,
14127      ae_state *_state);
14128 #endif
14129 #ifdef ALGLIB_NO_FAST_KERNELS
14130 static ae_bool spchol_updatekernelabc4(/* Real    */ ae_vector* rowstorage,
14131      ae_int_t offss,
14132      ae_int_t twidth,
14133      ae_int_t offsu,
14134      ae_int_t uheight,
14135      ae_int_t urank,
14136      ae_int_t urowstride,
14137      ae_int_t uwidth,
14138      /* Real    */ ae_vector* diagd,
14139      ae_int_t offsd,
14140      /* Integer */ ae_vector* raw2smap,
14141      /* Integer */ ae_vector* superrowidx,
14142      ae_int_t urbase,
14143      ae_state *_state);
14144 #endif
14145 static ae_bool spchol_updatekernelrank1(/* Real    */ ae_vector* rowstorage,
14146      ae_int_t offss,
14147      ae_int_t twidth,
14148      ae_int_t trowstride,
14149      ae_int_t offsu,
14150      ae_int_t uheight,
14151      ae_int_t uwidth,
14152      /* Real    */ ae_vector* diagd,
14153      ae_int_t offsd,
14154      /* Integer */ ae_vector* raw2smap,
14155      /* Integer */ ae_vector* superrowidx,
14156      ae_int_t urbase,
14157      ae_state *_state);
14158 static ae_bool spchol_updatekernelrank2(/* Real    */ ae_vector* rowstorage,
14159      ae_int_t offss,
14160      ae_int_t twidth,
14161      ae_int_t trowstride,
14162      ae_int_t offsu,
14163      ae_int_t uheight,
14164      ae_int_t uwidth,
14165      /* Real    */ ae_vector* diagd,
14166      ae_int_t offsd,
14167      /* Integer */ ae_vector* raw2smap,
14168      /* Integer */ ae_vector* superrowidx,
14169      ae_int_t urbase,
14170      ae_state *_state);
14171 static void spchol_slowdebugchecks(sparsematrix* a,
14172      /* Integer */ ae_vector* fillinperm,
14173      ae_int_t n,
14174      ae_int_t tail,
14175      sparsematrix* referencetaila,
14176      ae_state *_state);
14177 static ae_bool spchol_dbgmatrixcholesky2(/* Real    */ ae_matrix* aaa,
14178      ae_int_t offs,
14179      ae_int_t n,
14180      ae_bool isupper,
14181      ae_state *_state);
14182 
14183 
14184 #endif
14185 #if defined(AE_COMPILE_TRFAC) || !defined(AE_PARTIAL_BUILD)
14186 static ae_bool trfac_hpdmatrixcholeskyrec(/* Complex */ ae_matrix* a,
14187      ae_int_t offs,
14188      ae_int_t n,
14189      ae_bool isupper,
14190      /* Complex */ ae_vector* tmp,
14191      ae_state *_state);
14192 static ae_bool trfac_hpdmatrixcholesky2(/* Complex */ ae_matrix* aaa,
14193      ae_int_t offs,
14194      ae_int_t n,
14195      ae_bool isupper,
14196      /* Complex */ ae_vector* tmp,
14197      ae_state *_state);
14198 static ae_bool trfac_spdmatrixcholesky2(/* Real    */ ae_matrix* aaa,
14199      ae_int_t offs,
14200      ae_int_t n,
14201      ae_bool isupper,
14202      /* Real    */ ae_vector* tmp,
14203      ae_state *_state);
14204 
14205 
14206 #endif
14207 #if defined(AE_COMPILE_BDSVD) || !defined(AE_PARTIAL_BUILD)
14208 static ae_bool bdsvd_bidiagonalsvddecompositioninternal(/* Real    */ ae_vector* d,
14209      /* Real    */ ae_vector* e,
14210      ae_int_t n,
14211      ae_bool isupper,
14212      ae_bool isfractionalaccuracyrequired,
14213      /* Real    */ ae_matrix* uu,
14214      ae_int_t ustart,
14215      ae_int_t nru,
14216      /* Real    */ ae_matrix* c,
14217      ae_int_t cstart,
14218      ae_int_t ncc,
14219      /* Real    */ ae_matrix* vt,
14220      ae_int_t vstart,
14221      ae_int_t ncvt,
14222      ae_state *_state);
14223 static double bdsvd_extsignbdsqr(double a, double b, ae_state *_state);
14224 static void bdsvd_svd2x2(double f,
14225      double g,
14226      double h,
14227      double* ssmin,
14228      double* ssmax,
14229      ae_state *_state);
14230 static void bdsvd_svdv2x2(double f,
14231      double g,
14232      double h,
14233      double* ssmin,
14234      double* ssmax,
14235      double* snr,
14236      double* csr,
14237      double* snl,
14238      double* csl,
14239      ae_state *_state);
14240 
14241 
14242 #endif
14243 #if defined(AE_COMPILE_SVD) || !defined(AE_PARTIAL_BUILD)
14244 
14245 
14246 #endif
14247 #if defined(AE_COMPILE_RCOND) || !defined(AE_PARTIAL_BUILD)
14248 static void rcond_rmatrixrcondtrinternal(/* Real    */ ae_matrix* a,
14249      ae_int_t n,
14250      ae_bool isupper,
14251      ae_bool isunit,
14252      ae_bool onenorm,
14253      double anorm,
14254      double* rc,
14255      ae_state *_state);
14256 static void rcond_cmatrixrcondtrinternal(/* Complex */ ae_matrix* a,
14257      ae_int_t n,
14258      ae_bool isupper,
14259      ae_bool isunit,
14260      ae_bool onenorm,
14261      double anorm,
14262      double* rc,
14263      ae_state *_state);
14264 static void rcond_spdmatrixrcondcholeskyinternal(/* Real    */ ae_matrix* cha,
14265      ae_int_t n,
14266      ae_bool isupper,
14267      ae_bool isnormprovided,
14268      double anorm,
14269      double* rc,
14270      ae_state *_state);
14271 static void rcond_hpdmatrixrcondcholeskyinternal(/* Complex */ ae_matrix* cha,
14272      ae_int_t n,
14273      ae_bool isupper,
14274      ae_bool isnormprovided,
14275      double anorm,
14276      double* rc,
14277      ae_state *_state);
14278 static void rcond_rmatrixrcondluinternal(/* Real    */ ae_matrix* lua,
14279      ae_int_t n,
14280      ae_bool onenorm,
14281      ae_bool isanormprovided,
14282      double anorm,
14283      double* rc,
14284      ae_state *_state);
14285 static void rcond_cmatrixrcondluinternal(/* Complex */ ae_matrix* lua,
14286      ae_int_t n,
14287      ae_bool onenorm,
14288      ae_bool isanormprovided,
14289      double anorm,
14290      double* rc,
14291      ae_state *_state);
14292 static void rcond_rmatrixestimatenorm(ae_int_t n,
14293      /* Real    */ ae_vector* v,
14294      /* Real    */ ae_vector* x,
14295      /* Integer */ ae_vector* isgn,
14296      double* est,
14297      ae_int_t* kase,
14298      ae_state *_state);
14299 static void rcond_cmatrixestimatenorm(ae_int_t n,
14300      /* Complex */ ae_vector* v,
14301      /* Complex */ ae_vector* x,
14302      double* est,
14303      ae_int_t* kase,
14304      /* Integer */ ae_vector* isave,
14305      /* Real    */ ae_vector* rsave,
14306      ae_state *_state);
14307 static double rcond_internalcomplexrcondscsum1(/* Complex */ ae_vector* x,
14308      ae_int_t n,
14309      ae_state *_state);
14310 static ae_int_t rcond_internalcomplexrcondicmax1(/* Complex */ ae_vector* x,
14311      ae_int_t n,
14312      ae_state *_state);
14313 static void rcond_internalcomplexrcondsaveall(/* Integer */ ae_vector* isave,
14314      /* Real    */ ae_vector* rsave,
14315      ae_int_t* i,
14316      ae_int_t* iter,
14317      ae_int_t* j,
14318      ae_int_t* jlast,
14319      ae_int_t* jump,
14320      double* absxi,
14321      double* altsgn,
14322      double* estold,
14323      double* temp,
14324      ae_state *_state);
14325 static void rcond_internalcomplexrcondloadall(/* Integer */ ae_vector* isave,
14326      /* Real    */ ae_vector* rsave,
14327      ae_int_t* i,
14328      ae_int_t* iter,
14329      ae_int_t* j,
14330      ae_int_t* jlast,
14331      ae_int_t* jump,
14332      double* absxi,
14333      double* altsgn,
14334      double* estold,
14335      double* temp,
14336      ae_state *_state);
14337 
14338 
14339 #endif
14340 #if defined(AE_COMPILE_FBLS) || !defined(AE_PARTIAL_BUILD)
14341 
14342 
14343 #endif
14344 #if defined(AE_COMPILE_NORMESTIMATOR) || !defined(AE_PARTIAL_BUILD)
14345 
14346 
14347 #endif
14348 #if defined(AE_COMPILE_MATINV) || !defined(AE_PARTIAL_BUILD)
14349 static void matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
14350      ae_int_t offs,
14351      ae_int_t n,
14352      ae_bool isupper,
14353      ae_bool isunit,
14354      /* Real    */ ae_vector* tmp,
14355      sinteger* info,
14356      ae_state *_state);
14357 ae_bool _trypexec_matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
14358     ae_int_t offs,
14359     ae_int_t n,
14360     ae_bool isupper,
14361     ae_bool isunit,
14362     /* Real    */ ae_vector* tmp,
14363     sinteger* info, ae_state *_state);
14364 static void matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
14365      ae_int_t offs,
14366      ae_int_t n,
14367      ae_bool isupper,
14368      ae_bool isunit,
14369      /* Complex */ ae_vector* tmp,
14370      sinteger* info,
14371      ae_state *_state);
14372 ae_bool _trypexec_matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
14373     ae_int_t offs,
14374     ae_int_t n,
14375     ae_bool isupper,
14376     ae_bool isunit,
14377     /* Complex */ ae_vector* tmp,
14378     sinteger* info, ae_state *_state);
14379 static void matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
14380      ae_int_t offs,
14381      ae_int_t n,
14382      /* Real    */ ae_vector* work,
14383      sinteger* info,
14384      matinvreport* rep,
14385      ae_state *_state);
14386 ae_bool _trypexec_matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
14387     ae_int_t offs,
14388     ae_int_t n,
14389     /* Real    */ ae_vector* work,
14390     sinteger* info,
14391     matinvreport* rep, ae_state *_state);
14392 static void matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
14393      ae_int_t offs,
14394      ae_int_t n,
14395      /* Complex */ ae_vector* work,
14396      sinteger* ssinfo,
14397      matinvreport* rep,
14398      ae_state *_state);
14399 ae_bool _trypexec_matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
14400     ae_int_t offs,
14401     ae_int_t n,
14402     /* Complex */ ae_vector* work,
14403     sinteger* ssinfo,
14404     matinvreport* rep, ae_state *_state);
14405 static void matinv_hpdmatrixcholeskyinverserec(/* Complex */ ae_matrix* a,
14406      ae_int_t offs,
14407      ae_int_t n,
14408      ae_bool isupper,
14409      /* Complex */ ae_vector* tmp,
14410      ae_state *_state);
14411 
14412 
14413 #endif
14414 #if defined(AE_COMPILE_INVERSEUPDATE) || !defined(AE_PARTIAL_BUILD)
14415 
14416 
14417 #endif
14418 #if defined(AE_COMPILE_SCHUR) || !defined(AE_PARTIAL_BUILD)
14419 
14420 
14421 #endif
14422 #if defined(AE_COMPILE_SPDGEVD) || !defined(AE_PARTIAL_BUILD)
14423 
14424 
14425 #endif
14426 #if defined(AE_COMPILE_MATDET) || !defined(AE_PARTIAL_BUILD)
14427 
14428 
14429 #endif
14430 
14431 #if defined(AE_COMPILE_ABLAS) || !defined(AE_PARTIAL_BUILD)
14432 
14433 
14434 /*************************************************************************
14435 Splits matrix length in two parts, left part should match ABLAS block size
14436 
14437 INPUT PARAMETERS
14438     A   -   real matrix, is passed to ensure that we didn't split
14439             complex matrix using real splitting subroutine.
14440             matrix itself is not changed.
14441     N   -   length, N>0
14442 
14443 OUTPUT PARAMETERS
14444     N1  -   length
14445     N2  -   length
14446 
14447 N1+N2=N, N1>=N2, N2 may be zero
14448 
14449   -- ALGLIB routine --
14450      15.12.2009
14451      Bochkanov Sergey
14452 *************************************************************************/
ablassplitlength(ae_matrix * a,ae_int_t n,ae_int_t * n1,ae_int_t * n2,ae_state * _state)14453 void ablassplitlength(/* Real    */ ae_matrix* a,
14454      ae_int_t n,
14455      ae_int_t* n1,
14456      ae_int_t* n2,
14457      ae_state *_state)
14458 {
14459 
14460     *n1 = 0;
14461     *n2 = 0;
14462 
14463     if( n>ablasblocksize(a, _state) )
14464     {
14465         ablas_ablasinternalsplitlength(n, ablasblocksize(a, _state), n1, n2, _state);
14466     }
14467     else
14468     {
14469         ablas_ablasinternalsplitlength(n, ablasmicroblocksize(_state), n1, n2, _state);
14470     }
14471 }
14472 
14473 
14474 /*************************************************************************
14475 Complex ABLASSplitLength
14476 
14477   -- ALGLIB routine --
14478      15.12.2009
14479      Bochkanov Sergey
14480 *************************************************************************/
ablascomplexsplitlength(ae_matrix * a,ae_int_t n,ae_int_t * n1,ae_int_t * n2,ae_state * _state)14481 void ablascomplexsplitlength(/* Complex */ ae_matrix* a,
14482      ae_int_t n,
14483      ae_int_t* n1,
14484      ae_int_t* n2,
14485      ae_state *_state)
14486 {
14487 
14488     *n1 = 0;
14489     *n2 = 0;
14490 
14491     if( n>ablascomplexblocksize(a, _state) )
14492     {
14493         ablas_ablasinternalsplitlength(n, ablascomplexblocksize(a, _state), n1, n2, _state);
14494     }
14495     else
14496     {
14497         ablas_ablasinternalsplitlength(n, ablasmicroblocksize(_state), n1, n2, _state);
14498     }
14499 }
14500 
14501 
14502 /*************************************************************************
14503 Returns switch point for parallelism.
14504 
14505   -- ALGLIB routine --
14506      15.12.2009
14507      Bochkanov Sergey
14508 *************************************************************************/
gemmparallelsize(ae_state * _state)14509 ae_int_t gemmparallelsize(ae_state *_state)
14510 {
14511     ae_int_t result;
14512 
14513 
14514     result = 64;
14515     return result;
14516 }
14517 
14518 
14519 /*************************************************************************
14520 Returns block size - subdivision size where  cache-oblivious  soubroutines
14521 switch to the optimized kernel.
14522 
14523 INPUT PARAMETERS
14524     A   -   real matrix, is passed to ensure that we didn't split
14525             complex matrix using real splitting subroutine.
14526             matrix itself is not changed.
14527 
14528   -- ALGLIB routine --
14529      15.12.2009
14530      Bochkanov Sergey
14531 *************************************************************************/
ablasblocksize(ae_matrix * a,ae_state * _state)14532 ae_int_t ablasblocksize(/* Real    */ ae_matrix* a, ae_state *_state)
14533 {
14534     ae_int_t result;
14535 
14536 
14537     result = 32;
14538     return result;
14539 }
14540 
14541 
14542 /*************************************************************************
14543 Block size for complex subroutines.
14544 
14545   -- ALGLIB routine --
14546      15.12.2009
14547      Bochkanov Sergey
14548 *************************************************************************/
ablascomplexblocksize(ae_matrix * a,ae_state * _state)14549 ae_int_t ablascomplexblocksize(/* Complex */ ae_matrix* a,
14550      ae_state *_state)
14551 {
14552     ae_int_t result;
14553 
14554 
14555     result = 24;
14556     return result;
14557 }
14558 
14559 
14560 /*************************************************************************
14561 Microblock size
14562 
14563   -- ALGLIB routine --
14564      15.12.2009
14565      Bochkanov Sergey
14566 *************************************************************************/
ablasmicroblocksize(ae_state * _state)14567 ae_int_t ablasmicroblocksize(ae_state *_state)
14568 {
14569     ae_int_t result;
14570 
14571 
14572     result = 8;
14573     return result;
14574 }
14575 
14576 
14577 /*************************************************************************
14578 Generation of an elementary reflection transformation
14579 
14580 The subroutine generates elementary reflection H of order N, so that, for
14581 a given X, the following equality holds true:
14582 
14583     ( X(1) )   ( Beta )
14584 H * (  ..  ) = (  0   )
14585     ( X(n) )   (  0   )
14586 
14587 where
14588               ( V(1) )
14589 H = 1 - Tau * (  ..  ) * ( V(1), ..., V(n) )
14590               ( V(n) )
14591 
14592 where the first component of vector V equals 1.
14593 
14594 Input parameters:
14595     X   -   vector. Array whose index ranges within [1..N].
14596     N   -   reflection order.
14597 
14598 Output parameters:
14599     X   -   components from 2 to N are replaced with vector V.
14600             The first component is replaced with parameter Beta.
14601     Tau -   scalar value Tau. If X is a null vector, Tau equals 0,
14602             otherwise 1 <= Tau <= 2.
14603 
14604 This subroutine is the modification of the DLARFG subroutines from
14605 the LAPACK library.
14606 
14607 MODIFICATIONS:
14608     24.12.2005 sign(Alpha) was replaced with an analogous to the Fortran SIGN code.
14609 
14610   -- LAPACK auxiliary routine (version 3.0) --
14611      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
14612      Courant Institute, Argonne National Lab, and Rice University
14613      September 30, 1994
14614 *************************************************************************/
generatereflection(ae_vector * x,ae_int_t n,double * tau,ae_state * _state)14615 void generatereflection(/* Real    */ ae_vector* x,
14616      ae_int_t n,
14617      double* tau,
14618      ae_state *_state)
14619 {
14620     ae_int_t j;
14621     double alpha;
14622     double xnorm;
14623     double v;
14624     double beta;
14625     double mx;
14626     double s;
14627 
14628     *tau = 0;
14629 
14630     if( n<=1 )
14631     {
14632         *tau = (double)(0);
14633         return;
14634     }
14635 
14636     /*
14637      * Scale if needed (to avoid overflow/underflow during intermediate
14638      * calculations).
14639      */
14640     mx = (double)(0);
14641     for(j=1; j<=n; j++)
14642     {
14643         mx = ae_maxreal(ae_fabs(x->ptr.p_double[j], _state), mx, _state);
14644     }
14645     s = (double)(1);
14646     if( ae_fp_neq(mx,(double)(0)) )
14647     {
14648         if( ae_fp_less_eq(mx,ae_minrealnumber/ae_machineepsilon) )
14649         {
14650             s = ae_minrealnumber/ae_machineepsilon;
14651             v = 1/s;
14652             ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v);
14653             mx = mx*v;
14654         }
14655         else
14656         {
14657             if( ae_fp_greater_eq(mx,ae_maxrealnumber*ae_machineepsilon) )
14658             {
14659                 s = ae_maxrealnumber*ae_machineepsilon;
14660                 v = 1/s;
14661                 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v);
14662                 mx = mx*v;
14663             }
14664         }
14665     }
14666 
14667     /*
14668      * XNORM = DNRM2( N-1, X, INCX )
14669      */
14670     alpha = x->ptr.p_double[1];
14671     xnorm = (double)(0);
14672     if( ae_fp_neq(mx,(double)(0)) )
14673     {
14674         for(j=2; j<=n; j++)
14675         {
14676             xnorm = xnorm+ae_sqr(x->ptr.p_double[j]/mx, _state);
14677         }
14678         xnorm = ae_sqrt(xnorm, _state)*mx;
14679     }
14680     if( ae_fp_eq(xnorm,(double)(0)) )
14681     {
14682 
14683         /*
14684          * H  =  I
14685          */
14686         *tau = (double)(0);
14687         x->ptr.p_double[1] = x->ptr.p_double[1]*s;
14688         return;
14689     }
14690 
14691     /*
14692      * general case
14693      */
14694     mx = ae_maxreal(ae_fabs(alpha, _state), ae_fabs(xnorm, _state), _state);
14695     beta = -mx*ae_sqrt(ae_sqr(alpha/mx, _state)+ae_sqr(xnorm/mx, _state), _state);
14696     if( ae_fp_less(alpha,(double)(0)) )
14697     {
14698         beta = -beta;
14699     }
14700     *tau = (beta-alpha)/beta;
14701     v = 1/(alpha-beta);
14702     ae_v_muld(&x->ptr.p_double[2], 1, ae_v_len(2,n), v);
14703     x->ptr.p_double[1] = beta;
14704 
14705     /*
14706      * Scale back outputs
14707      */
14708     x->ptr.p_double[1] = x->ptr.p_double[1]*s;
14709 }
14710 
14711 
14712 /*************************************************************************
14713 Application of an elementary reflection to a rectangular matrix of size MxN
14714 
14715 The algorithm pre-multiplies the matrix by an elementary reflection transformation
14716 which is given by column V and scalar Tau (see the description of the
14717 GenerateReflection procedure). Not the whole matrix but only a part of it
14718 is transformed (rows from M1 to M2, columns from N1 to N2). Only the elements
14719 of this submatrix are changed.
14720 
14721 Input parameters:
14722     C       -   matrix to be transformed.
14723     Tau     -   scalar defining the transformation.
14724     V       -   column defining the transformation.
14725                 Array whose index ranges within [1..M2-M1+1].
14726     M1, M2  -   range of rows to be transformed.
14727     N1, N2  -   range of columns to be transformed.
14728     WORK    -   working array whose indexes goes from N1 to N2.
14729 
14730 Output parameters:
14731     C       -   the result of multiplying the input matrix C by the
14732                 transformation matrix which is given by Tau and V.
14733                 If N1>N2 or M1>M2, C is not modified.
14734 
14735   -- LAPACK auxiliary routine (version 3.0) --
14736      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
14737      Courant Institute, Argonne National Lab, and Rice University
14738      September 30, 1994
14739 *************************************************************************/
applyreflectionfromtheleft(ae_matrix * c,double tau,ae_vector * v,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * work,ae_state * _state)14740 void applyreflectionfromtheleft(/* Real    */ ae_matrix* c,
14741      double tau,
14742      /* Real    */ ae_vector* v,
14743      ae_int_t m1,
14744      ae_int_t m2,
14745      ae_int_t n1,
14746      ae_int_t n2,
14747      /* Real    */ ae_vector* work,
14748      ae_state *_state)
14749 {
14750 
14751 
14752     if( (ae_fp_eq(tau,(double)(0))||n1>n2)||m1>m2 )
14753     {
14754         return;
14755     }
14756     rvectorsetlengthatleast(work, n2-n1+1, _state);
14757     rmatrixgemv(n2-n1+1, m2-m1+1, 1.0, c, m1, n1, 1, v, 1, 0.0, work, 0, _state);
14758     rmatrixger(m2-m1+1, n2-n1+1, c, m1, n1, -tau, v, 1, work, 0, _state);
14759 }
14760 
14761 
14762 /*************************************************************************
14763 Application of an elementary reflection to a rectangular matrix of size MxN
14764 
14765 The algorithm post-multiplies the matrix by an elementary reflection transformation
14766 which is given by column V and scalar Tau (see the description of the
14767 GenerateReflection procedure). Not the whole matrix but only a part of it
14768 is transformed (rows from M1 to M2, columns from N1 to N2). Only the
14769 elements of this submatrix are changed.
14770 
14771 Input parameters:
14772     C       -   matrix to be transformed.
14773     Tau     -   scalar defining the transformation.
14774     V       -   column defining the transformation.
14775                 Array whose index ranges within [1..N2-N1+1].
14776     M1, M2  -   range of rows to be transformed.
14777     N1, N2  -   range of columns to be transformed.
14778     WORK    -   working array whose indexes goes from M1 to M2.
14779 
14780 Output parameters:
14781     C       -   the result of multiplying the input matrix C by the
14782                 transformation matrix which is given by Tau and V.
14783                 If N1>N2 or M1>M2, C is not modified.
14784 
14785   -- LAPACK auxiliary routine (version 3.0) --
14786      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
14787      Courant Institute, Argonne National Lab, and Rice University
14788      September 30, 1994
14789 *************************************************************************/
applyreflectionfromtheright(ae_matrix * c,double tau,ae_vector * v,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * work,ae_state * _state)14790 void applyreflectionfromtheright(/* Real    */ ae_matrix* c,
14791      double tau,
14792      /* Real    */ ae_vector* v,
14793      ae_int_t m1,
14794      ae_int_t m2,
14795      ae_int_t n1,
14796      ae_int_t n2,
14797      /* Real    */ ae_vector* work,
14798      ae_state *_state)
14799 {
14800 
14801 
14802     if( (ae_fp_eq(tau,(double)(0))||n1>n2)||m1>m2 )
14803     {
14804         return;
14805     }
14806     rvectorsetlengthatleast(work, m2-m1+1, _state);
14807     rmatrixgemv(m2-m1+1, n2-n1+1, 1.0, c, m1, n1, 0, v, 1, 0.0, work, 0, _state);
14808     rmatrixger(m2-m1+1, n2-n1+1, c, m1, n1, -tau, work, 0, v, 1, _state);
14809 }
14810 
14811 
14812 /*************************************************************************
14813 Cache-oblivious complex "copy-and-transpose"
14814 
14815 Input parameters:
14816     M   -   number of rows
14817     N   -   number of columns
14818     A   -   source matrix, MxN submatrix is copied and transposed
14819     IA  -   submatrix offset (row index)
14820     JA  -   submatrix offset (column index)
14821     B   -   destination matrix, must be large enough to store result
14822     IB  -   submatrix offset (row index)
14823     JB  -   submatrix offset (column index)
14824 *************************************************************************/
cmatrixtranspose(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)14825 void cmatrixtranspose(ae_int_t m,
14826      ae_int_t n,
14827      /* Complex */ ae_matrix* a,
14828      ae_int_t ia,
14829      ae_int_t ja,
14830      /* Complex */ ae_matrix* b,
14831      ae_int_t ib,
14832      ae_int_t jb,
14833      ae_state *_state)
14834 {
14835     ae_int_t i;
14836     ae_int_t s1;
14837     ae_int_t s2;
14838 
14839 
14840     if( m<=2*ablascomplexblocksize(a, _state)&&n<=2*ablascomplexblocksize(a, _state) )
14841     {
14842 
14843         /*
14844          * base case
14845          */
14846         for(i=0; i<=m-1; i++)
14847         {
14848             ae_v_cmove(&b->ptr.pp_complex[ib][jb+i], b->stride, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(ib,ib+n-1));
14849         }
14850     }
14851     else
14852     {
14853 
14854         /*
14855          * Cache-oblivious recursion
14856          */
14857         if( m>n )
14858         {
14859             ablascomplexsplitlength(a, m, &s1, &s2, _state);
14860             cmatrixtranspose(s1, n, a, ia, ja, b, ib, jb, _state);
14861             cmatrixtranspose(s2, n, a, ia+s1, ja, b, ib, jb+s1, _state);
14862         }
14863         else
14864         {
14865             ablascomplexsplitlength(a, n, &s1, &s2, _state);
14866             cmatrixtranspose(m, s1, a, ia, ja, b, ib, jb, _state);
14867             cmatrixtranspose(m, s2, a, ia, ja+s1, b, ib+s1, jb, _state);
14868         }
14869     }
14870 }
14871 
14872 
14873 /*************************************************************************
14874 Cache-oblivious real "copy-and-transpose"
14875 
14876 Input parameters:
14877     M   -   number of rows
14878     N   -   number of columns
14879     A   -   source matrix, MxN submatrix is copied and transposed
14880     IA  -   submatrix offset (row index)
14881     JA  -   submatrix offset (column index)
14882     B   -   destination matrix, must be large enough to store result
14883     IB  -   submatrix offset (row index)
14884     JB  -   submatrix offset (column index)
14885 *************************************************************************/
rmatrixtranspose(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)14886 void rmatrixtranspose(ae_int_t m,
14887      ae_int_t n,
14888      /* Real    */ ae_matrix* a,
14889      ae_int_t ia,
14890      ae_int_t ja,
14891      /* Real    */ ae_matrix* b,
14892      ae_int_t ib,
14893      ae_int_t jb,
14894      ae_state *_state)
14895 {
14896     ae_int_t i;
14897     ae_int_t s1;
14898     ae_int_t s2;
14899 
14900 
14901     if( m<=2*ablasblocksize(a, _state)&&n<=2*ablasblocksize(a, _state) )
14902     {
14903 
14904         /*
14905          * base case
14906          */
14907         for(i=0; i<=m-1; i++)
14908         {
14909             ae_v_move(&b->ptr.pp_double[ib][jb+i], b->stride, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(ib,ib+n-1));
14910         }
14911     }
14912     else
14913     {
14914 
14915         /*
14916          * Cache-oblivious recursion
14917          */
14918         if( m>n )
14919         {
14920             ablassplitlength(a, m, &s1, &s2, _state);
14921             rmatrixtranspose(s1, n, a, ia, ja, b, ib, jb, _state);
14922             rmatrixtranspose(s2, n, a, ia+s1, ja, b, ib, jb+s1, _state);
14923         }
14924         else
14925         {
14926             ablassplitlength(a, n, &s1, &s2, _state);
14927             rmatrixtranspose(m, s1, a, ia, ja, b, ib, jb, _state);
14928             rmatrixtranspose(m, s2, a, ia, ja+s1, b, ib+s1, jb, _state);
14929         }
14930     }
14931 }
14932 
14933 
14934 /*************************************************************************
14935 This code enforces symmetry of the matrix by copying the Upper part to the lower
14936 one (or vice versa).
14937 
14938 INPUT PARAMETERS:
14939     A   -   matrix
14940     N   -   number of rows/columns
14941     IsUpper - whether we want to copy upper triangle to lower one (True)
14942             or vice versa (False).
14943 *************************************************************************/
rmatrixenforcesymmetricity(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)14944 void rmatrixenforcesymmetricity(/* Real    */ ae_matrix* a,
14945      ae_int_t n,
14946      ae_bool isupper,
14947      ae_state *_state)
14948 {
14949     ae_int_t i;
14950     ae_int_t j;
14951 
14952 
14953     if( isupper )
14954     {
14955         for(i=0; i<=n-1; i++)
14956         {
14957             for(j=i+1; j<=n-1; j++)
14958             {
14959                 a->ptr.pp_double[j][i] = a->ptr.pp_double[i][j];
14960             }
14961         }
14962     }
14963     else
14964     {
14965         for(i=0; i<=n-1; i++)
14966         {
14967             for(j=i+1; j<=n-1; j++)
14968             {
14969                 a->ptr.pp_double[i][j] = a->ptr.pp_double[j][i];
14970             }
14971         }
14972     }
14973 }
14974 
14975 
14976 /*************************************************************************
14977 Copy
14978 
14979 Input parameters:
14980     M   -   number of rows
14981     N   -   number of columns
14982     A   -   source matrix, MxN submatrix is copied
14983     IA  -   submatrix offset (row index)
14984     JA  -   submatrix offset (column index)
14985     B   -   destination matrix, must be large enough to store result
14986     IB  -   submatrix offset (row index)
14987     JB  -   submatrix offset (column index)
14988 *************************************************************************/
cmatrixcopy(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)14989 void cmatrixcopy(ae_int_t m,
14990      ae_int_t n,
14991      /* Complex */ ae_matrix* a,
14992      ae_int_t ia,
14993      ae_int_t ja,
14994      /* Complex */ ae_matrix* b,
14995      ae_int_t ib,
14996      ae_int_t jb,
14997      ae_state *_state)
14998 {
14999     ae_int_t i;
15000 
15001 
15002     if( m==0||n==0 )
15003     {
15004         return;
15005     }
15006     for(i=0; i<=m-1; i++)
15007     {
15008         ae_v_cmove(&b->ptr.pp_complex[ib+i][jb], 1, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(jb,jb+n-1));
15009     }
15010 }
15011 
15012 
15013 /*************************************************************************
15014 Copy
15015 
15016 Input parameters:
15017     N   -   subvector size
15018     A   -   source vector, N elements are copied
15019     IA  -   source offset (first element index)
15020     B   -   destination vector, must be large enough to store result
15021     IB  -   destination offset (first element index)
15022 *************************************************************************/
rvectorcopy(ae_int_t n,ae_vector * a,ae_int_t ia,ae_vector * b,ae_int_t ib,ae_state * _state)15023 void rvectorcopy(ae_int_t n,
15024      /* Real    */ ae_vector* a,
15025      ae_int_t ia,
15026      /* Real    */ ae_vector* b,
15027      ae_int_t ib,
15028      ae_state *_state)
15029 {
15030 
15031 
15032     if( n==0 )
15033     {
15034         return;
15035     }
15036     if( ia==0&&ib==0 )
15037     {
15038         rcopyv(n, a, b, _state);
15039     }
15040     else
15041     {
15042         rcopyvx(n, a, ia, b, ib, _state);
15043     }
15044 }
15045 
15046 
15047 /*************************************************************************
15048 Copy
15049 
15050 Input parameters:
15051     M   -   number of rows
15052     N   -   number of columns
15053     A   -   source matrix, MxN submatrix is copied
15054     IA  -   submatrix offset (row index)
15055     JA  -   submatrix offset (column index)
15056     B   -   destination matrix, must be large enough to store result
15057     IB  -   submatrix offset (row index)
15058     JB  -   submatrix offset (column index)
15059 *************************************************************************/
rmatrixcopy(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)15060 void rmatrixcopy(ae_int_t m,
15061      ae_int_t n,
15062      /* Real    */ ae_matrix* a,
15063      ae_int_t ia,
15064      ae_int_t ja,
15065      /* Real    */ ae_matrix* b,
15066      ae_int_t ib,
15067      ae_int_t jb,
15068      ae_state *_state)
15069 {
15070     ae_int_t i;
15071 
15072 
15073     if( m==0||n==0 )
15074     {
15075         return;
15076     }
15077     for(i=0; i<=m-1; i++)
15078     {
15079         ae_v_move(&b->ptr.pp_double[ib+i][jb], 1, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(jb,jb+n-1));
15080     }
15081 }
15082 
15083 
15084 /*************************************************************************
15085 Performs generalized copy: B := Beta*B + Alpha*A.
15086 
15087 If Beta=0, then previous contents of B is simply ignored. If Alpha=0, then
15088 A is ignored and not referenced. If both Alpha and Beta  are  zero,  B  is
15089 filled by zeros.
15090 
15091 Input parameters:
15092     M   -   number of rows
15093     N   -   number of columns
15094     Alpha-  coefficient
15095     A   -   source matrix, MxN submatrix is read
15096     IA  -   submatrix offset (row index)
15097     JA  -   submatrix offset (column index)
15098     Beta-   coefficient
15099     B   -   destination matrix, must be large enough to store result
15100     IB  -   submatrix offset (row index)
15101     JB  -   submatrix offset (column index)
15102 *************************************************************************/
rmatrixgencopy(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,double beta,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)15103 void rmatrixgencopy(ae_int_t m,
15104      ae_int_t n,
15105      double alpha,
15106      /* Real    */ ae_matrix* a,
15107      ae_int_t ia,
15108      ae_int_t ja,
15109      double beta,
15110      /* Real    */ ae_matrix* b,
15111      ae_int_t ib,
15112      ae_int_t jb,
15113      ae_state *_state)
15114 {
15115     ae_int_t i;
15116     ae_int_t j;
15117 
15118 
15119     if( m==0||n==0 )
15120     {
15121         return;
15122     }
15123 
15124     /*
15125      * Zero-fill
15126      */
15127     if( ae_fp_eq(alpha,(double)(0))&&ae_fp_eq(beta,(double)(0)) )
15128     {
15129         for(i=0; i<=m-1; i++)
15130         {
15131             for(j=0; j<=n-1; j++)
15132             {
15133                 b->ptr.pp_double[ib+i][jb+j] = (double)(0);
15134             }
15135         }
15136         return;
15137     }
15138 
15139     /*
15140      * Inplace multiply
15141      */
15142     if( ae_fp_eq(alpha,(double)(0)) )
15143     {
15144         for(i=0; i<=m-1; i++)
15145         {
15146             for(j=0; j<=n-1; j++)
15147             {
15148                 b->ptr.pp_double[ib+i][jb+j] = beta*b->ptr.pp_double[ib+i][jb+j];
15149             }
15150         }
15151         return;
15152     }
15153 
15154     /*
15155      * Multiply and copy
15156      */
15157     if( ae_fp_eq(beta,(double)(0)) )
15158     {
15159         for(i=0; i<=m-1; i++)
15160         {
15161             for(j=0; j<=n-1; j++)
15162             {
15163                 b->ptr.pp_double[ib+i][jb+j] = alpha*a->ptr.pp_double[ia+i][ja+j];
15164             }
15165         }
15166         return;
15167     }
15168 
15169     /*
15170      * Generic
15171      */
15172     for(i=0; i<=m-1; i++)
15173     {
15174         for(j=0; j<=n-1; j++)
15175         {
15176             b->ptr.pp_double[ib+i][jb+j] = alpha*a->ptr.pp_double[ia+i][ja+j]+beta*b->ptr.pp_double[ib+i][jb+j];
15177         }
15178     }
15179 }
15180 
15181 
15182 /*************************************************************************
15183 Rank-1 correction: A := A + alpha*u*v'
15184 
15185 NOTE: this  function  expects  A  to  be  large enough to store result. No
15186       automatic preallocation happens for  smaller  arrays.  No  integrity
15187       checks is performed for sizes of A, u, v.
15188 
15189 INPUT PARAMETERS:
15190     M   -   number of rows
15191     N   -   number of columns
15192     A   -   target matrix, MxN submatrix is updated
15193     IA  -   submatrix offset (row index)
15194     JA  -   submatrix offset (column index)
15195     Alpha-  coefficient
15196     U   -   vector #1
15197     IU  -   subvector offset
15198     V   -   vector #2
15199     IV  -   subvector offset
15200 
15201 
15202   -- ALGLIB routine --
15203 
15204      16.10.2017
15205      Bochkanov Sergey
15206 *************************************************************************/
rmatrixger(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,double alpha,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)15207 void rmatrixger(ae_int_t m,
15208      ae_int_t n,
15209      /* Real    */ ae_matrix* a,
15210      ae_int_t ia,
15211      ae_int_t ja,
15212      double alpha,
15213      /* Real    */ ae_vector* u,
15214      ae_int_t iu,
15215      /* Real    */ ae_vector* v,
15216      ae_int_t iv,
15217      ae_state *_state)
15218 {
15219     ae_int_t i;
15220     double s;
15221 
15222 
15223 
15224     /*
15225      * Quick exit
15226      */
15227     if( m<=0||n<=0 )
15228     {
15229         return;
15230     }
15231 
15232     /*
15233      * Try fast kernels:
15234      * * vendor kernel
15235      * * internal kernel
15236      */
15237     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15238     {
15239 
15240         /*
15241          * Try MKL kernel first
15242          */
15243         if( rmatrixgermkl(m, n, a, ia, ja, alpha, u, iu, v, iv, _state) )
15244         {
15245             return;
15246         }
15247     }
15248     if( rmatrixgerf(m, n, a, ia, ja, alpha, u, iu, v, iv, _state) )
15249     {
15250         return;
15251     }
15252 
15253     /*
15254      * Generic code
15255      */
15256     for(i=0; i<=m-1; i++)
15257     {
15258         s = alpha*u->ptr.p_double[iu+i];
15259         ae_v_addd(&a->ptr.pp_double[ia+i][ja], 1, &v->ptr.p_double[iv], 1, ae_v_len(ja,ja+n-1), s);
15260     }
15261 }
15262 
15263 
15264 /*************************************************************************
15265 Rank-1 correction: A := A + u*v'
15266 
15267 INPUT PARAMETERS:
15268     M   -   number of rows
15269     N   -   number of columns
15270     A   -   target matrix, MxN submatrix is updated
15271     IA  -   submatrix offset (row index)
15272     JA  -   submatrix offset (column index)
15273     U   -   vector #1
15274     IU  -   subvector offset
15275     V   -   vector #2
15276     IV  -   subvector offset
15277 *************************************************************************/
cmatrixrank1(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)15278 void cmatrixrank1(ae_int_t m,
15279      ae_int_t n,
15280      /* Complex */ ae_matrix* a,
15281      ae_int_t ia,
15282      ae_int_t ja,
15283      /* Complex */ ae_vector* u,
15284      ae_int_t iu,
15285      /* Complex */ ae_vector* v,
15286      ae_int_t iv,
15287      ae_state *_state)
15288 {
15289     ae_int_t i;
15290     ae_complex s;
15291 
15292 
15293 
15294     /*
15295      * Quick exit
15296      */
15297     if( m<=0||n<=0 )
15298     {
15299         return;
15300     }
15301 
15302     /*
15303      * Try fast kernels:
15304      * * vendor kernel
15305      * * internal kernel
15306      */
15307     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15308     {
15309 
15310         /*
15311          * Try MKL kernel first
15312          */
15313         if( cmatrixrank1mkl(m, n, a, ia, ja, u, iu, v, iv, _state) )
15314         {
15315             return;
15316         }
15317     }
15318     if( cmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv, _state) )
15319     {
15320         return;
15321     }
15322 
15323     /*
15324      * Generic code
15325      */
15326     for(i=0; i<=m-1; i++)
15327     {
15328         s = u->ptr.p_complex[iu+i];
15329         ae_v_caddc(&a->ptr.pp_complex[ia+i][ja], 1, &v->ptr.p_complex[iv], 1, "N", ae_v_len(ja,ja+n-1), s);
15330     }
15331 }
15332 
15333 
15334 /*************************************************************************
15335 IMPORTANT: this function is deprecated since ALGLIB 3.13. Use RMatrixGER()
15336            which is more generic version of this function.
15337 
15338 Rank-1 correction: A := A + u*v'
15339 
15340 INPUT PARAMETERS:
15341     M   -   number of rows
15342     N   -   number of columns
15343     A   -   target matrix, MxN submatrix is updated
15344     IA  -   submatrix offset (row index)
15345     JA  -   submatrix offset (column index)
15346     U   -   vector #1
15347     IU  -   subvector offset
15348     V   -   vector #2
15349     IV  -   subvector offset
15350 *************************************************************************/
rmatrixrank1(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)15351 void rmatrixrank1(ae_int_t m,
15352      ae_int_t n,
15353      /* Real    */ ae_matrix* a,
15354      ae_int_t ia,
15355      ae_int_t ja,
15356      /* Real    */ ae_vector* u,
15357      ae_int_t iu,
15358      /* Real    */ ae_vector* v,
15359      ae_int_t iv,
15360      ae_state *_state)
15361 {
15362     ae_int_t i;
15363     double s;
15364 
15365 
15366 
15367     /*
15368      * Quick exit
15369      */
15370     if( m<=0||n<=0 )
15371     {
15372         return;
15373     }
15374 
15375     /*
15376      * Try fast kernels:
15377      * * vendor kernel
15378      * * internal kernel
15379      */
15380     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15381     {
15382 
15383         /*
15384          * Try MKL kernel first
15385          */
15386         if( rmatrixrank1mkl(m, n, a, ia, ja, u, iu, v, iv, _state) )
15387         {
15388             return;
15389         }
15390     }
15391     if( rmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv, _state) )
15392     {
15393         return;
15394     }
15395 
15396     /*
15397      * Generic code
15398      */
15399     for(i=0; i<=m-1; i++)
15400     {
15401         s = u->ptr.p_double[iu+i];
15402         ae_v_addd(&a->ptr.pp_double[ia+i][ja], 1, &v->ptr.p_double[iv], 1, ae_v_len(ja,ja+n-1), s);
15403     }
15404 }
15405 
15406 
/*************************************************************************
Matrix-vector product: y := Alpha*op(A)*x + Beta*y

Operates on the MxN submatrix of A with offset (IA,JA), the subvector of
X starting at IX and the subvector of Y starting at IY.

Special cases handled explicitly below:
* M<=0              - nothing is done
* N<=0 or Alpha=0   - y := Beta*y (or y := 0 when Beta=0); A and X are
                      not referenced

NOTE(review): this header was reconstructed from the visible code; OpA is
forwarded unchanged to the RGEMV/RGEMVX kernels (presumably 0 => op(A)=A
and 1 => op(A)=A^T, as in the sibling routine RMatrixMV) - confirm
against the official ALGLIB documentation.
*************************************************************************/
void rmatrixgemv(ae_int_t m,
     ae_int_t n,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t opa,
     /* Real    */ ae_vector* x,
     ae_int_t ix,
     double beta,
     /* Real    */ ae_vector* y,
     ae_int_t iy,
     ae_state *_state)
{


    /*
     * Quick exit for M=0, N=0 or Alpha=0.
     *
     * After this block we have M>0, N>0, Alpha<>0.
     */
    if( m<=0 )
    {
        return;
    }
    if( n<=0||ae_fp_eq(alpha,0.0) )
    {
        if( ae_fp_neq(beta,(double)(0)) )
        {
            rmulvx(m, beta, y, iy, _state);
        }
        else
        {
            rsetvx(m, 0.0, y, iy, _state);
        }
        return;
    }

    /*
     * Try fast kernels
     */
    if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
    {

        /*
         * Try MKL kernel
         */
        if( rmatrixgemvmkl(m, n, alpha, a, ia, ja, opa, x, ix, beta, y, iy, _state) )
        {
            return;
        }
    }

    /*
     * All-zero offsets allow the simpler offset-free kernel;
     * otherwise use the offset-aware one.
     */
    if( ia+ja+ix+iy==0 )
    {
        rgemv(m, n, alpha, a, opa, x, beta, y, _state);
    }
    else
    {
        rgemvx(m, n, alpha, a, ia, ja, opa, x, ix, beta, y, iy, _state);
    }
}
15469 
15470 
15471 /*************************************************************************
15472 Matrix-vector product: y := op(A)*x
15473 
15474 INPUT PARAMETERS:
15475     M   -   number of rows of op(A)
15476             M>=0
15477     N   -   number of columns of op(A)
15478             N>=0
15479     A   -   target matrix
15480     IA  -   submatrix offset (row index)
15481     JA  -   submatrix offset (column index)
15482     OpA -   operation type:
15483             * OpA=0     =>  op(A) = A
15484             * OpA=1     =>  op(A) = A^T
15485             * OpA=2     =>  op(A) = A^H
15486     X   -   input vector
15487     IX  -   subvector offset
15488     IY  -   subvector offset
15489     Y   -   preallocated vector, must be large enough to store result
15490 
15491 OUTPUT PARAMETERS:
15492     Y   -   vector which stores result
15493 
15494 if M=0, then subroutine does nothing.
15495 if N=0, Y is filled by zeros.
15496 
15497 
15498   -- ALGLIB routine --
15499 
15500      28.01.2010
15501      Bochkanov Sergey
15502 *************************************************************************/
cmatrixmv(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,ae_vector * y,ae_int_t iy,ae_state * _state)15503 void cmatrixmv(ae_int_t m,
15504      ae_int_t n,
15505      /* Complex */ ae_matrix* a,
15506      ae_int_t ia,
15507      ae_int_t ja,
15508      ae_int_t opa,
15509      /* Complex */ ae_vector* x,
15510      ae_int_t ix,
15511      /* Complex */ ae_vector* y,
15512      ae_int_t iy,
15513      ae_state *_state)
15514 {
15515     ae_int_t i;
15516     ae_complex v;
15517 
15518 
15519 
15520     /*
15521      * Quick exit
15522      */
15523     if( m==0 )
15524     {
15525         return;
15526     }
15527     if( n==0 )
15528     {
15529         for(i=0; i<=m-1; i++)
15530         {
15531             y->ptr.p_complex[iy+i] = ae_complex_from_i(0);
15532         }
15533         return;
15534     }
15535 
15536     /*
15537      * Try fast kernels
15538      */
15539     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15540     {
15541 
15542         /*
15543          * Try MKL kernel
15544          */
15545         if( cmatrixmvmkl(m, n, a, ia, ja, opa, x, ix, y, iy, _state) )
15546         {
15547             return;
15548         }
15549     }
15550 
15551     /*
15552      * Generic code
15553      */
15554     if( opa==0 )
15555     {
15556 
15557         /*
15558          * y = A*x
15559          */
15560         for(i=0; i<=m-1; i++)
15561         {
15562             v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &x->ptr.p_complex[ix], 1, "N", ae_v_len(ja,ja+n-1));
15563             y->ptr.p_complex[iy+i] = v;
15564         }
15565         return;
15566     }
15567     if( opa==1 )
15568     {
15569 
15570         /*
15571          * y = A^T*x
15572          */
15573         for(i=0; i<=m-1; i++)
15574         {
15575             y->ptr.p_complex[iy+i] = ae_complex_from_i(0);
15576         }
15577         for(i=0; i<=n-1; i++)
15578         {
15579             v = x->ptr.p_complex[ix+i];
15580             ae_v_caddc(&y->ptr.p_complex[iy], 1, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(iy,iy+m-1), v);
15581         }
15582         return;
15583     }
15584     if( opa==2 )
15585     {
15586 
15587         /*
15588          * y = A^H*x
15589          */
15590         for(i=0; i<=m-1; i++)
15591         {
15592             y->ptr.p_complex[iy+i] = ae_complex_from_i(0);
15593         }
15594         for(i=0; i<=n-1; i++)
15595         {
15596             v = x->ptr.p_complex[ix+i];
15597             ae_v_caddc(&y->ptr.p_complex[iy], 1, &a->ptr.pp_complex[ia+i][ja], 1, "Conj", ae_v_len(iy,iy+m-1), v);
15598         }
15599         return;
15600     }
15601 }
15602 
15603 
15604 /*************************************************************************
15605 IMPORTANT: this function is deprecated since ALGLIB 3.13. Use RMatrixGEMV()
15606            which is more generic version of this function.
15607 
15608 Matrix-vector product: y := op(A)*x
15609 
15610 INPUT PARAMETERS:
15611     M   -   number of rows of op(A)
15612     N   -   number of columns of op(A)
15613     A   -   target matrix
15614     IA  -   submatrix offset (row index)
15615     JA  -   submatrix offset (column index)
15616     OpA -   operation type:
15617             * OpA=0     =>  op(A) = A
15618             * OpA=1     =>  op(A) = A^T
15619     X   -   input vector
15620     IX  -   subvector offset
15621     IY  -   subvector offset
15622     Y   -   preallocated vector, must be large enough to store result
15623 
15624 OUTPUT PARAMETERS:
15625     Y   -   vector which stores result
15626 
15627 if M=0, then subroutine does nothing.
15628 if N=0, Y is filled by zeros.
15629 
15630 
15631   -- ALGLIB routine --
15632 
15633      28.01.2010
15634      Bochkanov Sergey
15635 *************************************************************************/
rmatrixmv(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,ae_vector * y,ae_int_t iy,ae_state * _state)15636 void rmatrixmv(ae_int_t m,
15637      ae_int_t n,
15638      /* Real    */ ae_matrix* a,
15639      ae_int_t ia,
15640      ae_int_t ja,
15641      ae_int_t opa,
15642      /* Real    */ ae_vector* x,
15643      ae_int_t ix,
15644      /* Real    */ ae_vector* y,
15645      ae_int_t iy,
15646      ae_state *_state)
15647 {
15648     ae_int_t i;
15649     double v;
15650 
15651 
15652 
15653     /*
15654      * Quick exit
15655      */
15656     if( m==0 )
15657     {
15658         return;
15659     }
15660     if( n==0 )
15661     {
15662         for(i=0; i<=m-1; i++)
15663         {
15664             y->ptr.p_double[iy+i] = (double)(0);
15665         }
15666         return;
15667     }
15668 
15669     /*
15670      * Try fast kernels
15671      */
15672     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15673     {
15674 
15675         /*
15676          * Try MKL kernel
15677          */
15678         if( rmatrixmvmkl(m, n, a, ia, ja, opa, x, ix, y, iy, _state) )
15679         {
15680             return;
15681         }
15682     }
15683 
15684     /*
15685      * Generic code
15686      */
15687     if( opa==0 )
15688     {
15689 
15690         /*
15691          * y = A*x
15692          */
15693         for(i=0; i<=m-1; i++)
15694         {
15695             v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &x->ptr.p_double[ix], 1, ae_v_len(ja,ja+n-1));
15696             y->ptr.p_double[iy+i] = v;
15697         }
15698         return;
15699     }
15700     if( opa==1 )
15701     {
15702 
15703         /*
15704          * y = A^T*x
15705          */
15706         for(i=0; i<=m-1; i++)
15707         {
15708             y->ptr.p_double[iy+i] = (double)(0);
15709         }
15710         for(i=0; i<=n-1; i++)
15711         {
15712             v = x->ptr.p_double[ix+i];
15713             ae_v_addd(&y->ptr.p_double[iy], 1, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(iy,iy+m-1), v);
15714         }
15715         return;
15716     }
15717 }
15718 
15719 
/*************************************************************************
Symmetric matrix-vector product: y := Alpha*A*x + Beta*y, where A  is  an
NxN symmetric matrix given by one of its triangles  (the  code below reads
only the IsUpper-selected triangle of the NxN block at offset (IA,JA) and
mirrors it to the other one during accumulation).

Special cases handled explicitly below:
* N<=0      - nothing is done
* Alpha=0   - y := Beta*y (or y := 0 when Beta=0); A and X are not
              referenced

NOTE(review): this header was reconstructed from the visible code -
confirm parameter conventions against the official ALGLIB documentation.
*************************************************************************/
void rmatrixsymv(ae_int_t n,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_bool isupper,
     /* Real    */ ae_vector* x,
     ae_int_t ix,
     double beta,
     /* Real    */ ae_vector* y,
     ae_int_t iy,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double v;
    double vr;
    double vx;



    /*
     * Quick exit for N<=0 or Alpha=0.
     *
     * After this block we have N>0, Alpha<>0.
     */
    if( n<=0 )
    {
        return;
    }
    if( ae_fp_eq(alpha,0.0) )
    {
        if( ae_fp_neq(beta,(double)(0)) )
        {
            for(i=0; i<=n-1; i++)
            {
                y->ptr.p_double[iy+i] = beta*y->ptr.p_double[iy+i];
            }
        }
        else
        {
            for(i=0; i<=n-1; i++)
            {
                y->ptr.p_double[iy+i] = 0.0;
            }
        }
        return;
    }

    /*
     * Try fast kernels
     */
    if( n>ablas_blas2minvendorkernelsize )
    {

        /*
         * Try MKL kernel
         */
        if( rmatrixsymvmkl(n, alpha, a, ia, ja, isupper, x, ix, beta, y, iy, _state) )
        {
            return;
        }
    }

    /*
     * Generic code: pre-scale Y by Beta (or clear it), then accumulate
     */
    if( ae_fp_neq(beta,(double)(0)) )
    {
        for(i=0; i<=n-1; i++)
        {
            y->ptr.p_double[iy+i] = beta*y->ptr.p_double[iy+i];
        }
    }
    else
    {
        for(i=0; i<=n-1; i++)
        {
            y->ptr.p_double[iy+i] = 0.0;
        }
    }
    if( isupper )
    {

        /*
         * Upper triangle of A is stored
         */
        for(i=0; i<=n-1; i++)
        {

            /*
             * Process diagonal element
             */
            v = alpha*a->ptr.pp_double[ia+i][ja+i];
            y->ptr.p_double[iy+i] = y->ptr.p_double[iy+i]+v*x->ptr.p_double[ix+i];

            /*
             * Process off-diagonal elements: each stored A[i][j] (j>i)
             * contributes both to Y[j] (as A[j][i] by symmetry) and,
             * via VR, to Y[i]
             */
            vr = 0.0;
            vx = x->ptr.p_double[ix+i];
            for(j=i+1; j<=n-1; j++)
            {
                v = alpha*a->ptr.pp_double[ia+i][ja+j];
                y->ptr.p_double[iy+j] = y->ptr.p_double[iy+j]+v*vx;
                vr = vr+v*x->ptr.p_double[ix+j];
            }
            y->ptr.p_double[iy+i] = y->ptr.p_double[iy+i]+vr;
        }
    }
    else
    {

        /*
         * Lower triangle of A is stored
         */
        for(i=0; i<=n-1; i++)
        {

            /*
             * Process diagonal element
             */
            v = alpha*a->ptr.pp_double[ia+i][ja+i];
            y->ptr.p_double[iy+i] = y->ptr.p_double[iy+i]+v*x->ptr.p_double[ix+i];

            /*
             * Process off-diagonal elements: each stored A[i][j] (j<i)
             * contributes both to Y[j] (as A[j][i] by symmetry) and,
             * via VR, to Y[i]
             */
            vr = 0.0;
            vx = x->ptr.p_double[ix+i];
            for(j=0; j<=i-1; j++)
            {
                v = alpha*a->ptr.pp_double[ia+i][ja+j];
                y->ptr.p_double[iy+j] = y->ptr.p_double[iy+j]+v*vx;
                vr = vr+v*x->ptr.p_double[ix+j];
            }
            y->ptr.p_double[iy+i] = y->ptr.p_double[iy+i]+vr;
        }
    }
}
15860 
15861 
rmatrixsyvmv(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_vector * x,ae_int_t ix,ae_vector * tmp,ae_state * _state)15862 double rmatrixsyvmv(ae_int_t n,
15863      /* Real    */ ae_matrix* a,
15864      ae_int_t ia,
15865      ae_int_t ja,
15866      ae_bool isupper,
15867      /* Real    */ ae_vector* x,
15868      ae_int_t ix,
15869      /* Real    */ ae_vector* tmp,
15870      ae_state *_state)
15871 {
15872     ae_int_t i;
15873     double result;
15874 
15875 
15876 
15877     /*
15878      * Quick exit for N=0
15879      */
15880     if( n<=0 )
15881     {
15882         result = (double)(0);
15883         return result;
15884     }
15885 
15886     /*
15887      * Generic code
15888      */
15889     rmatrixsymv(n, 1.0, a, ia, ja, isupper, x, ix, 0.0, tmp, 0, _state);
15890     result = (double)(0);
15891     for(i=0; i<=n-1; i++)
15892     {
15893         result = result+x->ptr.p_double[ix+i]*tmp->ptr.p_double[i];
15894     }
15895     return result;
15896 }
15897 
15898 
15899 /*************************************************************************
15900 This subroutine solves linear system op(A)*x=b where:
15901 * A is NxN upper/lower triangular/unitriangular matrix
15902 * X and B are Nx1 vectors
15903 * "op" may be identity transformation or transposition
15904 
15905 Solution replaces X.
15906 
IMPORTANT: * no overflow/underflow/degeneracy tests are performed.
15908            * no integrity checks for operand sizes, out-of-bounds accesses
15909              and so on is performed
15910 
15911 INPUT PARAMETERS
15912     N   -   matrix size, N>=0
    A       -   matrix, actual matrix is stored in A[IA:IA+N-1,JA:JA+N-1]
15914     IA      -   submatrix offset
15915     JA      -   submatrix offset
15916     IsUpper -   whether matrix is upper triangular
15917     IsUnit  -   whether matrix is unitriangular
15918     OpType  -   transformation type:
15919                 * 0 - no transformation
15920                 * 1 - transposition
15921     X       -   right part, actual vector is stored in X[IX:IX+N-1]
15922     IX      -   offset
15923 
15924 OUTPUT PARAMETERS
15925     X       -   solution replaces elements X[IX:IX+N-1]
15926 
15927   -- ALGLIB routine / remastering of LAPACK's DTRSV --
15928      (c) 2017 Bochkanov Sergey - converted to ALGLIB
15929      (c) 2016 Reference BLAS level1 routine (LAPACK version 3.7.0)
15930      Reference BLAS is a software package provided by Univ. of Tennessee,
15931      Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.
15932 *************************************************************************/
rmatrixtrsv(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_int_t ix,ae_state * _state)15933 void rmatrixtrsv(ae_int_t n,
15934      /* Real    */ ae_matrix* a,
15935      ae_int_t ia,
15936      ae_int_t ja,
15937      ae_bool isupper,
15938      ae_bool isunit,
15939      ae_int_t optype,
15940      /* Real    */ ae_vector* x,
15941      ae_int_t ix,
15942      ae_state *_state)
15943 {
15944     ae_int_t i;
15945     ae_int_t j;
15946     double v;
15947 
15948 
15949 
15950     /*
15951      * Quick exit
15952      */
15953     if( n<=0 )
15954     {
15955         return;
15956     }
15957 
15958     /*
15959      * Try fast kernels
15960      */
15961     if( n>ablas_blas2minvendorkernelsize )
15962     {
15963 
15964         /*
15965          * Try MKL kernel
15966          */
15967         if( rmatrixtrsvmkl(n, a, ia, ja, isupper, isunit, optype, x, ix, _state) )
15968         {
15969             return;
15970         }
15971     }
15972 
15973     /*
15974      * Generic code
15975      */
15976     if( optype==0&&isupper )
15977     {
15978         for(i=n-1; i>=0; i--)
15979         {
15980             v = x->ptr.p_double[ix+i];
15981             for(j=i+1; j<=n-1; j++)
15982             {
15983                 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
15984             }
15985             if( !isunit )
15986             {
15987                 v = v/a->ptr.pp_double[ia+i][ja+i];
15988             }
15989             x->ptr.p_double[ix+i] = v;
15990         }
15991         return;
15992     }
15993     if( optype==0&&!isupper )
15994     {
15995         for(i=0; i<=n-1; i++)
15996         {
15997             v = x->ptr.p_double[ix+i];
15998             for(j=0; j<=i-1; j++)
15999             {
16000                 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
16001             }
16002             if( !isunit )
16003             {
16004                 v = v/a->ptr.pp_double[ia+i][ja+i];
16005             }
16006             x->ptr.p_double[ix+i] = v;
16007         }
16008         return;
16009     }
16010     if( optype==1&&isupper )
16011     {
16012         for(i=0; i<=n-1; i++)
16013         {
16014             v = x->ptr.p_double[ix+i];
16015             if( !isunit )
16016             {
16017                 v = v/a->ptr.pp_double[ia+i][ja+i];
16018             }
16019             x->ptr.p_double[ix+i] = v;
16020             if( v==0 )
16021             {
16022                 continue;
16023             }
16024             for(j=i+1; j<=n-1; j++)
16025             {
16026                 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
16027             }
16028         }
16029         return;
16030     }
16031     if( optype==1&&!isupper )
16032     {
16033         for(i=n-1; i>=0; i--)
16034         {
16035             v = x->ptr.p_double[ix+i];
16036             if( !isunit )
16037             {
16038                 v = v/a->ptr.pp_double[ia+i][ja+i];
16039             }
16040             x->ptr.p_double[ix+i] = v;
16041             if( v==0 )
16042             {
16043                 continue;
16044             }
16045             for(j=0; j<=i-1; j++)
16046             {
16047                 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
16048             }
16049         }
16050         return;
16051     }
16052     ae_assert(ae_false, "RMatrixTRSV: unexpected operation type", _state);
16053 }
16054 
16055 
16056 /*************************************************************************
16057 This subroutine calculates X*op(A^-1) where:
16058 * X is MxN general matrix
16059 * A is NxN upper/lower triangular/unitriangular matrix
16060 * "op" may be identity transformation, transposition, conjugate transposition
16061 Multiplication result replaces X.
16062 
16063 INPUT PARAMETERS
    N   -   matrix size, N>=0
    M   -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
                * 2 - conjugate transposition
    X   -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
16076     I2  -   submatrix offset
16077     J2  -   submatrix offset
16078 
16079   ! FREE EDITION OF ALGLIB:
16080   !
16081   ! Free Edition of ALGLIB supports following important features for  this
16082   ! function:
16083   ! * C++ version: x64 SIMD support using C++ intrinsics
16084   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16085   !
16086   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16087   ! Reference Manual in order  to  find  out  how to activate SIMD support
16088   ! in ALGLIB.
16089 
16090   ! COMMERCIAL EDITION OF ALGLIB:
16091   !
16092   ! Commercial Edition of ALGLIB includes following important improvements
16093   ! of this function:
16094   ! * high-performance native backend with same C# interface (C# version)
16095   ! * multithreading support (C++ and C# versions)
16096   ! * hardware vendor (Intel) implementations of linear algebra primitives
16097   !   (C++ and C# versions, x86/x64 platform)
16098   !
16099   ! We recommend you to read 'Working with commercial version' section  of
16100   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16101   ! related features provided by commercial edition of ALGLIB.
16102 
16103   -- ALGLIB routine --
16104      20.01.2018
16105      Bochkanov Sergey
16106 *************************************************************************/
cmatrixrighttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16107 void cmatrixrighttrsm(ae_int_t m,
16108      ae_int_t n,
16109      /* Complex */ ae_matrix* a,
16110      ae_int_t i1,
16111      ae_int_t j1,
16112      ae_bool isupper,
16113      ae_bool isunit,
16114      ae_int_t optype,
16115      /* Complex */ ae_matrix* x,
16116      ae_int_t i2,
16117      ae_int_t j2,
16118      ae_state *_state)
16119 {
16120     ae_int_t s1;
16121     ae_int_t s2;
16122     ae_int_t tsa;
16123     ae_int_t tsb;
16124     ae_int_t tscur;
16125 
16126 
16127     tsa = matrixtilesizea(_state)/2;
16128     tsb = matrixtilesizeb(_state);
16129     tscur = tsb;
16130     if( imax2(m, n, _state)<=tsb )
16131     {
16132         tscur = tsa;
16133     }
16134     ae_assert(tscur>=1, "CMatrixRightTRSM: integrity check failed", _state);
16135 
16136     /*
16137      * Upper level parallelization:
16138      * * decide whether it is feasible to activate multithreading
16139      * * perform optionally parallelized splits on M
16140      */
16141     if( m>=2*tsb&&ae_fp_greater_eq(4*rmul3((double)(m), (double)(n), (double)(n), _state),smpactivationlevel(_state)) )
16142     {
16143         if( _trypexec_cmatrixrighttrsm(m,n,a,i1,j1,isupper,isunit,optype,x,i2,j2, _state) )
16144         {
16145             return;
16146         }
16147     }
16148     if( m>=2*tsb )
16149     {
16150 
16151         /*
16152          * Split X: X*A = (X1 X2)^T*A
16153          */
16154         tiledsplit(m, tsb, &s1, &s2, _state);
16155         cmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16156         cmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
16157         return;
16158     }
16159 
16160     /*
16161      * Basecase: either MKL-supported code or ALGLIB basecase code
16162      */
16163     if( imax2(m, n, _state)<=tsb )
16164     {
16165         if( cmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
16166         {
16167             return;
16168         }
16169     }
16170     if( imax2(m, n, _state)<=tsa )
16171     {
16172         ablas_cmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16173         return;
16174     }
16175 
16176     /*
16177      * Recursive subdivision
16178      */
16179     if( m>=n )
16180     {
16181 
16182         /*
16183          * Split X: X*A = (X1 X2)^T*A
16184          */
16185         tiledsplit(m, tscur, &s1, &s2, _state);
16186         cmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16187         cmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
16188     }
16189     else
16190     {
16191 
16192         /*
16193          * Split A:
16194          *               (A1  A12)
16195          * X*op(A) = X*op(       )
16196          *               (     A2)
16197          *
16198          * Different variants depending on
16199          * IsUpper/OpType combinations
16200          */
16201         tiledsplit(n, tscur, &s1, &s2, _state);
16202         if( isupper&&optype==0 )
16203         {
16204 
16205             /*
16206              *                  (A1  A12)-1
16207              * X*A^-1 = (X1 X2)*(       )
16208              *                  (     A2)
16209              */
16210             cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16211             cmatrixgemm(m, s2, s1, ae_complex_from_d(-1.0), x, i2, j2, 0, a, i1, j1+s1, 0, ae_complex_from_d(1.0), x, i2, j2+s1, _state);
16212             cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16213         }
16214         if( isupper&&optype!=0 )
16215         {
16216 
16217             /*
16218              *                  (A1'     )-1
16219              * X*A^-1 = (X1 X2)*(        )
16220              *                  (A12' A2')
16221              */
16222             cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16223             cmatrixgemm(m, s1, s2, ae_complex_from_d(-1.0), x, i2, j2+s1, 0, a, i1, j1+s1, optype, ae_complex_from_d(1.0), x, i2, j2, _state);
16224             cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16225         }
16226         if( !isupper&&optype==0 )
16227         {
16228 
16229             /*
16230              *                  (A1     )-1
16231              * X*A^-1 = (X1 X2)*(       )
16232              *                  (A21  A2)
16233              */
16234             cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16235             cmatrixgemm(m, s1, s2, ae_complex_from_d(-1.0), x, i2, j2+s1, 0, a, i1+s1, j1, 0, ae_complex_from_d(1.0), x, i2, j2, _state);
16236             cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16237         }
16238         if( !isupper&&optype!=0 )
16239         {
16240 
16241             /*
16242              *                  (A1' A21')-1
16243              * X*A^-1 = (X1 X2)*(        )
16244              *                  (     A2')
16245              */
16246             cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16247             cmatrixgemm(m, s2, s1, ae_complex_from_d(-1.0), x, i2, j2, 0, a, i1+s1, j1, optype, ae_complex_from_d(1.0), x, i2, j2+s1, _state);
16248             cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16249         }
16250     }
16251 }
16252 
16253 
16254 /*************************************************************************
16255 Serial stub for GPL edition.
16256 *************************************************************************/
_trypexec_cmatrixrighttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16257 ae_bool _trypexec_cmatrixrighttrsm(ae_int_t m,
16258     ae_int_t n,
16259     /* Complex */ ae_matrix* a,
16260     ae_int_t i1,
16261     ae_int_t j1,
16262     ae_bool isupper,
16263     ae_bool isunit,
16264     ae_int_t optype,
16265     /* Complex */ ae_matrix* x,
16266     ae_int_t i2,
16267     ae_int_t j2,
16268     ae_state *_state)
16269 {
16270     return ae_false;
16271 }
16272 
16273 
16274 /*************************************************************************
16275 This subroutine calculates op(A^-1)*X where:
16276 * X is MxN general matrix
16277 * A is MxM upper/lower triangular/unitriangular matrix
16278 * "op" may be identity transformation, transposition, conjugate transposition
16279 Multiplication result replaces X.
16280 
16281 INPUT PARAMETERS
    N   -   matrix size, N>=0
    M   -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
                * 2 - conjugate transposition
    X   -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
16294     I2  -   submatrix offset
16295     J2  -   submatrix offset
16296 
16297   ! FREE EDITION OF ALGLIB:
16298   !
16299   ! Free Edition of ALGLIB supports following important features for  this
16300   ! function:
16301   ! * C++ version: x64 SIMD support using C++ intrinsics
16302   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16303   !
16304   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16305   ! Reference Manual in order  to  find  out  how to activate SIMD support
16306   ! in ALGLIB.
16307 
16308   ! COMMERCIAL EDITION OF ALGLIB:
16309   !
16310   ! Commercial Edition of ALGLIB includes following important improvements
16311   ! of this function:
16312   ! * high-performance native backend with same C# interface (C# version)
16313   ! * multithreading support (C++ and C# versions)
16314   ! * hardware vendor (Intel) implementations of linear algebra primitives
16315   !   (C++ and C# versions, x86/x64 platform)
16316   !
16317   ! We recommend you to read 'Working with commercial version' section  of
16318   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16319   ! related features provided by commercial edition of ALGLIB.
16320 
16321   -- ALGLIB routine --
16322      15.12.2009-22.01.2018
16323      Bochkanov Sergey
16324 *************************************************************************/
cmatrixlefttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16325 void cmatrixlefttrsm(ae_int_t m,
16326      ae_int_t n,
16327      /* Complex */ ae_matrix* a,
16328      ae_int_t i1,
16329      ae_int_t j1,
16330      ae_bool isupper,
16331      ae_bool isunit,
16332      ae_int_t optype,
16333      /* Complex */ ae_matrix* x,
16334      ae_int_t i2,
16335      ae_int_t j2,
16336      ae_state *_state)
16337 {
16338     ae_int_t s1;
16339     ae_int_t s2;
16340     ae_int_t tsa;
16341     ae_int_t tsb;
16342     ae_int_t tscur;
16343 
16344 
16345     tsa = matrixtilesizea(_state)/2;
16346     tsb = matrixtilesizeb(_state);
16347     tscur = tsb;
16348     if( imax2(m, n, _state)<=tsb )
16349     {
16350         tscur = tsa;
16351     }
16352     ae_assert(tscur>=1, "CMatrixLeftTRSM: integrity check failed", _state);
16353 
16354     /*
16355      * Upper level parallelization:
16356      * * decide whether it is feasible to activate multithreading
16357      * * perform optionally parallelized splits on N
16358      */
16359     if( n>=2*tsb&&ae_fp_greater_eq(4*rmul3((double)(n), (double)(m), (double)(m), _state),smpactivationlevel(_state)) )
16360     {
16361         if( _trypexec_cmatrixlefttrsm(m,n,a,i1,j1,isupper,isunit,optype,x,i2,j2, _state) )
16362         {
16363             return;
16364         }
16365     }
16366     if( n>=2*tsb )
16367     {
16368         tiledsplit(n, tscur, &s1, &s2, _state);
16369         cmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
16370         cmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16371         return;
16372     }
16373 
16374     /*
16375      * Basecase: either MKL-supported code or ALGLIB basecase code
16376      */
16377     if( imax2(m, n, _state)<=tsb )
16378     {
16379         if( cmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
16380         {
16381             return;
16382         }
16383     }
16384     if( imax2(m, n, _state)<=tsa )
16385     {
16386         ablas_cmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16387         return;
16388     }
16389 
16390     /*
16391      * Recursive subdivision
16392      */
16393     if( n>=m )
16394     {
16395 
16396         /*
16397          * Split X: op(A)^-1*X = op(A)^-1*(X1 X2)
16398          */
16399         tiledsplit(n, tscur, &s1, &s2, _state);
16400         cmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16401         cmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
16402     }
16403     else
16404     {
16405 
16406         /*
16407          * Split A
16408          */
16409         tiledsplit(m, tscur, &s1, &s2, _state);
16410         if( isupper&&optype==0 )
16411         {
16412 
16413             /*
16414              *           (A1  A12)-1  ( X1 )
16415              * A^-1*X* = (       )   *(    )
16416              *           (     A2)    ( X2 )
16417              */
16418             cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
16419             cmatrixgemm(s1, n, s2, ae_complex_from_d(-1.0), a, i1, j1+s1, 0, x, i2+s1, j2, 0, ae_complex_from_d(1.0), x, i2, j2, _state);
16420             cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16421         }
16422         if( isupper&&optype!=0 )
16423         {
16424 
16425             /*
16426              *          (A1'     )-1 ( X1 )
16427              * A^-1*X = (        )  *(    )
16428              *          (A12' A2')   ( X2 )
16429              */
16430             cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16431             cmatrixgemm(s2, n, s1, ae_complex_from_d(-1.0), a, i1, j1+s1, optype, x, i2, j2, 0, ae_complex_from_d(1.0), x, i2+s1, j2, _state);
16432             cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
16433         }
16434         if( !isupper&&optype==0 )
16435         {
16436 
16437             /*
16438              *          (A1     )-1 ( X1 )
16439              * A^-1*X = (       )  *(    )
16440              *          (A21  A2)   ( X2 )
16441              */
16442             cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16443             cmatrixgemm(s2, n, s1, ae_complex_from_d(-1.0), a, i1+s1, j1, 0, x, i2, j2, 0, ae_complex_from_d(1.0), x, i2+s1, j2, _state);
16444             cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
16445         }
16446         if( !isupper&&optype!=0 )
16447         {
16448 
16449             /*
16450              *          (A1' A21')-1 ( X1 )
16451              * A^-1*X = (        )  *(    )
16452              *          (     A2')   ( X2 )
16453              */
16454             cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
16455             cmatrixgemm(s1, n, s2, ae_complex_from_d(-1.0), a, i1+s1, j1, optype, x, i2+s1, j2, 0, ae_complex_from_d(1.0), x, i2, j2, _state);
16456             cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16457         }
16458     }
16459 }
16460 
16461 
16462 /*************************************************************************
16463 Serial stub for GPL edition.
16464 *************************************************************************/
_trypexec_cmatrixlefttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16465 ae_bool _trypexec_cmatrixlefttrsm(ae_int_t m,
16466     ae_int_t n,
16467     /* Complex */ ae_matrix* a,
16468     ae_int_t i1,
16469     ae_int_t j1,
16470     ae_bool isupper,
16471     ae_bool isunit,
16472     ae_int_t optype,
16473     /* Complex */ ae_matrix* x,
16474     ae_int_t i2,
16475     ae_int_t j2,
16476     ae_state *_state)
16477 {
16478     return ae_false;
16479 }
16480 
16481 
16482 /*************************************************************************
16483 This subroutine calculates X*op(A^-1) where:
16484 * X is MxN general matrix
16485 * A is NxN upper/lower triangular/unitriangular matrix
16486 * "op" may be identity transformation, transposition
16487 Multiplication result replaces X.
16488 
16489 INPUT PARAMETERS
    N   -   matrix size, N>=0
    M   -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    X   -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
16501     I2  -   submatrix offset
16502     J2  -   submatrix offset
16503 
16504   ! FREE EDITION OF ALGLIB:
16505   !
16506   ! Free Edition of ALGLIB supports following important features for  this
16507   ! function:
16508   ! * C++ version: x64 SIMD support using C++ intrinsics
16509   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16510   !
16511   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16512   ! Reference Manual in order  to  find  out  how to activate SIMD support
16513   ! in ALGLIB.
16514 
16515   ! COMMERCIAL EDITION OF ALGLIB:
16516   !
16517   ! Commercial Edition of ALGLIB includes following important improvements
16518   ! of this function:
16519   ! * high-performance native backend with same C# interface (C# version)
16520   ! * multithreading support (C++ and C# versions)
16521   ! * hardware vendor (Intel) implementations of linear algebra primitives
16522   !   (C++ and C# versions, x86/x64 platform)
16523   !
16524   ! We recommend you to read 'Working with commercial version' section  of
16525   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16526   ! related features provided by commercial edition of ALGLIB.
16527 
16528   -- ALGLIB routine --
16529      15.12.2009-22.01.2018
16530      Bochkanov Sergey
16531 *************************************************************************/
rmatrixrighttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16532 void rmatrixrighttrsm(ae_int_t m,
16533      ae_int_t n,
16534      /* Real    */ ae_matrix* a,
16535      ae_int_t i1,
16536      ae_int_t j1,
16537      ae_bool isupper,
16538      ae_bool isunit,
16539      ae_int_t optype,
16540      /* Real    */ ae_matrix* x,
16541      ae_int_t i2,
16542      ae_int_t j2,
16543      ae_state *_state)
16544 {
16545     ae_int_t s1;
16546     ae_int_t s2;
16547     ae_int_t tsa;
16548     ae_int_t tsb;
16549     ae_int_t tscur;
16550 
16551 
16552     tsa = matrixtilesizea(_state);
16553     tsb = matrixtilesizeb(_state);
16554     tscur = tsb;
16555     if( imax2(m, n, _state)<=tsb )
16556     {
16557         tscur = tsa;
16558     }
16559     ae_assert(tscur>=1, "RMatrixRightTRSM: integrity check failed", _state);
16560 
16561     /*
16562      * Upper level parallelization:
16563      * * decide whether it is feasible to activate multithreading
16564      * * perform optionally parallelized splits on M
16565      */
16566     if( m>=2*tsb&&ae_fp_greater_eq(rmul3((double)(m), (double)(n), (double)(n), _state),smpactivationlevel(_state)) )
16567     {
16568         if( _trypexec_rmatrixrighttrsm(m,n,a,i1,j1,isupper,isunit,optype,x,i2,j2, _state) )
16569         {
16570             return;
16571         }
16572     }
16573     if( m>=2*tsb )
16574     {
16575 
16576         /*
16577          * Split X: X*A = (X1 X2)^T*A
16578          */
16579         tiledsplit(m, tsb, &s1, &s2, _state);
16580         rmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16581         rmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
16582         return;
16583     }
16584 
16585     /*
16586      * Basecase: MKL or ALGLIB code
16587      */
16588     if( imax2(m, n, _state)<=tsb )
16589     {
16590         if( rmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
16591         {
16592             return;
16593         }
16594     }
16595     if( imax2(m, n, _state)<=tsa )
16596     {
16597         ablas_rmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16598         return;
16599     }
16600 
16601     /*
16602      * Recursive subdivision
16603      */
16604     if( m>=n )
16605     {
16606 
16607         /*
16608          * Split X: X*A = (X1 X2)^T*A
16609          */
16610         tiledsplit(m, tscur, &s1, &s2, _state);
16611         rmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16612         rmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
16613     }
16614     else
16615     {
16616 
16617         /*
16618          * Split A:
16619          *               (A1  A12)
16620          * X*op(A) = X*op(       )
16621          *               (     A2)
16622          *
16623          * Different variants depending on
16624          * IsUpper/OpType combinations
16625          */
16626         tiledsplit(n, tscur, &s1, &s2, _state);
16627         if( isupper&&optype==0 )
16628         {
16629 
16630             /*
16631              *                  (A1  A12)-1
16632              * X*A^-1 = (X1 X2)*(       )
16633              *                  (     A2)
16634              */
16635             rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16636             rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1, j1+s1, 0, 1.0, x, i2, j2+s1, _state);
16637             rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16638         }
16639         if( isupper&&optype!=0 )
16640         {
16641 
16642             /*
16643              *                  (A1'     )-1
16644              * X*A^-1 = (X1 X2)*(        )
16645              *                  (A12' A2')
16646              */
16647             rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16648             rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1, j1+s1, optype, 1.0, x, i2, j2, _state);
16649             rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16650         }
16651         if( !isupper&&optype==0 )
16652         {
16653 
16654             /*
16655              *                  (A1     )-1
16656              * X*A^-1 = (X1 X2)*(       )
16657              *                  (A21  A2)
16658              */
16659             rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16660             rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1+s1, j1, 0, 1.0, x, i2, j2, _state);
16661             rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16662         }
16663         if( !isupper&&optype!=0 )
16664         {
16665 
16666             /*
16667              *                  (A1' A21')-1
16668              * X*A^-1 = (X1 X2)*(        )
16669              *                  (     A2')
16670              */
16671             rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16672             rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1+s1, j1, optype, 1.0, x, i2, j2+s1, _state);
16673             rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16674         }
16675     }
16676 }
16677 
16678 
16679 /*************************************************************************
16680 Serial stub for GPL edition.
16681 *************************************************************************/
ae_bool _trypexec_rmatrixrighttrsm(ae_int_t m,
    ae_int_t n,
    /* Real    */ ae_matrix* a,
    ae_int_t i1,
    ae_int_t j1,
    ae_bool isupper,
    ae_bool isunit,
    ae_int_t optype,
    /* Real    */ ae_matrix* x,
    ae_int_t i2,
    ae_int_t j2,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
16697 
16698 
16699 /*************************************************************************
16700 This subroutine calculates op(A^-1)*X where:
16701 * X is MxN general matrix
16702 * A is MxM upper/lower triangular/unitriangular matrix
16703 * "op" may be identity transformation, transposition
16704 Multiplication result replaces X.
16705 
16706 INPUT PARAMETERS
    N   -   matrix size, N>=0
    M   -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    X   -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
16718     I2  -   submatrix offset
16719     J2  -   submatrix offset
16720 
16721   ! FREE EDITION OF ALGLIB:
16722   !
16723   ! Free Edition of ALGLIB supports following important features for  this
16724   ! function:
16725   ! * C++ version: x64 SIMD support using C++ intrinsics
16726   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16727   !
16728   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16729   ! Reference Manual in order  to  find  out  how to activate SIMD support
16730   ! in ALGLIB.
16731 
16732   ! COMMERCIAL EDITION OF ALGLIB:
16733   !
16734   ! Commercial Edition of ALGLIB includes following important improvements
16735   ! of this function:
16736   ! * high-performance native backend with same C# interface (C# version)
16737   ! * multithreading support (C++ and C# versions)
16738   ! * hardware vendor (Intel) implementations of linear algebra primitives
16739   !   (C++ and C# versions, x86/x64 platform)
16740   !
16741   ! We recommend you to read 'Working with commercial version' section  of
16742   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16743   ! related features provided by commercial edition of ALGLIB.
16744 
16745   -- ALGLIB routine --
16746      15.12.2009-22.01.2018
16747      Bochkanov Sergey
16748 *************************************************************************/
void rmatrixlefttrsm(ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_matrix* a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     /* Real    */ ae_matrix* x,
     ae_int_t i2,
     ae_int_t j2,
     ae_state *_state)
{
    ae_int_t s1;
    ae_int_t s2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    /*
     * Select current tile size: once the whole problem fits within the
     * larger tile TSB, further splits use the smaller tile TSA;
     * otherwise splitting continues with TSB-sized tiles.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( imax2(m, n, _state)<=tsb )
    {
        tscur = tsa;
    }
    ae_assert(tscur>=1, "RMatrixLeftTRSMRec: integrity check failed", _state);

    /*
     * Upper level parallelization:
     * * decide whether it is feasible to activate multithreading
     * * perform optionally parallelized splits on N
     */
    if( n>=2*tsb&&ae_fp_greater_eq(rmul3((double)(n), (double)(m), (double)(m), _state),smpactivationlevel(_state)) )
    {
        if( _trypexec_rmatrixlefttrsm(m,n,a,i1,j1,isupper,isunit,optype,x,i2,j2, _state) )
        {
            return;
        }
    }
    if( n>=2*tsb )
    {
        /*
         * Columns of X are independent problems: solve each half of X
         * against the same triangular matrix A.
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        rmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
        rmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        return;
    }

    /*
     * Basecase: MKL or ALGLIB code
     */
    if( imax2(m, n, _state)<=tsb )
    {
        if( rmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
        {
            return;
        }
    }
    if( imax2(m, n, _state)<=tsa )
    {
        ablas_rmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        return;
    }

    /*
     * Recursive subdivision
     */
    if( n>=m )
    {

        /*
         * Split X: op(A)^-1*X = op(A)^-1*(X1 X2)
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        rmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        rmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
    }
    else
    {

        /*
         * Split A.
         * In each branch below the statement order (solve one block,
         * apply the GEMM update, solve the other block) implements
         * block forward/back substitution and must not be changed.
         */
        tiledsplit(m, tscur, &s1, &s2, _state);
        if( isupper&&optype==0 )
        {

            /*
             *           (A1  A12)-1  ( X1 )
             * A^-1*X =  (       )   *(    )
             *           (     A2)    ( X2 )
             */
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
            rmatrixgemm(s1, n, s2, -1.0, a, i1, j1+s1, 0, x, i2+s1, j2, 0, 1.0, x, i2, j2, _state);
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        }
        if( isupper&&optype!=0 )
        {

            /*
             *          (A1'     )-1 ( X1 )
             * A^-1*X = (        )  *(    )
             *          (A12' A2')   ( X2 )
             */
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
            rmatrixgemm(s2, n, s1, -1.0, a, i1, j1+s1, optype, x, i2, j2, 0, 1.0, x, i2+s1, j2, _state);
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
        }
        if( !isupper&&optype==0 )
        {

            /*
             *          (A1     )-1 ( X1 )
             * A^-1*X = (       )  *(    )
             *          (A21  A2)   ( X2 )
             */
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
            rmatrixgemm(s2, n, s1, -1.0, a, i1+s1, j1, 0, x, i2, j2, 0, 1.0, x, i2+s1, j2, _state);
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
        }
        if( !isupper&&optype!=0 )
        {

            /*
             *          (A1' A21')-1 ( X1 )
             * A^-1*X = (        )  *(    )
             *          (     A2')   ( X2 )
             */
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
            rmatrixgemm(s1, n, s2, -1.0, a, i1+s1, j1, optype, x, i2+s1, j2, 0, 1.0, x, i2, j2, _state);
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        }
    }
}
16884 
16885 
16886 /*************************************************************************
16887 Serial stub for GPL edition.
16888 *************************************************************************/
ae_bool _trypexec_rmatrixlefttrsm(ae_int_t m,
    ae_int_t n,
    /* Real    */ ae_matrix* a,
    ae_int_t i1,
    ae_int_t j1,
    ae_bool isupper,
    ae_bool isunit,
    ae_int_t optype,
    /* Real    */ ae_matrix* x,
    ae_int_t i2,
    ae_int_t j2,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
16904 
16905 
16906 /*************************************************************************
16907 This subroutine calculates  C=alpha*A*A^H+beta*C  or  C=alpha*A^H*A+beta*C
16908 where:
16909 * C is NxN Hermitian matrix given by its upper/lower triangle
16910 * A is NxK matrix when A*A^H is calculated, KxN matrix otherwise
16911 
16912 Additional info:
16913 * multiplication result replaces C. If Beta=0, C elements are not used in
16914   calculations (not multiplied by zero - just not referenced)
16915 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
16916 * if both Beta and Alpha are zero, C is filled by zeros.
16917 
16918 INPUT PARAMETERS
16919     N       -   matrix size, N>=0
16920     K       -   matrix size, K>=0
16921     Alpha   -   coefficient
16922     A       -   matrix
16923     IA      -   submatrix offset (row index)
16924     JA      -   submatrix offset (column index)
16925     OpTypeA -   multiplication type:
16926                 * 0 - A*A^H is calculated
16927                 * 2 - A^H*A is calculated
16928     Beta    -   coefficient
16929     C       -   preallocated input/output matrix
16930     IC      -   submatrix offset (row index)
16931     JC      -   submatrix offset (column index)
16932     IsUpper -   whether upper or lower triangle of C is updated;
16933                 this function updates only one half of C, leaving
16934                 other half unchanged (not referenced at all).
16935 
16936   ! FREE EDITION OF ALGLIB:
16937   !
16938   ! Free Edition of ALGLIB supports following important features for  this
16939   ! function:
16940   ! * C++ version: x64 SIMD support using C++ intrinsics
16941   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16942   !
16943   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16944   ! Reference Manual in order  to  find  out  how to activate SIMD support
16945   ! in ALGLIB.
16946 
16947   ! COMMERCIAL EDITION OF ALGLIB:
16948   !
16949   ! Commercial Edition of ALGLIB includes following important improvements
16950   ! of this function:
16951   ! * high-performance native backend with same C# interface (C# version)
16952   ! * multithreading support (C++ and C# versions)
16953   ! * hardware vendor (Intel) implementations of linear algebra primitives
16954   !   (C++ and C# versions, x86/x64 platform)
16955   !
16956   ! We recommend you to read 'Working with commercial version' section  of
16957   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16958   ! related features provided by commercial edition of ALGLIB.
16959 
16960   -- ALGLIB routine --
16961      16.12.2009-22.01.2018
16962      Bochkanov Sergey
16963 *************************************************************************/
void cmatrixherk(ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Complex */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     /* Complex */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t s1;
    ae_int_t s2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    /*
     * Select current tile size; tile A size is halved here
     * (presumably because complex entries occupy twice the storage
     * of reals - TODO confirm against matrixtilesizea() docs).
     */
    tsa = matrixtilesizea(_state)/2;
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( imax2(n, k, _state)<=tsb )
    {
        tscur = tsa;
    }
    ae_assert(tscur>=1, "CMatrixHERK: integrity check failed", _state);

    /*
     * Decide whether it is feasible to activate multithreading
     */
    if( n>=2*tsb&&ae_fp_greater_eq(8*rmul3((double)(k), (double)(n), (double)(n), _state)/2,smpactivationlevel(_state)) )
    {
        if( _trypexec_cmatrixherk(n,k,alpha,a,ia,ja,optypea,beta,c,ic,jc,isupper, _state) )
        {
            return;
        }
    }

    /*
     * Use MKL or ALGLIB basecase code
     */
    if( imax2(n, k, _state)<=tsb )
    {
        if( cmatrixherkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
        {
            return;
        }
    }
    if( imax2(n, k, _state)<=tsa )
    {
        ablas_cmatrixherk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
        return;
    }

    /*
     * Recursive division of the problem
     */
    if( k>=n )
    {

        /*
         * Split K: the first half applies Beta to C, the second half
         * accumulates on top of it with coefficient 1.0.
         */
        tiledsplit(k, tscur, &s1, &s2, _state);
        if( optypea==0 )
        {
            cmatrixherk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper, _state);
        }
        else
        {
            cmatrixherk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper, _state);
        }
    }
    else
    {

        /*
         * Split N: two diagonal blocks are handled by recursive HERK
         * calls, the off-diagonal block of the stored triangle by GEMM
         * (op=2 denotes conjugate transposition).
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        if( optypea==0&&isupper )
        {
            cmatrixherk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            cmatrixgemm(s1, s2, k, ae_complex_from_d(alpha), a, ia, ja, 0, a, ia+s1, ja, 2, ae_complex_from_d(beta), c, ic, jc+s1, _state);
        }
        if( optypea==0&&!isupper )
        {
            cmatrixherk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            cmatrixgemm(s2, s1, k, ae_complex_from_d(alpha), a, ia+s1, ja, 0, a, ia, ja, 2, ae_complex_from_d(beta), c, ic+s1, jc, _state);
        }
        if( optypea!=0&&isupper )
        {
            cmatrixherk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            cmatrixgemm(s1, s2, k, ae_complex_from_d(alpha), a, ia, ja, 2, a, ia, ja+s1, 0, ae_complex_from_d(beta), c, ic, jc+s1, _state);
        }
        if( optypea!=0&&!isupper )
        {
            cmatrixherk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            cmatrixgemm(s2, s1, k, ae_complex_from_d(alpha), a, ia, ja+s1, 2, a, ia, ja, 0, ae_complex_from_d(beta), c, ic+s1, jc, _state);
        }
    }
}
17075 
17076 
17077 /*************************************************************************
17078 Serial stub for GPL edition.
17079 *************************************************************************/
ae_bool _trypexec_cmatrixherk(ae_int_t n,
    ae_int_t k,
    double alpha,
    /* Complex */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    double beta,
    /* Complex */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_bool isupper,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
17096 
17097 
17098 /*************************************************************************
17099 This subroutine calculates  C=alpha*A*A^T+beta*C  or  C=alpha*A^T*A+beta*C
17100 where:
17101 * C is NxN symmetric matrix given by its upper/lower triangle
17102 * A is NxK matrix when A*A^T is calculated, KxN matrix otherwise
17103 
17104 Additional info:
17105 * multiplication result replaces C. If Beta=0, C elements are not used in
17106   calculations (not multiplied by zero - just not referenced)
17107 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
17108 * if both Beta and Alpha are zero, C is filled by zeros.
17109 
17110 INPUT PARAMETERS
17111     N       -   matrix size, N>=0
17112     K       -   matrix size, K>=0
17113     Alpha   -   coefficient
17114     A       -   matrix
17115     IA      -   submatrix offset (row index)
17116     JA      -   submatrix offset (column index)
17117     OpTypeA -   multiplication type:
17118                 * 0 - A*A^T is calculated
17119                 * 2 - A^T*A is calculated
17120     Beta    -   coefficient
17121     C       -   preallocated input/output matrix
17122     IC      -   submatrix offset (row index)
17123     JC      -   submatrix offset (column index)
17124     IsUpper -   whether C is upper triangular or lower triangular
17125 
17126   ! FREE EDITION OF ALGLIB:
17127   !
17128   ! Free Edition of ALGLIB supports following important features for  this
17129   ! function:
17130   ! * C++ version: x64 SIMD support using C++ intrinsics
17131   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
17132   !
17133   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
17134   ! Reference Manual in order  to  find  out  how to activate SIMD support
17135   ! in ALGLIB.
17136 
17137   ! COMMERCIAL EDITION OF ALGLIB:
17138   !
17139   ! Commercial Edition of ALGLIB includes following important improvements
17140   ! of this function:
17141   ! * high-performance native backend with same C# interface (C# version)
17142   ! * multithreading support (C++ and C# versions)
17143   ! * hardware vendor (Intel) implementations of linear algebra primitives
17144   !   (C++ and C# versions, x86/x64 platform)
17145   !
17146   ! We recommend you to read 'Working with commercial version' section  of
17147   ! ALGLIB Reference Manual in order to find out how to  use  performance-
17148   ! related features provided by commercial edition of ALGLIB.
17149 
17150   -- ALGLIB routine --
17151      16.12.2009-22.01.2018
17152      Bochkanov Sergey
17153 *************************************************************************/
void rmatrixsyrk(ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     /* Real    */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t s1;
    ae_int_t s2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    /*
     * Select current tile size: once the whole problem fits within the
     * larger tile TSB, further splits use the smaller tile TSA.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( imax2(n, k, _state)<=tsb )
    {
        tscur = tsa;
    }
    ae_assert(tscur>=1, "RMatrixSYRK: integrity check failed", _state);

    /*
     * Decide whether it is feasible to activate multithreading
     */
    if( n>=2*tsb&&ae_fp_greater_eq(2*rmul3((double)(k), (double)(n), (double)(n), _state)/2,smpactivationlevel(_state)) )
    {
        if( _trypexec_rmatrixsyrk(n,k,alpha,a,ia,ja,optypea,beta,c,ic,jc,isupper, _state) )
        {
            return;
        }
    }

    /*
     * Use MKL or generic basecase code
     */
    if( imax2(n, k, _state)<=tsb )
    {
        if( rmatrixsyrkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
        {
            return;
        }
    }
    if( imax2(n, k, _state)<=tsa )
    {
        ablas_rmatrixsyrk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
        return;
    }

    /*
     * Recursive subdivision of the problem
     */
    if( k>=n )
    {

        /*
         * Split K: the first half applies Beta to C, the second half
         * accumulates on top of it with coefficient 1.0.
         */
        tiledsplit(k, tscur, &s1, &s2, _state);
        if( optypea==0 )
        {
            rmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper, _state);
        }
        else
        {
            rmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper, _state);
        }
    }
    else
    {

        /*
         * Split N: two diagonal blocks are handled by recursive SYRK
         * calls, the off-diagonal block of the stored triangle by GEMM
         * (op=1 denotes transposition).
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        if( optypea==0&&isupper )
        {
            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 0, a, ia+s1, ja, 1, beta, c, ic, jc+s1, _state);
        }
        if( optypea==0&&!isupper )
        {
            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            rmatrixgemm(s2, s1, k, alpha, a, ia+s1, ja, 0, a, ia, ja, 1, beta, c, ic+s1, jc, _state);
        }
        if( optypea!=0&&isupper )
        {
            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 1, a, ia, ja+s1, 0, beta, c, ic, jc+s1, _state);
        }
        if( optypea!=0&&!isupper )
        {
            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            rmatrixgemm(s2, s1, k, alpha, a, ia, ja+s1, 1, a, ia, ja, 0, beta, c, ic+s1, jc, _state);
        }
    }
}
17265 
17266 
17267 /*************************************************************************
17268 Serial stub for GPL edition.
17269 *************************************************************************/
ae_bool _trypexec_rmatrixsyrk(ae_int_t n,
    ae_int_t k,
    double alpha,
    /* Real    */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    double beta,
    /* Real    */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_bool isupper,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
17286 
17287 
17288 /*************************************************************************
17289 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
17290 * C is MxN general matrix
17291 * op1(A) is MxK matrix
17292 * op2(B) is KxN matrix
17293 * "op" may be identity transformation, transposition, conjugate transposition
17294 
17295 Additional info:
17296 * cache-oblivious algorithm is used.
17297 * multiplication result replaces C. If Beta=0, C elements are not used in
17298   calculations (not multiplied by zero - just not referenced)
17299 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
17300 * if both Beta and Alpha are zero, C is filled by zeros.
17301 
17302 IMPORTANT:
17303 
17304 This function does NOT preallocate output matrix C, it MUST be preallocated
17305 by caller prior to calling this function. In case C does not have  enough
17306 space to store result, exception will be generated.
17307 
17308 INPUT PARAMETERS
17309     M       -   matrix size, M>0
17310     N       -   matrix size, N>0
17311     K       -   matrix size, K>0
17312     Alpha   -   coefficient
17313     A       -   matrix
17314     IA      -   submatrix offset
17315     JA      -   submatrix offset
17316     OpTypeA -   transformation type:
17317                 * 0 - no transformation
17318                 * 1 - transposition
17319                 * 2 - conjugate transposition
17320     B       -   matrix
17321     IB      -   submatrix offset
17322     JB      -   submatrix offset
17323     OpTypeB -   transformation type:
17324                 * 0 - no transformation
17325                 * 1 - transposition
17326                 * 2 - conjugate transposition
17327     Beta    -   coefficient
17328     C       -   matrix (PREALLOCATED, large enough to store result)
17329     IC      -   submatrix offset
17330     JC      -   submatrix offset
17331 
17332   ! FREE EDITION OF ALGLIB:
17333   !
17334   ! Free Edition of ALGLIB supports following important features for  this
17335   ! function:
17336   ! * C++ version: x64 SIMD support using C++ intrinsics
17337   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
17338   !
17339   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
17340   ! Reference Manual in order  to  find  out  how to activate SIMD support
17341   ! in ALGLIB.
17342 
17343   ! COMMERCIAL EDITION OF ALGLIB:
17344   !
17345   ! Commercial Edition of ALGLIB includes following important improvements
17346   ! of this function:
17347   ! * high-performance native backend with same C# interface (C# version)
17348   ! * multithreading support (C++ and C# versions)
17349   ! * hardware vendor (Intel) implementations of linear algebra primitives
17350   !   (C++ and C# versions, x86/x64 platform)
17351   !
17352   ! We recommend you to read 'Working with commercial version' section  of
17353   ! ALGLIB Reference Manual in order to find out how to  use  performance-
17354   ! related features provided by commercial edition of ALGLIB.
17355 
17356   -- ALGLIB routine --
17357      2009-2019
17358      Bochkanov Sergey
17359 *************************************************************************/
void cmatrixgemm(ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     ae_complex alpha,
     /* Complex */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     /* Complex */ ae_matrix* b,
     ae_int_t ib,
     ae_int_t jb,
     ae_int_t optypeb,
     ae_complex beta,
     /* Complex */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_state *_state)
{
    ae_int_t ts;


    ts = matrixtilesizeb(_state);

    /*
     * Check input sizes for correctness
     * (fixed misspelled "incorect" in the C-size assertion messages)
     */
    ae_assert((optypea==0||optypea==1)||optypea==2, "CMatrixGEMM: incorrect OpTypeA (must be 0 or 1 or 2)", _state);
    ae_assert((optypeb==0||optypeb==1)||optypeb==2, "CMatrixGEMM: incorrect OpTypeB (must be 0 or 1 or 2)", _state);
    ae_assert(ic+m<=c->rows, "CMatrixGEMM: incorrect size of output matrix C", _state);
    ae_assert(jc+n<=c->cols, "CMatrixGEMM: incorrect size of output matrix C", _state);

    /*
     * Decide whether it is feasible to activate multithreading;
     * _trypexec_* returns ae_false in the GPL edition, in which case
     * we fall through to the serial recursive implementation.
     */
    if( (m>=2*ts||n>=2*ts)&&ae_fp_greater_eq(8*rmul3((double)(m), (double)(n), (double)(k), _state),smpactivationlevel(_state)) )
    {
        if( _trypexec_cmatrixgemm(m,n,k,alpha,a,ia,ja,optypea,b,ib,jb,optypeb,beta,c,ic,jc, _state) )
        {
            return;
        }
    }

    /*
     * Start actual work: cache-oblivious recursive GEMM kernel
     */
    ablas_cmatrixgemmrec(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
}
17407 
17408 
17409 /*************************************************************************
17410 Serial stub for GPL edition.
17411 *************************************************************************/
ae_bool _trypexec_cmatrixgemm(ae_int_t m,
    ae_int_t n,
    ae_int_t k,
    ae_complex alpha,
    /* Complex */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    /* Complex */ ae_matrix* b,
    ae_int_t ib,
    ae_int_t jb,
    ae_int_t optypeb,
    ae_complex beta,
    /* Complex */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
17432 
17433 
17434 /*************************************************************************
17435 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
17436 * C is MxN general matrix
17437 * op1(A) is MxK matrix
17438 * op2(B) is KxN matrix
17439 * "op" may be identity transformation, transposition
17440 
17441 Additional info:
17442 * cache-oblivious algorithm is used.
17443 * multiplication result replaces C. If Beta=0, C elements are not used in
17444   calculations (not multiplied by zero - just not referenced)
17445 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
17446 * if both Beta and Alpha are zero, C is filled by zeros.
17447 
17448 IMPORTANT:
17449 
17450 This function does NOT preallocate output matrix C, it MUST be preallocated
17451 by caller prior to calling this function. In case C does not have  enough
17452 space to store result, exception will be generated.
17453 
17454 INPUT PARAMETERS
17455     M       -   matrix size, M>0
17456     N       -   matrix size, N>0
17457     K       -   matrix size, K>0
17458     Alpha   -   coefficient
17459     A       -   matrix
17460     IA      -   submatrix offset
17461     JA      -   submatrix offset
17462     OpTypeA -   transformation type:
17463                 * 0 - no transformation
17464                 * 1 - transposition
17465     B       -   matrix
17466     IB      -   submatrix offset
17467     JB      -   submatrix offset
17468     OpTypeB -   transformation type:
17469                 * 0 - no transformation
17470                 * 1 - transposition
17471     Beta    -   coefficient
17472     C       -   PREALLOCATED output matrix, large enough to store result
17473     IC      -   submatrix offset
17474     JC      -   submatrix offset
17475 
17476   ! FREE EDITION OF ALGLIB:
17477   !
17478   ! Free Edition of ALGLIB supports following important features for  this
17479   ! function:
17480   ! * C++ version: x64 SIMD support using C++ intrinsics
17481   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
17482   !
17483   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
17484   ! Reference Manual in order  to  find  out  how to activate SIMD support
17485   ! in ALGLIB.
17486 
17487   ! COMMERCIAL EDITION OF ALGLIB:
17488   !
17489   ! Commercial Edition of ALGLIB includes following important improvements
17490   ! of this function:
17491   ! * high-performance native backend with same C# interface (C# version)
17492   ! * multithreading support (C++ and C# versions)
17493   ! * hardware vendor (Intel) implementations of linear algebra primitives
17494   !   (C++ and C# versions, x86/x64 platform)
17495   !
17496   ! We recommend you to read 'Working with commercial version' section  of
17497   ! ALGLIB Reference Manual in order to find out how to  use  performance-
17498   ! related features provided by commercial edition of ALGLIB.
17499 
17500   -- ALGLIB routine --
17501      2009-2019
17502      Bochkanov Sergey
17503 *************************************************************************/
rmatrixgemm(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)17504 void rmatrixgemm(ae_int_t m,
17505      ae_int_t n,
17506      ae_int_t k,
17507      double alpha,
17508      /* Real    */ ae_matrix* a,
17509      ae_int_t ia,
17510      ae_int_t ja,
17511      ae_int_t optypea,
17512      /* Real    */ ae_matrix* b,
17513      ae_int_t ib,
17514      ae_int_t jb,
17515      ae_int_t optypeb,
17516      double beta,
17517      /* Real    */ ae_matrix* c,
17518      ae_int_t ic,
17519      ae_int_t jc,
17520      ae_state *_state)
17521 {
17522     ae_int_t ts;
17523 
17524 
17525     ts = matrixtilesizeb(_state);
17526 
17527     /*
17528      * Check input sizes for correctness
17529      */
17530     ae_assert(optypea==0||optypea==1, "RMatrixGEMM: incorrect OpTypeA (must be 0 or 1)", _state);
17531     ae_assert(optypeb==0||optypeb==1, "RMatrixGEMM: incorrect OpTypeB (must be 0 or 1)", _state);
17532     ae_assert(ic+m<=c->rows, "RMatrixGEMM: incorect size of output matrix C", _state);
17533     ae_assert(jc+n<=c->cols, "RMatrixGEMM: incorect size of output matrix C", _state);
17534 
17535     /*
17536      * Decide whether it is feasible to activate multithreading
17537      */
17538     if( (m>=2*ts||n>=2*ts)&&ae_fp_greater_eq(2*rmul3((double)(m), (double)(n), (double)(k), _state),smpactivationlevel(_state)) )
17539     {
17540         if( _trypexec_rmatrixgemm(m,n,k,alpha,a,ia,ja,optypea,b,ib,jb,optypeb,beta,c,ic,jc, _state) )
17541         {
17542             return;
17543         }
17544     }
17545 
17546     /*
17547      * Start actual work
17548      */
17549     ablas_rmatrixgemmrec(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
17550 }
17551 
17552 
17553 /*************************************************************************
17554 Serial stub for GPL edition.
17555 *************************************************************************/
_trypexec_rmatrixgemm(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)17556 ae_bool _trypexec_rmatrixgemm(ae_int_t m,
17557     ae_int_t n,
17558     ae_int_t k,
17559     double alpha,
17560     /* Real    */ ae_matrix* a,
17561     ae_int_t ia,
17562     ae_int_t ja,
17563     ae_int_t optypea,
17564     /* Real    */ ae_matrix* b,
17565     ae_int_t ib,
17566     ae_int_t jb,
17567     ae_int_t optypeb,
17568     double beta,
17569     /* Real    */ ae_matrix* c,
17570     ae_int_t ic,
17571     ae_int_t jc,
17572     ae_state *_state)
17573 {
17574     return ae_false;
17575 }
17576 
17577 
17578 /*************************************************************************
17579 This subroutine is an older version of CMatrixHERK(), one with wrong  name
17580 (it is HErmitian update, not SYmmetric). It  is  left  here  for  backward
17581 compatibility.
17582 
17583   -- ALGLIB routine --
17584      16.12.2009
17585      Bochkanov Sergey
17586 *************************************************************************/
cmatrixsyrk(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)17587 void cmatrixsyrk(ae_int_t n,
17588      ae_int_t k,
17589      double alpha,
17590      /* Complex */ ae_matrix* a,
17591      ae_int_t ia,
17592      ae_int_t ja,
17593      ae_int_t optypea,
17594      double beta,
17595      /* Complex */ ae_matrix* c,
17596      ae_int_t ic,
17597      ae_int_t jc,
17598      ae_bool isupper,
17599      ae_state *_state)
17600 {
17601 
17602 
17603     cmatrixherk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
17604 }
17605 
17606 
17607 /*************************************************************************
17608 Performs one step  of stable Gram-Schmidt  process  on  vector  X[]  using
17609 set of orthonormal rows Q[].
17610 
17611 INPUT PARAMETERS:
17612     Q       -   array[M,N], matrix with orthonormal rows
17613     M, N    -   rows/cols
17614     X       -   array[N], vector to process
17615     NeedQX  -   whether we need QX or not
17616 
17617 OUTPUT PARAMETERS:
17618     X       -   stores X - Q'*(Q*X)
17619     QX      -   if NeedQX is True, array[M] filled with elements  of  Q*X,
17620                 reallocated if length is less than M.
17621                 Ignored otherwise.
17622 
17623   -- ALGLIB --
17624      Copyright 20.01.2020 by Bochkanov Sergey
17625 *************************************************************************/
rowwisegramschmidt(ae_matrix * q,ae_int_t m,ae_int_t n,ae_vector * x,ae_vector * qx,ae_bool needqx,ae_state * _state)17626 void rowwisegramschmidt(/* Real    */ ae_matrix* q,
17627      ae_int_t m,
17628      ae_int_t n,
17629      /* Real    */ ae_vector* x,
17630      /* Real    */ ae_vector* qx,
17631      ae_bool needqx,
17632      ae_state *_state)
17633 {
17634     ae_int_t i;
17635     double v;
17636 
17637 
17638     if( needqx )
17639     {
17640         rvectorsetlengthatleast(qx, m, _state);
17641     }
17642     for(i=0; i<=m-1; i++)
17643     {
17644         v = rdotvr(n, x, q, i, _state);
17645         raddrv(n, -v, q, i, x, _state);
17646         if( needqx )
17647         {
17648             qx->ptr.p_double[i] = v;
17649         }
17650     }
17651 }
17652 
17653 
17654 /*************************************************************************
17655 Complex ABLASSplitLength
17656 
17657   -- ALGLIB routine --
17658      15.12.2009
17659      Bochkanov Sergey
17660 *************************************************************************/
ablas_ablasinternalsplitlength(ae_int_t n,ae_int_t nb,ae_int_t * n1,ae_int_t * n2,ae_state * _state)17661 static void ablas_ablasinternalsplitlength(ae_int_t n,
17662      ae_int_t nb,
17663      ae_int_t* n1,
17664      ae_int_t* n2,
17665      ae_state *_state)
17666 {
17667     ae_int_t r;
17668 
17669     *n1 = 0;
17670     *n2 = 0;
17671 
17672     if( n<=nb )
17673     {
17674 
17675         /*
17676          * Block size, no further splitting
17677          */
17678         *n1 = n;
17679         *n2 = 0;
17680     }
17681     else
17682     {
17683 
17684         /*
17685          * Greater than block size
17686          */
17687         if( n%nb!=0 )
17688         {
17689 
17690             /*
17691              * Split remainder
17692              */
17693             *n2 = n%nb;
17694             *n1 = n-(*n2);
17695         }
17696         else
17697         {
17698 
17699             /*
17700              * Split on block boundaries
17701              */
17702             *n2 = n/2;
17703             *n1 = n-(*n2);
17704             if( *n1%nb==0 )
17705             {
17706                 return;
17707             }
17708             r = nb-*n1%nb;
17709             *n1 = *n1+r;
17710             *n2 = *n2-r;
17711         }
17712     }
17713 }
17714 
17715 
17716 /*************************************************************************
17717 Level 2 variant of CMatrixRightTRSM
17718 *************************************************************************/
/*
 * Level-2 kernel: in-place right triangular solve, X := X*op(A)^(-1).
 * A is an NxN triangular block at offset (I1,J1), X is an MxN block at
 * offset (I2,J2).  OpType: 0 - none, 1 - transpose, 2 - conjugate
 * transpose.  IsUpper selects the triangle, IsUnit treats the diagonal
 * of A as all-ones (diagonal entries are then never read).
 */
static void ablas_cmatrixrighttrsm2(ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_matrix* a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     /* Complex */ ae_matrix* x,
     ae_int_t i2,
     ae_int_t j2,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_complex vc;    /* running dot product / elimination multiplier */
    ae_complex vd;    /* diagonal divisor; stays 1 for unit-diagonal A */



    /*
     * Special case: empty X, nothing to solve
     */
    if( n*m==0 )
    {
        return;
    }

    /*
     * Try to call fast TRSM: if the optimized kernel handles the request
     * completely, we are done
     */
    if( cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
    {
        return;
    }

    /*
     * General case: each row of X is an independent right-hand side
     */
    if( isupper )
    {

        /*
         * Upper triangular matrix
         */
        if( optype==0 )
        {

            /*
             * X*A^(-1): forward substitution, columns left to right
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    if( isunit )
                    {
                        vd = ae_complex_from_i(1);
                    }
                    else
                    {
                        vd = a->ptr.pp_complex[i1+j][j1+j];
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(x->ptr.pp_complex[i2+i][j2+j],vd);
                    if( j<n-1 )
                    {
                        /* eliminate the solved column J from the trailing part of row I */
                        vc = x->ptr.pp_complex[i2+i][j2+j];
                        ae_v_csubc(&x->ptr.pp_complex[i2+i][j2+j+1], 1, &a->ptr.pp_complex[i1+j][j1+j+1], 1, "N", ae_v_len(j2+j+1,j2+n-1), vc);
                    }
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * X*A^(-T): back substitution, columns right to left,
             * dot product against already-solved columns
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    vc = ae_complex_from_i(0);
                    vd = ae_complex_from_i(1);
                    if( j<n-1 )
                    {
                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2+j+1], 1, "N", &a->ptr.pp_complex[i1+j][j1+j+1], 1, "N", ae_v_len(j2+j+1,j2+n-1));
                    }
                    if( !isunit )
                    {
                        vd = a->ptr.pp_complex[i1+j][j1+j];
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
                }
            }
            return;
        }
        if( optype==2 )
        {

            /*
             * X*A^(-H): same traversal as the transposed case, but with
             * conjugated elements of A
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    vc = ae_complex_from_i(0);
                    vd = ae_complex_from_i(1);
                    if( j<n-1 )
                    {
                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2+j+1], 1, "N", &a->ptr.pp_complex[i1+j][j1+j+1], 1, "Conj", ae_v_len(j2+j+1,j2+n-1));
                    }
                    if( !isunit )
                    {
                        vd = ae_c_conj(a->ptr.pp_complex[i1+j][j1+j], _state);
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
                }
            }
            return;
        }
    }
    else
    {

        /*
         * Lower triangular matrix: mirror of the upper case with
         * reversed column traversal
         */
        if( optype==0 )
        {

            /*
             * X*A^(-1): substitution with columns right to left
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    if( isunit )
                    {
                        vd = ae_complex_from_i(1);
                    }
                    else
                    {
                        vd = a->ptr.pp_complex[i1+j][j1+j];
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(x->ptr.pp_complex[i2+i][j2+j],vd);
                    if( j>0 )
                    {
                        /* eliminate the solved column J from the leading part of row I */
                        vc = x->ptr.pp_complex[i2+i][j2+j];
                        ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &a->ptr.pp_complex[i1+j][j1], 1, "N", ae_v_len(j2,j2+j-1), vc);
                    }
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * X*A^(-T): columns left to right, dot product against
             * already-solved leading columns
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    vc = ae_complex_from_i(0);
                    vd = ae_complex_from_i(1);
                    if( j>0 )
                    {
                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2], 1, "N", &a->ptr.pp_complex[i1+j][j1], 1, "N", ae_v_len(j2,j2+j-1));
                    }
                    if( !isunit )
                    {
                        vd = a->ptr.pp_complex[i1+j][j1+j];
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
                }
            }
            return;
        }
        if( optype==2 )
        {

            /*
             * X*A^(-H): conjugated variant of the transposed case
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    vc = ae_complex_from_i(0);
                    vd = ae_complex_from_i(1);
                    if( j>0 )
                    {
                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2], 1, "N", &a->ptr.pp_complex[i1+j][j1], 1, "Conj", ae_v_len(j2,j2+j-1));
                    }
                    if( !isunit )
                    {
                        vd = ae_c_conj(a->ptr.pp_complex[i1+j][j1+j], _state);
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
                }
            }
            return;
        }
    }
}
17929 
17930 
17931 /*************************************************************************
17932 Level-2 subroutine
17933 *************************************************************************/
/*
 * Level-2 kernel: in-place left triangular solve, X := op(A)^(-1)*X.
 * A is an MxM triangular block at offset (I1,J1), X is an MxN block at
 * offset (I2,J2).  OpType: 0 - none, 1 - transpose, 2 - conjugate
 * transpose.  IsUpper selects the triangle, IsUnit treats the diagonal
 * of A as all-ones.
 */
static void ablas_cmatrixlefttrsm2(ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_matrix* a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     /* Complex */ ae_matrix* x,
     ae_int_t i2,
     ae_int_t j2,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_complex vc;    /* off-diagonal multiplier used for row elimination */
    ae_complex vd;    /* reciprocal of the diagonal element (or 1 for unit A) */



    /*
     * Special case: empty X, nothing to solve
     */
    if( n*m==0 )
    {
        return;
    }

    /*
     * Try to call fast TRSM: if the optimized kernel handles the request
     * completely, we are done
     */
    if( cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
    {
        return;
    }

    /*
     * General case: rows of X are updated with whole-row vector operations
     */
    if( isupper )
    {

        /*
         * Upper triangular matrix
         */
        if( optype==0 )
        {

            /*
             * A^(-1)*X: back substitution, rows bottom to top; subtract the
             * already-solved rows below, then scale by 1/A[i][i]
             */
            for(i=m-1; i>=0; i--)
            {
                for(j=i+1; j<=m-1; j++)
                {
                    vc = a->ptr.pp_complex[i1+i][j1+j];
                    ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &x->ptr.pp_complex[i2+j][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
                if( !isunit )
                {
                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]);
                    ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * A^(-T)*X: rows top to bottom; scale row I first, then push
             * its contribution down to the rows below
             */
            for(i=0; i<=m-1; i++)
            {
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]);
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                for(j=i+1; j<=m-1; j++)
                {
                    vc = a->ptr.pp_complex[i1+i][j1+j];
                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
            }
            return;
        }
        if( optype==2 )
        {

            /*
             * A^(-H)*X: same traversal as the transposed case, but with
             * conjugated elements of A
             */
            for(i=0; i<=m-1; i++)
            {
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    vd = ae_c_d_div(1,ae_c_conj(a->ptr.pp_complex[i1+i][j1+i], _state));
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                for(j=i+1; j<=m-1; j++)
                {
                    vc = ae_c_conj(a->ptr.pp_complex[i1+i][j1+j], _state);
                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
            }
            return;
        }
    }
    else
    {

        /*
         * Lower triangular matrix
         */
        if( optype==0 )
        {

            /*
             * A^(-1)*X: forward substitution, rows top to bottom
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    vc = a->ptr.pp_complex[i1+i][j1+j];
                    ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &x->ptr.pp_complex[i2+j][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    /* NOTE: the loop above exits with J==I, so this reads the diagonal element A[I][I] */
                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+j][j1+j]);
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * A^(-T)*X: rows bottom to top; scale row I first, then push
             * its contribution up to the rows above
             */
            for(i=m-1; i>=0; i--)
            {
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]);
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                for(j=i-1; j>=0; j--)
                {
                    vc = a->ptr.pp_complex[i1+i][j1+j];
                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
            }
            return;
        }
        if( optype==2 )
        {

            /*
             * A^(-H)*X: conjugated variant of the transposed case
             */
            for(i=m-1; i>=0; i--)
            {
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    vd = ae_c_d_div(1,ae_c_conj(a->ptr.pp_complex[i1+i][j1+i], _state));
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                for(j=i-1; j>=0; j--)
                {
                    vc = ae_c_conj(a->ptr.pp_complex[i1+i][j1+j], _state);
                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
            }
            return;
        }
    }
}
18134 
18135 
18136 /*************************************************************************
18137 Level 2 subroutine
18138 
18139   -- ALGLIB routine --
18140      15.12.2009
18141      Bochkanov Sergey
18142 *************************************************************************/
/*
 * Level-2 kernel: in-place right triangular solve, X := X*op(A)^(-1),
 * real version.  A is an NxN triangular block at offset (I1,J1), X is an
 * MxN block at offset (I2,J2).  OpType: 0 - none, 1 - transpose.
 * IsUpper selects the triangle, IsUnit treats the diagonal of A as ones.
 */
static void ablas_rmatrixrighttrsm2(ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_matrix* a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     /* Real    */ ae_matrix* x,
     ae_int_t i2,
     ae_int_t j2,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double vr;    /* running dot product / elimination multiplier */
    double vd;    /* diagonal divisor; stays 1 for unit-diagonal A */



    /*
     * Special case: empty X, nothing to solve
     */
    if( n*m==0 )
    {
        return;
    }

    /*
     * Try to use "fast" code: if the optimized kernel handles the request
     * completely, we are done
     */
    if( rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
    {
        return;
    }

    /*
     * General case: each row of X is an independent right-hand side
     */
    if( isupper )
    {

        /*
         * Upper triangular matrix
         */
        if( optype==0 )
        {

            /*
             * X*A^(-1): forward substitution, columns left to right
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    if( isunit )
                    {
                        vd = (double)(1);
                    }
                    else
                    {
                        vd = a->ptr.pp_double[i1+j][j1+j];
                    }
                    x->ptr.pp_double[i2+i][j2+j] = x->ptr.pp_double[i2+i][j2+j]/vd;
                    if( j<n-1 )
                    {
                        /* eliminate the solved column J from the trailing part of row I */
                        vr = x->ptr.pp_double[i2+i][j2+j];
                        ae_v_subd(&x->ptr.pp_double[i2+i][j2+j+1], 1, &a->ptr.pp_double[i1+j][j1+j+1], 1, ae_v_len(j2+j+1,j2+n-1), vr);
                    }
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * X*A^(-T): back substitution, columns right to left,
             * dot product against already-solved columns
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    vr = (double)(0);
                    vd = (double)(1);
                    if( j<n-1 )
                    {
                        vr = ae_v_dotproduct(&x->ptr.pp_double[i2+i][j2+j+1], 1, &a->ptr.pp_double[i1+j][j1+j+1], 1, ae_v_len(j2+j+1,j2+n-1));
                    }
                    if( !isunit )
                    {
                        vd = a->ptr.pp_double[i1+j][j1+j];
                    }
                    x->ptr.pp_double[i2+i][j2+j] = (x->ptr.pp_double[i2+i][j2+j]-vr)/vd;
                }
            }
            return;
        }
    }
    else
    {

        /*
         * Lower triangular matrix: mirror of the upper case with
         * reversed column traversal
         */
        if( optype==0 )
        {

            /*
             * X*A^(-1): substitution with columns right to left
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    if( isunit )
                    {
                        vd = (double)(1);
                    }
                    else
                    {
                        vd = a->ptr.pp_double[i1+j][j1+j];
                    }
                    x->ptr.pp_double[i2+i][j2+j] = x->ptr.pp_double[i2+i][j2+j]/vd;
                    if( j>0 )
                    {
                        /* eliminate the solved column J from the leading part of row I */
                        vr = x->ptr.pp_double[i2+i][j2+j];
                        ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &a->ptr.pp_double[i1+j][j1], 1, ae_v_len(j2,j2+j-1), vr);
                    }
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * X*A^(-T): columns left to right, dot product against
             * already-solved leading columns
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    vr = (double)(0);
                    vd = (double)(1);
                    if( j>0 )
                    {
                        vr = ae_v_dotproduct(&x->ptr.pp_double[i2+i][j2], 1, &a->ptr.pp_double[i1+j][j1], 1, ae_v_len(j2,j2+j-1));
                    }
                    if( !isunit )
                    {
                        vd = a->ptr.pp_double[i1+j][j1+j];
                    }
                    x->ptr.pp_double[i2+i][j2+j] = (x->ptr.pp_double[i2+i][j2+j]-vr)/vd;
                }
            }
            return;
        }
    }
}
18303 
18304 
18305 /*************************************************************************
18306 Level 2 subroutine
18307 *************************************************************************/
ablas_rmatrixlefttrsm2(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)18308 static void ablas_rmatrixlefttrsm2(ae_int_t m,
18309      ae_int_t n,
18310      /* Real    */ ae_matrix* a,
18311      ae_int_t i1,
18312      ae_int_t j1,
18313      ae_bool isupper,
18314      ae_bool isunit,
18315      ae_int_t optype,
18316      /* Real    */ ae_matrix* x,
18317      ae_int_t i2,
18318      ae_int_t j2,
18319      ae_state *_state)
18320 {
18321     ae_int_t i;
18322     ae_int_t j;
18323     double vr;
18324     double vd;
18325 
18326 
18327 
18328     /*
18329      * Special case
18330      */
18331     if( n==0||m==0 )
18332     {
18333         return;
18334     }
18335 
18336     /*
18337      * Try fast code
18338      */
18339     if( rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
18340     {
18341         return;
18342     }
18343 
18344     /*
18345      * General case
18346      */
18347     if( isupper )
18348     {
18349 
18350         /*
18351          * Upper triangular matrix
18352          */
18353         if( optype==0 )
18354         {
18355 
18356             /*
18357              * A^(-1)*X
18358              */
18359             for(i=m-1; i>=0; i--)
18360             {
18361                 for(j=i+1; j<=m-1; j++)
18362                 {
18363                     vr = a->ptr.pp_double[i1+i][j1+j];
18364                     ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &x->ptr.pp_double[i2+j][j2], 1, ae_v_len(j2,j2+n-1), vr);
18365                 }
18366                 if( !isunit )
18367                 {
18368                     vd = 1/a->ptr.pp_double[i1+i][j1+i];
18369                     ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
18370                 }
18371             }
18372             return;
18373         }
18374         if( optype==1 )
18375         {
18376 
18377             /*
18378              * A^(-T)*X
18379              */
18380             for(i=0; i<=m-1; i++)
18381             {
18382                 if( isunit )
18383                 {
18384                     vd = (double)(1);
18385                 }
18386                 else
18387                 {
18388                     vd = 1/a->ptr.pp_double[i1+i][j1+i];
18389                 }
18390                 ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
18391                 for(j=i+1; j<=m-1; j++)
18392                 {
18393                     vr = a->ptr.pp_double[i1+i][j1+j];
18394                     ae_v_subd(&x->ptr.pp_double[i2+j][j2], 1, &x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vr);
18395                 }
18396             }
18397             return;
18398         }
18399     }
18400     else
18401     {
18402 
18403         /*
18404          * Lower triangular matrix
18405          */
18406         if( optype==0 )
18407         {
18408 
18409             /*
18410              * A^(-1)*X
18411              */
18412             for(i=0; i<=m-1; i++)
18413             {
18414                 for(j=0; j<=i-1; j++)
18415                 {
18416                     vr = a->ptr.pp_double[i1+i][j1+j];
18417                     ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &x->ptr.pp_double[i2+j][j2], 1, ae_v_len(j2,j2+n-1), vr);
18418                 }
18419                 if( isunit )
18420                 {
18421                     vd = (double)(1);
18422                 }
18423                 else
18424                 {
18425                     vd = 1/a->ptr.pp_double[i1+j][j1+j];
18426                 }
18427                 ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
18428             }
18429             return;
18430         }
18431         if( optype==1 )
18432         {
18433 
18434             /*
18435              * A^(-T)*X
18436              */
18437             for(i=m-1; i>=0; i--)
18438             {
18439                 if( isunit )
18440                 {
18441                     vd = (double)(1);
18442                 }
18443                 else
18444                 {
18445                     vd = 1/a->ptr.pp_double[i1+i][j1+i];
18446                 }
18447                 ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
18448                 for(j=i-1; j>=0; j--)
18449                 {
18450                     vr = a->ptr.pp_double[i1+i][j1+j];
18451                     ae_v_subd(&x->ptr.pp_double[i2+j][j2], 1, &x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vr);
18452                 }
18453             }
18454             return;
18455         }
18456     }
18457 }
18458 
18459 
/*************************************************************************
Level 2 subroutine: Hermitian rank-K update

    C := alpha*A*A^H + beta*C   (OpTypeA=0)
or
    C := alpha*A^H*A + beta*C   (OpTypeA<>0)

where C is an NxN submatrix of C stored at offset [IC,JC] (only the
triangle selected by IsUpper is referenced and updated), and A is a
submatrix stored at offset [IA,JA].
*************************************************************************/
static void ablas_cmatrixherk2(ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Complex */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     /* Complex */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    ae_complex v;



    /*
     * Fast exit (nothing to be done): the alpha-term vanishes and Beta=1
     */
    if( (ae_fp_eq(alpha,(double)(0))||k==0)&&ae_fp_eq(beta,(double)(1)) )
    {
        return;
    }

    /*
     * Try to call fast SYRK kernel; fall through to generic code on failure
     */
    if( cmatrixherkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
    {
        return;
    }

    /*
     * SYRK, generic code
     */
    if( optypea==0 )
    {

        /*
         * C=alpha*A*A^H+beta*C: element [I][J] of the selected triangle is
         * the dot product of row I of A with the conjugated row J of A
         */
        for(i=0; i<=n-1; i++)
        {
            /* [J1,J2] is the column range of the triangle within row I */
            if( isupper )
            {
                j1 = i;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i;
            }
            for(j=j1; j<=j2; j++)
            {
                if( ae_fp_neq(alpha,(double)(0))&&k>0 )
                {
                    v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &a->ptr.pp_complex[ia+j][ja], 1, "Conj", ae_v_len(ja,ja+k-1));
                }
                else
                {
                    v = ae_complex_from_i(0);
                }
                /* Beta=0 overwrites C (old contents are not read) */
                if( ae_fp_eq(beta,(double)(0)) )
                {
                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul_d(v,alpha);
                }
                else
                {
                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_add(ae_c_mul_d(c->ptr.pp_complex[ic+i][jc+j],beta),ae_c_mul_d(v,alpha));
                }
            }
        }
        return;
    }
    else
    {

        /*
         * C=alpha*A^H*A+beta*C: first pre-scale the triangle of C by Beta
         * (or zero it), then accumulate K rank-1 updates, one per row of A
         */
        for(i=0; i<=n-1; i++)
        {
            if( isupper )
            {
                j1 = i;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i;
            }
            if( ae_fp_eq(beta,(double)(0)) )
            {
                for(j=j1; j<=j2; j++)
                {
                    c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_i(0);
                }
            }
            else
            {
                ae_v_cmuld(&c->ptr.pp_complex[ic+i][jc+j1], 1, ae_v_len(jc+j1,jc+j2), beta);
            }
        }
        if( ae_fp_neq(alpha,(double)(0))&&k>0 )
        {
            for(i=0; i<=k-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    if( isupper )
                    {
                        j1 = j;
                        j2 = n-1;
                    }
                    else
                    {
                        j1 = 0;
                        j2 = j;
                    }
                    /* row A[IA+i] contributes alpha*conj(A[i][j])*A[i][J1..J2]
                       to row J of the triangle of C */
                    v = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[ia+i][ja+j], _state),alpha);
                    ae_v_caddc(&c->ptr.pp_complex[ic+j][jc+j1], 1, &a->ptr.pp_complex[ia+i][ja+j1], 1, "N", ae_v_len(jc+j1,jc+j2), v);
                }
            }
        }
        return;
    }
}
18598 
18599 
/*************************************************************************
Level 2 subroutine: symmetric rank-K update

    C := alpha*A*A^T + beta*C   (OpTypeA=0)
or
    C := alpha*A^T*A + beta*C   (OpTypeA<>0)

where C is an NxN submatrix of C stored at offset [IC,JC] (only the
triangle selected by IsUpper is referenced and updated), and A is a
submatrix stored at offset [IA,JA].
*************************************************************************/
static void ablas_rmatrixsyrk2(ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     /* Real    */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    double v;



    /*
     * Fast exit (nothing to be done): the alpha-term vanishes and Beta=1
     */
    if( (ae_fp_eq(alpha,(double)(0))||k==0)&&ae_fp_eq(beta,(double)(1)) )
    {
        return;
    }

    /*
     * Try to call fast SYRK kernel; fall through to generic code on failure
     */
    if( rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
    {
        return;
    }

    /*
     * SYRK, generic code
     */
    if( optypea==0 )
    {

        /*
         * C=alpha*A*A^T+beta*C: element [I][J] of the selected triangle is
         * the dot product of rows I and J of A
         */
        for(i=0; i<=n-1; i++)
        {
            /* [J1,J2] is the column range of the triangle within row I */
            if( isupper )
            {
                j1 = i;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i;
            }
            for(j=j1; j<=j2; j++)
            {
                if( ae_fp_neq(alpha,(double)(0))&&k>0 )
                {
                    v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &a->ptr.pp_double[ia+j][ja], 1, ae_v_len(ja,ja+k-1));
                }
                else
                {
                    v = (double)(0);
                }
                /* Beta=0 overwrites C (old contents are not read) */
                if( ae_fp_eq(beta,(double)(0)) )
                {
                    c->ptr.pp_double[ic+i][jc+j] = alpha*v;
                }
                else
                {
                    c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j]+alpha*v;
                }
            }
        }
        return;
    }
    else
    {

        /*
         * C=alpha*A^T*A+beta*C: first pre-scale the triangle of C by Beta
         * (or zero it), then accumulate K rank-1 updates, one per row of A
         */
        for(i=0; i<=n-1; i++)
        {
            if( isupper )
            {
                j1 = i;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i;
            }
            if( ae_fp_eq(beta,(double)(0)) )
            {
                for(j=j1; j<=j2; j++)
                {
                    c->ptr.pp_double[ic+i][jc+j] = (double)(0);
                }
            }
            else
            {
                ae_v_muld(&c->ptr.pp_double[ic+i][jc+j1], 1, ae_v_len(jc+j1,jc+j2), beta);
            }
        }
        if( ae_fp_neq(alpha,(double)(0))&&k>0 )
        {
            for(i=0; i<=k-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    if( isupper )
                    {
                        j1 = j;
                        j2 = n-1;
                    }
                    else
                    {
                        j1 = 0;
                        j2 = j;
                    }
                    /* row A[IA+i] contributes alpha*A[i][j]*A[i][J1..J2]
                       to row J of the triangle of C */
                    v = alpha*a->ptr.pp_double[ia+i][ja+j];
                    ae_v_addd(&c->ptr.pp_double[ic+j][jc+j1], 1, &a->ptr.pp_double[ia+i][ja+j1], 1, ae_v_len(jc+j1,jc+j2), v);
                }
            }
        }
        return;
    }
}
18738 
18739 
18740 /*************************************************************************
18741 This subroutine is an actual implementation of CMatrixGEMM.  It  does  not
18742 perform some integrity checks performed in the  driver  function,  and  it
18743 does not activate multithreading  framework  (driver  decides  whether  to
18744 activate workers or not).
18745 
18746   -- ALGLIB routine --
18747      10.01.2019
18748      Bochkanov Sergey
18749 *************************************************************************/
ablas_cmatrixgemmrec(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,ae_complex beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)18750 static void ablas_cmatrixgemmrec(ae_int_t m,
18751      ae_int_t n,
18752      ae_int_t k,
18753      ae_complex alpha,
18754      /* Complex */ ae_matrix* a,
18755      ae_int_t ia,
18756      ae_int_t ja,
18757      ae_int_t optypea,
18758      /* Complex */ ae_matrix* b,
18759      ae_int_t ib,
18760      ae_int_t jb,
18761      ae_int_t optypeb,
18762      ae_complex beta,
18763      /* Complex */ ae_matrix* c,
18764      ae_int_t ic,
18765      ae_int_t jc,
18766      ae_state *_state)
18767 {
18768     ae_int_t s1;
18769     ae_int_t s2;
18770     ae_int_t tsa;
18771     ae_int_t tsb;
18772     ae_int_t tscur;
18773 
18774 
18775 
18776     /*
18777      * Tile hierarchy: B -> A -> A/2
18778      */
18779     tsa = matrixtilesizea(_state)/2;
18780     tsb = matrixtilesizeb(_state);
18781     tscur = tsb;
18782     if( imax3(m, n, k, _state)<=tsb )
18783     {
18784         tscur = tsa;
18785     }
18786     ae_assert(tscur>=1, "CMatrixGEMMRec: integrity check failed", _state);
18787 
18788     /*
18789      * Use MKL or ALGLIB basecase code
18790      */
18791     if( imax3(m, n, k, _state)<=tsb )
18792     {
18793         if( cmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
18794         {
18795             return;
18796         }
18797     }
18798     if( imax3(m, n, k, _state)<=tsa )
18799     {
18800         cmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18801         return;
18802     }
18803 
18804     /*
18805      * Recursive algorithm: parallel splitting on M/N
18806      */
18807     if( m>=n&&m>=k )
18808     {
18809 
18810         /*
18811          * A*B = (A1 A2)^T*B
18812          */
18813         tiledsplit(m, tscur, &s1, &s2, _state);
18814         ablas_cmatrixgemmrec(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18815         if( optypea==0 )
18816         {
18817             ablas_cmatrixgemmrec(s2, n, k, alpha, a, ia+s1, ja, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
18818         }
18819         else
18820         {
18821             ablas_cmatrixgemmrec(s2, n, k, alpha, a, ia, ja+s1, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
18822         }
18823         return;
18824     }
18825     if( n>=m&&n>=k )
18826     {
18827 
18828         /*
18829          * A*B = A*(B1 B2)
18830          */
18831         tiledsplit(n, tscur, &s1, &s2, _state);
18832         if( optypeb==0 )
18833         {
18834             ablas_cmatrixgemmrec(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18835             ablas_cmatrixgemmrec(m, s2, k, alpha, a, ia, ja, optypea, b, ib, jb+s1, optypeb, beta, c, ic, jc+s1, _state);
18836         }
18837         else
18838         {
18839             ablas_cmatrixgemmrec(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18840             ablas_cmatrixgemmrec(m, s2, k, alpha, a, ia, ja, optypea, b, ib+s1, jb, optypeb, beta, c, ic, jc+s1, _state);
18841         }
18842         return;
18843     }
18844 
18845     /*
18846      * Recursive algorithm: serial splitting on K
18847      */
18848 
18849     /*
18850      * A*B = (A1 A2)*(B1 B2)^T
18851      */
18852     tiledsplit(k, tscur, &s1, &s2, _state);
18853     if( optypea==0&&optypeb==0 )
18854     {
18855         ablas_cmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18856         ablas_cmatrixgemmrec(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib+s1, jb, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
18857     }
18858     if( optypea==0&&optypeb!=0 )
18859     {
18860         ablas_cmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18861         ablas_cmatrixgemmrec(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib, jb+s1, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
18862     }
18863     if( optypea!=0&&optypeb==0 )
18864     {
18865         ablas_cmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18866         ablas_cmatrixgemmrec(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib+s1, jb, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
18867     }
18868     if( optypea!=0&&optypeb!=0 )
18869     {
18870         ablas_cmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18871         ablas_cmatrixgemmrec(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib, jb+s1, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
18872     }
18873 }
18874 
18875 
/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_ablas_cmatrixgemmrec(ae_int_t m,
    ae_int_t n,
    ae_int_t k,
    ae_complex alpha,
    /* Complex */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    /* Complex */ ae_matrix* b,
    ae_int_t ib,
    ae_int_t jb,
    ae_int_t optypeb,
    ae_complex beta,
    /* Complex */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_state *_state)
{
    /*
     * Always reports "not executed in parallel"; the caller then runs
     * the serial implementation (parallel execution is unavailable in
     * this edition).
     */
    return ae_false;
}
18899 
18900 
/*************************************************************************
This subroutine is an actual implementation of RMatrixGEMM.  It  does  not
perform some integrity checks performed in the  driver  function,  and  it
does not activate multithreading  framework  (driver  decides  whether  to
activate workers or not).

The problem is recursively split along its largest dimension until it fits
into a basecase tile.

  -- ALGLIB routine --
     10.01.2019
     Bochkanov Sergey
*************************************************************************/
static void ablas_rmatrixgemmrec(ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     /* Real    */ ae_matrix* b,
     ae_int_t ib,
     ae_int_t jb,
     ae_int_t optypeb,
     double beta,
     /* Real    */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_state *_state)
{
    ae_int_t s1;
    ae_int_t s2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    /*
     * TSB is the larger (MKL-friendly) tile, TSA the smaller basecase
     * tile; TSCur is the split size used at this recursion level.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( imax3(m, n, k, _state)<=tsb )
    {
        tscur = tsa;
    }
    ae_assert(tscur>=1, "RMatrixGEMMRec: integrity check failed", _state);

    /*
     * Use MKL or ALGLIB basecase code
     */
    if( (m<=tsb&&n<=tsb)&&k<=tsb )
    {
        if( rmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
        {
            return;
        }
    }
    if( (m<=tsa&&n<=tsa)&&k<=tsa )
    {
        rmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        return;
    }

    /*
     * Recursive algorithm: split on M or N
     */
    if( m>=n&&m>=k )
    {

        /*
         * Split on M: the two halves update disjoint row blocks of C.
         * The offset into A depends on whether A is transposed.
         */
        tiledsplit(m, tscur, &s1, &s2, _state);
        if( optypea==0 )
        {
            ablas_rmatrixgemmrec(s2, n, k, alpha, a, ia+s1, ja, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
            ablas_rmatrixgemmrec(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        }
        else
        {
            ablas_rmatrixgemmrec(s2, n, k, alpha, a, ia, ja+s1, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
            ablas_rmatrixgemmrec(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        }
        return;
    }
    if( n>=m&&n>=k )
    {

        /*
         * Split on N: the two halves update disjoint column blocks of C.
         * The offset into B depends on whether B is transposed.
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        if( optypeb==0 )
        {
            ablas_rmatrixgemmrec(m, s2, k, alpha, a, ia, ja, optypea, b, ib, jb+s1, optypeb, beta, c, ic, jc+s1, _state);
            ablas_rmatrixgemmrec(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        }
        else
        {
            ablas_rmatrixgemmrec(m, s2, k, alpha, a, ia, ja, optypea, b, ib+s1, jb, optypeb, beta, c, ic, jc+s1, _state);
            ablas_rmatrixgemmrec(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        }
        return;
    }

    /*
     * Recursive algorithm: split on K.  Both halves update the whole of
     * C, so the second call must run after the first and accumulate with
     * Beta=1 (the first call has already applied the caller's Beta).
     */
    tiledsplit(k, tscur, &s1, &s2, _state);
    if( optypea==0&&optypeb==0 )
    {
        ablas_rmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        ablas_rmatrixgemmrec(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib+s1, jb, optypeb, 1.0, c, ic, jc, _state);
    }
    if( optypea==0&&optypeb!=0 )
    {
        ablas_rmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        ablas_rmatrixgemmrec(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib, jb+s1, optypeb, 1.0, c, ic, jc, _state);
    }
    if( optypea!=0&&optypeb==0 )
    {
        ablas_rmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        ablas_rmatrixgemmrec(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib+s1, jb, optypeb, 1.0, c, ic, jc, _state);
    }
    if( optypea!=0&&optypeb!=0 )
    {
        ablas_rmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        ablas_rmatrixgemmrec(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib, jb+s1, optypeb, 1.0, c, ic, jc, _state);
    }
}
19032 
19033 
/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_ablas_rmatrixgemmrec(ae_int_t m,
    ae_int_t n,
    ae_int_t k,
    double alpha,
    /* Real    */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    /* Real    */ ae_matrix* b,
    ae_int_t ib,
    ae_int_t jb,
    ae_int_t optypeb,
    double beta,
    /* Real    */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_state *_state)
{
    /*
     * Always reports "not executed in parallel"; the caller then runs
     * the serial implementation (parallel execution is unavailable in
     * this edition).
     */
    return ae_false;
}
19057 
19058 
19059 #endif
19060 #if defined(AE_COMPILE_ORTFAC) || !defined(AE_PARTIAL_BUILD)
19061 
19062 
19063 /*************************************************************************
19064 QR decomposition of a rectangular matrix of size MxN
19065 
19066 Input parameters:
19067     A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
19068     M   -   number of rows in matrix A.
19069     N   -   number of columns in matrix A.
19070 
19071 Output parameters:
19072     A   -   matrices Q and R in compact form (see below).
19073     Tau -   array of scalar factors which are used to form
19074             matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)].
19075 
19076 Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
19077 MxM, R - upper triangular (or upper trapezoid) matrix of size M x N.
19078 
19079 The elements of matrix R are located on and above the main diagonal of
19080 matrix A. The elements which are located in Tau array and below the main
19081 diagonal of matrix A are used to form matrix Q as follows:
19082 
19083 Matrix Q is represented as a product of elementary reflections
19084 
Q = H(0)*H(1)*...*H(k-1),
19086 
19087 where k = min(m,n), and each H(i) is in the form
19088 
19089 H(i) = 1 - tau * v * (v^T)
19090 
19091 where tau is a scalar stored in Tau[I]; v - real vector,
19092 so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).
19093 
19094   ! FREE EDITION OF ALGLIB:
19095   !
19096   ! Free Edition of ALGLIB supports following important features for  this
19097   ! function:
19098   ! * C++ version: x64 SIMD support using C++ intrinsics
19099   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
19100   !
19101   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
19102   ! Reference Manual in order  to  find  out  how to activate SIMD support
19103   ! in ALGLIB.
19104 
19105   ! COMMERCIAL EDITION OF ALGLIB:
19106   !
19107   ! Commercial Edition of ALGLIB includes following important improvements
19108   ! of this function:
19109   ! * high-performance native backend with same C# interface (C# version)
19110   ! * multithreading support (C++ and C# versions)
19111   ! * hardware vendor (Intel) implementations of linear algebra primitives
19112   !   (C++ and C# versions, x86/x64 platform)
19113   !
19114   ! We recommend you to read 'Working with commercial version' section  of
19115   ! ALGLIB Reference Manual in order to find out how to  use  performance-
19116   ! related features provided by commercial edition of ALGLIB.
19117 
19118   -- ALGLIB routine --
19119      17.02.2010
19120      Bochkanov Sergey
19121 *************************************************************************/
void rmatrixqr(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_vector t;
    ae_vector taubuf;
    ae_int_t minmn;
    ae_matrix tmpa;
    ae_matrix tmpt;
    ae_matrix tmpr;
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t rowscount;
    ae_int_t i;
    ae_int_t ts;

    /*
     * Register locals in the dynamic frame; structures are zero-filled
     * before initialization, per the memory-management protocol used
     * throughout this file.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_vector_clear(tau);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);

    /* Degenerate sizes: nothing to factor */
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    /*
     * TS is the column width of one block; TmpA holds the current panel,
     * TmpT the triangular factor of the block reflector, TmpR scratch
     * for the trailing-matrix update.
     */
    minmn = ae_minint(m, n, _state);
    ts = matrixtilesizeb(_state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(tau, minmn, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, m, ts, _state);
    ae_matrix_set_length(&tmpt, ts, 2*ts, _state);
    ae_matrix_set_length(&tmpr, 2*ts, n, _state);

    /*
     * Blocked code: panels of up to TS columns are factored one at a time
     */
    blockstart = 0;
    while(blockstart!=minmn)
    {

        /*
         * Determine block size
         */
        blocksize = minmn-blockstart;
        if( blocksize>ts )
        {
            blocksize = ts;
        }
        rowscount = m-blockstart;

        /*
         * QR decomposition of submatrix.
         * Matrix is copied to temporary storage to solve
         * some TLB issues arising from non-contiguous memory
         * access pattern.
         */
        rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
        rmatrixqrbasecase(&tmpa, rowscount, blocksize, &work, &t, &taubuf, _state);
        rmatrixcopy(rowscount, blocksize, &tmpa, 0, 0, a, blockstart, blockstart, _state);
        /* copy panel's scalar factors into the output TAU */
        ae_v_move(&tau->ptr.p_double[blockstart], 1, &taubuf.ptr.p_double[0], 1, ae_v_len(blockstart,blockstart+blocksize-1));

        /*
         * Update the rest, choose between:
         * a) Level 2 algorithm (when the rest of the matrix is small enough)
         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
         *    representation for products of Householder transformations',
         *    by R. Schreiber and C. Van Loan.
         */
        if( blockstart+blocksize<=n-1 )
        {
            if( n-blockstart-blocksize>=2*ts||rowscount>=4*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q'.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                 * Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA'
                 */
                rmatrixgemm(blocksize, n-blockstart-blocksize, rowscount, 1.0, &tmpa, 0, 0, 1, a, blockstart, blockstart+blocksize, 0, 0.0, &tmpr, 0, 0, _state);
                rmatrixgemm(blocksize, n-blockstart-blocksize, blocksize, 1.0, &tmpt, 0, 0, 1, &tmpr, 0, 0, 0, 0.0, &tmpr, blocksize, 0, _state);
                rmatrixgemm(rowscount, n-blockstart-blocksize, blocksize, 1.0, &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, 1.0, a, blockstart, blockstart+blocksize, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply the reflectors one at a time.
                 * T[1..rowscount-i] holds the reflector vector V, with
                 * the implicit leading 1 written explicitly.
                 */
                for(i=0; i<=blocksize-1; i++)
                {
                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], tmpa.stride, ae_v_len(1,rowscount-i));
                    t.ptr.p_double[1] = (double)(1);
                    applyreflectionfromtheleft(a, taubuf.ptr.p_double[i], &t, blockstart+i, m-1, blockstart+blocksize, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance
         */
        blockstart = blockstart+blocksize;
    }
    ae_frame_leave(_state);
}
19249 
19250 
/*************************************************************************
LQ decomposition of a rectangular matrix of size MxN

Input parameters:
    A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
    M   -   number of rows in matrix A.
    N   -   number of columns in matrix A.

Output parameters:
    A   -   matrices L and Q in compact form (see below)
    Tau -   array of scalar factors which are used to form
            matrix Q. Array whose index ranges within [0..Min(M,N)-1].

Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
MxM, L - lower triangular (or lower trapezoid) matrix of size M x N.

The elements of matrix L are located on and below  the  main  diagonal  of
matrix A. The elements which are located in Tau array and above  the  main
diagonal of matrix A are used to form matrix Q as follows:

Matrix Q is represented as a product of elementary reflections

Q = H(k-1)*H(k-2)*...*H(1)*H(0),

where k = min(m,n), and each H(i) is of the form

H(i) = 1 - tau * v * (v^T)

where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0,
v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1).

  ! FREE EDITION OF ALGLIB:
  !
  ! Free Edition of ALGLIB supports following important features for  this
  ! function:
  ! * C++ version: x64 SIMD support using C++ intrinsics
  ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
  !
  ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
  ! Reference Manual in order  to  find  out  how to activate SIMD support
  ! in ALGLIB.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  ! * hardware vendor (Intel) implementations of linear algebra primitives
  !   (C++ and C# versions, x86/x64 platform)
  !
  ! We recommend you to read 'Working with commercial version' section  of
  ! ALGLIB Reference Manual in order to find out how to  use  performance-
  ! related features provided by commercial edition of ALGLIB.

  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
void rmatrixlq(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;             /* scratch used by reflector application        */
    ae_vector t;                /* holds one Householder vector at a time       */
    ae_vector taubuf;           /* per-block copy of the tau coefficients       */
    ae_int_t minmn;
    ae_matrix tmpa;             /* contiguous copy of the current panel         */
    ae_matrix tmpt;             /* triangular factor T of the WY representation */
    ae_matrix tmpr;             /* intermediate products for the blocked update */
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t columnscount;
    ae_int_t i;
    ae_int_t ts;                /* tile (panel) size for the blocked algorithm  */

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_vector_clear(tau);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);

    /*
     * Degenerate sizes: nothing to factor, leave Tau cleared.
     */
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    minmn = ae_minint(m, n, _state);
    ts = matrixtilesizeb(_state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(tau, minmn, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, ts, n, _state);
    ae_matrix_set_length(&tmpt, ts, 2*ts, _state);
    ae_matrix_set_length(&tmpr, m, 2*ts, _state);

    /*
     * Blocked code
     */
    blockstart = 0;
    while(blockstart!=minmn)
    {

        /*
         * Determine block size
         */
        blocksize = minmn-blockstart;
        if( blocksize>ts )
        {
            blocksize = ts;
        }
        columnscount = n-blockstart;

        /*
         * LQ decomposition of submatrix.
         * Matrix is copied to temporary storage to solve
         * some TLB issues arising from non-contiguous memory
         * access pattern.
         */
        rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
        rmatrixlqbasecase(&tmpa, blocksize, columnscount, &work, &t, &taubuf, _state);
        rmatrixcopy(blocksize, columnscount, &tmpa, 0, 0, a, blockstart, blockstart, _state);
        ae_v_move(&tau->ptr.p_double[blockstart], 1, &taubuf.ptr.p_double[0], 1, ae_v_len(blockstart,blockstart+blocksize-1));

        /*
         * Update the rest, choose between:
         * a) Level 2 algorithm (when the rest of the matrix is small enough)
         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
         *    representation for products of Householder transformations',
         *    by R. Schreiber and C. Van Loan.
         */
        if( blockstart+blocksize<=m-1 )
        {
            if( m-blockstart-blocksize>=2*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA'*TmpT*TmpA
                 */
                rmatrixgemm(m-blockstart-blocksize, blocksize, columnscount, 1.0, a, blockstart+blocksize, blockstart, 0, &tmpa, 0, 0, 1, 0.0, &tmpr, 0, 0, _state);
                rmatrixgemm(m-blockstart-blocksize, blocksize, blocksize, 1.0, &tmpr, 0, 0, 0, &tmpt, 0, 0, 0, 0.0, &tmpr, 0, blocksize, _state);
                rmatrixgemm(m-blockstart-blocksize, columnscount, blocksize, 1.0, &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, 1.0, a, blockstart+blocksize, blockstart, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply reflectors one at a time
                 * from the right to the rows below the current panel.
                 */
                for(i=0; i<=blocksize-1; i++)
                {
                    /* reflector i lives in row i of TmpA, starting at the diagonal */
                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], 1, ae_v_len(1,columnscount-i));
                    t.ptr.p_double[1] = (double)(1);
                    applyreflectionfromtheright(a, taubuf.ptr.p_double[i], &t, blockstart+blocksize, m-1, blockstart+i, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance
         */
        blockstart = blockstart+blocksize;
    }
    ae_frame_leave(_state);
}
19436 
19437 
/*************************************************************************
QR decomposition of a rectangular complex matrix of size MxN

Input parameters:
    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
    M   -   number of rows in matrix A.
    N   -   number of columns in matrix A.

Output parameters:
    A   -   matrices Q and R in compact form
    Tau -   array of scalar factors which are used to form matrix Q. Array
            whose indexes range within [0.. Min(M,N)-1]

Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
MxM, R - upper triangular (or upper trapezoid) matrix of size MxN.

  ! FREE EDITION OF ALGLIB:
  !
  ! Free Edition of ALGLIB supports following important features for  this
  ! function:
  ! * C++ version: x64 SIMD support using C++ intrinsics
  ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
  !
  ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
  ! Reference Manual in order  to  find  out  how to activate SIMD support
  ! in ALGLIB.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  ! * hardware vendor (Intel) implementations of linear algebra primitives
  !   (C++ and C# versions, x86/x64 platform)
  !
  ! We recommend you to read 'Working with commercial version' section  of
  ! ALGLIB Reference Manual in order to find out how to  use  performance-
  ! related features provided by commercial edition of ALGLIB.

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     September 30, 1994
*************************************************************************/
void cmatrixqr(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;             /* scratch used by reflector application        */
    ae_vector t;                /* holds one Householder vector at a time       */
    ae_vector taubuf;           /* per-block copy of the tau coefficients       */
    ae_int_t minmn;
    ae_matrix tmpa;             /* contiguous copy of the current panel         */
    ae_matrix tmpt;             /* triangular factor T of the WY representation */
    ae_matrix tmpr;             /* intermediate products for the blocked update */
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t rowscount;
    ae_int_t i;
    ae_int_t ts;                /* tile (panel) size for the blocked algorithm  */

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_vector_clear(tau);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Degenerate sizes: nothing to factor, leave Tau cleared.
     */
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    /*
     * NOTE(review): tile size is half of the real-valued routine's,
     * presumably because complex entries take twice the memory --
     * confirm against matrixtilesizeb() documentation.
     */
    ts = matrixtilesizeb(_state)/2;
    minmn = ae_minint(m, n, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(tau, minmn, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, m, ts, _state);
    ae_matrix_set_length(&tmpt, ts, ts, _state);
    ae_matrix_set_length(&tmpr, 2*ts, n, _state);

    /*
     * Blocked code
     */
    blockstart = 0;
    while(blockstart!=minmn)
    {

        /*
         * Determine block size
         */
        blocksize = minmn-blockstart;
        if( blocksize>ts )
        {
            blocksize = ts;
        }
        rowscount = m-blockstart;

        /*
         * QR decomposition of submatrix.
         * Matrix is copied to temporary storage to solve
         * some TLB issues arising from non-contiguous memory
         * access pattern.
         */
        cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
        ortfac_cmatrixqrbasecase(&tmpa, rowscount, blocksize, &work, &t, &taubuf, _state);
        cmatrixcopy(rowscount, blocksize, &tmpa, 0, 0, a, blockstart, blockstart, _state);
        ae_v_cmove(&tau->ptr.p_complex[blockstart], 1, &taubuf.ptr.p_complex[0], 1, "N", ae_v_len(blockstart,blockstart+blocksize-1));

        /*
         * Update the rest, choose between:
         * a) Level 2 algorithm (when the rest of the matrix is small enough)
         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
         *    representation for products of Householder transformations',
         *    by R. Schreiber and C. Van Loan.
         */
        if( blockstart+blocksize<=n-1 )
        {
            if( n-blockstart-blocksize>=2*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q'.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                 * Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA'
                 *
                 * (optype 2 below is presumably conjugate-transpose --
                 * confirm against cmatrixgemm documentation)
                 */
                cmatrixgemm(blocksize, n-blockstart-blocksize, rowscount, ae_complex_from_d(1.0), &tmpa, 0, 0, 2, a, blockstart, blockstart+blocksize, 0, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
                cmatrixgemm(blocksize, n-blockstart-blocksize, blocksize, ae_complex_from_d(1.0), &tmpt, 0, 0, 2, &tmpr, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, blocksize, 0, _state);
                cmatrixgemm(rowscount, n-blockstart-blocksize, blocksize, ae_complex_from_d(1.0), &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, ae_complex_from_d(1.0), a, blockstart, blockstart+blocksize, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply conjugated reflectors one at
                 * a time from the left to the trailing columns.
                 */
                for(i=0; i<=blocksize-1; i++)
                {
                    /* reflector i lives in column i of TmpA, starting at the diagonal */
                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], tmpa.stride, "N", ae_v_len(1,rowscount-i));
                    t.ptr.p_complex[1] = ae_complex_from_i(1);
                    complexapplyreflectionfromtheleft(a, ae_c_conj(taubuf.ptr.p_complex[i], _state), &t, blockstart+i, m-1, blockstart+blocksize, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance
         */
        blockstart = blockstart+blocksize;
    }
    ae_frame_leave(_state);
}
19610 
19611 
/*************************************************************************
LQ decomposition of a rectangular complex matrix of size MxN

Input parameters:
    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
    M   -   number of rows in matrix A.
    N   -   number of columns in matrix A.

Output parameters:
    A   -   matrices Q and L in compact form
    Tau -   array of scalar factors which are used to form matrix Q. Array
            whose indexes range within [0.. Min(M,N)-1]

Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
MxM, L - lower triangular (or lower trapezoid) matrix of size MxN.

  ! FREE EDITION OF ALGLIB:
  !
  ! Free Edition of ALGLIB supports following important features for  this
  ! function:
  ! * C++ version: x64 SIMD support using C++ intrinsics
  ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
  !
  ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
  ! Reference Manual in order  to  find  out  how to activate SIMD support
  ! in ALGLIB.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  ! * hardware vendor (Intel) implementations of linear algebra primitives
  !   (C++ and C# versions, x86/x64 platform)
  !
  ! We recommend you to read 'Working with commercial version' section  of
  ! ALGLIB Reference Manual in order to find out how to  use  performance-
  ! related features provided by commercial edition of ALGLIB.

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     September 30, 1994
*************************************************************************/
void cmatrixlq(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;             /* scratch used by reflector application        */
    ae_vector t;                /* holds one Householder vector at a time       */
    ae_vector taubuf;           /* per-block copy of the tau coefficients       */
    ae_int_t minmn;
    ae_matrix tmpa;             /* contiguous copy of the current panel         */
    ae_matrix tmpt;             /* triangular factor T of the WY representation */
    ae_matrix tmpr;             /* intermediate products for the blocked update */
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t columnscount;
    ae_int_t i;
    ae_int_t ts;                /* tile (panel) size for the blocked algorithm  */

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_vector_clear(tau);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Degenerate sizes: nothing to factor, leave Tau cleared.
     */
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    /*
     * NOTE(review): tile size is half of the real-valued routine's,
     * presumably because complex entries take twice the memory --
     * confirm against matrixtilesizeb() documentation.
     */
    ts = matrixtilesizeb(_state)/2;
    minmn = ae_minint(m, n, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(tau, minmn, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, ts, n, _state);
    ae_matrix_set_length(&tmpt, ts, ts, _state);
    ae_matrix_set_length(&tmpr, m, 2*ts, _state);

    /*
     * Blocked code
     */
    blockstart = 0;
    while(blockstart!=minmn)
    {

        /*
         * Determine block size
         */
        blocksize = minmn-blockstart;
        if( blocksize>ts )
        {
            blocksize = ts;
        }
        columnscount = n-blockstart;

        /*
         * LQ decomposition of submatrix.
         * Matrix is copied to temporary storage to solve
         * some TLB issues arising from non-contiguous memory
         * access pattern.
         */
        cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
        ortfac_cmatrixlqbasecase(&tmpa, blocksize, columnscount, &work, &t, &taubuf, _state);
        cmatrixcopy(blocksize, columnscount, &tmpa, 0, 0, a, blockstart, blockstart, _state);
        ae_v_cmove(&tau->ptr.p_complex[blockstart], 1, &taubuf.ptr.p_complex[0], 1, "N", ae_v_len(blockstart,blockstart+blocksize-1));

        /*
         * Update the rest, choose between:
         * a) Level 2 algorithm (when the rest of the matrix is small enough)
         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
         *    representation for products of Householder transformations',
         *    by R. Schreiber and C. Van Loan.
         */
        if( blockstart+blocksize<=m-1 )
        {
            if( m-blockstart-blocksize>=2*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA'*TmpT*TmpA
                 *
                 * (optype 2 below is presumably conjugate-transpose --
                 * confirm against cmatrixgemm documentation)
                 */
                cmatrixgemm(m-blockstart-blocksize, blocksize, columnscount, ae_complex_from_d(1.0), a, blockstart+blocksize, blockstart, 0, &tmpa, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
                cmatrixgemm(m-blockstart-blocksize, blocksize, blocksize, ae_complex_from_d(1.0), &tmpr, 0, 0, 0, &tmpt, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, 0, blocksize, _state);
                cmatrixgemm(m-blockstart-blocksize, columnscount, blocksize, ae_complex_from_d(1.0), &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, ae_complex_from_d(1.0), a, blockstart+blocksize, blockstart, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply reflectors one at a time
                 * from the right to the rows below the current panel.
                 */
                for(i=0; i<=blocksize-1; i++)
                {
                    /* reflector i lives in row i of TmpA (conjugated on copy), starting at the diagonal */
                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,columnscount-i));
                    t.ptr.p_complex[1] = ae_complex_from_i(1);
                    complexapplyreflectionfromtheright(a, taubuf.ptr.p_complex[i], &t, blockstart+blocksize, m-1, blockstart+i, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance
         */
        blockstart = blockstart+blocksize;
    }
    ae_frame_leave(_state);
}
19783 
19784 
/*************************************************************************
Partial unpacking of matrix Q from the QR decomposition of a matrix A

Input parameters:
    A       -   matrices Q and R in compact form.
                Output of RMatrixQR subroutine.
    M       -   number of rows in given matrix A. M>=0.
    N       -   number of columns in given matrix A. N>=0.
    Tau     -   scalar factors which are used to form Q.
                Output of the RMatrixQR subroutine.
    QColumns -  required number of columns of matrix Q. M>=QColumns>=0.

Output parameters:
    Q       -   first QColumns columns of matrix Q.
                Array whose indexes range within [0..M-1, 0..QColumns-1].
                If QColumns=0, the array remains unchanged.

  ! FREE EDITION OF ALGLIB:
  !
  ! Free Edition of ALGLIB supports following important features for  this
  ! function:
  ! * C++ version: x64 SIMD support using C++ intrinsics
  ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
  !
  ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
  ! Reference Manual in order  to  find  out  how to activate SIMD support
  ! in ALGLIB.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  ! * hardware vendor (Intel) implementations of linear algebra primitives
  !   (C++ and C# versions, x86/x64 platform)
  !
  ! We recommend you to read 'Working with commercial version' section  of
  ! ALGLIB Reference Manual in order to find out how to  use  performance-
  ! related features provided by commercial edition of ALGLIB.

  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
void rmatrixqrunpackq(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_int_t qcolumns,
     /* Real    */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;             /* scratch used by reflector application        */
    ae_vector t;                /* holds one Householder vector at a time       */
    ae_vector taubuf;           /* per-block copy of the tau coefficients       */
    ae_int_t minmn;
    ae_int_t refcnt;            /* number of reflectors actually applied        */
    ae_matrix tmpa;             /* contiguous copy of the current panel         */
    ae_matrix tmpt;             /* triangular factor T of the WY representation */
    ae_matrix tmpr;             /* intermediate products for the blocked update */
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t rowscount;
    ae_int_t i;
    ae_int_t j;
    ae_int_t ts;                /* tile (panel) size for the blocked algorithm  */

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_matrix_clear(q);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);

    ae_assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!", _state);
    if( (m<=0||n<=0)||qcolumns<=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * init: start from the identity and apply reflectors to it
     */
    ts = matrixtilesizeb(_state);
    minmn = ae_minint(m, n, _state);
    refcnt = ae_minint(minmn, qcolumns, _state);
    ae_matrix_set_length(q, m, qcolumns, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=qcolumns-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_double[i][j] = (double)(1);
            }
            else
            {
                q->ptr.pp_double[i][j] = (double)(0);
            }
        }
    }
    ae_vector_set_length(&work, ae_maxint(m, qcolumns, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, qcolumns, _state)+1, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, m, ts, _state);
    ae_matrix_set_length(&tmpt, ts, 2*ts, _state);
    ae_matrix_set_length(&tmpr, 2*ts, qcolumns, _state);

    /*
     * Blocked code: walk the panels in REVERSE order (last panel first),
     * starting from the possibly-partial trailing block.
     */
    blockstart = ts*(refcnt/ts);
    blocksize = refcnt-blockstart;
    while(blockstart>=0)
    {
        rowscount = m-blockstart;
        if( blocksize>0 )
        {

            /*
             * Copy current block
             */
            rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
            ae_v_move(&taubuf.ptr.p_double[0], 1, &tau->ptr.p_double[blockstart], 1, ae_v_len(0,blocksize-1));

            /*
             * Update, choose between:
             * a) Level 2 algorithm (when the rest of the matrix is small enough)
             * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
             *    representation for products of Householder transformations',
             *    by R. Schreiber and C. Van Loan.
             */
            if( qcolumns>=2*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply matrix by Q.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                 */
                rmatrixgemm(blocksize, qcolumns, rowscount, 1.0, &tmpa, 0, 0, 1, q, blockstart, 0, 0, 0.0, &tmpr, 0, 0, _state);
                rmatrixgemm(blocksize, qcolumns, blocksize, 1.0, &tmpt, 0, 0, 0, &tmpr, 0, 0, 0, 0.0, &tmpr, blocksize, 0, _state);
                rmatrixgemm(rowscount, qcolumns, blocksize, 1.0, &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, 1.0, q, blockstart, 0, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: within a block, reflectors are also
                 * applied in reverse order.
                 */
                for(i=blocksize-1; i>=0; i--)
                {
                    /* reflector i lives in column i of TmpA, starting at the diagonal */
                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], tmpa.stride, ae_v_len(1,rowscount-i));
                    t.ptr.p_double[1] = (double)(1);
                    applyreflectionfromtheleft(q, taubuf.ptr.p_double[i], &t, blockstart+i, m-1, 0, qcolumns-1, &work, _state);
                }
            }
        }

        /*
         * Advance (backwards); all earlier blocks are full TS-sized ones
         */
        blockstart = blockstart-ts;
        blocksize = ts;
    }
    ae_frame_leave(_state);
}
19968 
19969 
19970 /*************************************************************************
19971 Unpacking of matrix R from the QR decomposition of a matrix A
19972 
19973 Input parameters:
19974     A       -   matrices Q and R in compact form.
19975                 Output of RMatrixQR subroutine.
19976     M       -   number of rows in given matrix A. M>=0.
19977     N       -   number of columns in given matrix A. N>=0.
19978 
19979 Output parameters:
19980     R       -   matrix R, array[0..M-1, 0..N-1].
19981 
19982   -- ALGLIB routine --
19983      17.02.2010
19984      Bochkanov Sergey
19985 *************************************************************************/
void rmatrixqrunpackr(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_matrix* r,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;

    ae_matrix_clear(r);

    if( m<=0||n<=0 )
    {
        return;
    }

    /*
     * R is M*N: entries on or above the diagonal within the leading
     * min(M,N) rows are taken from the compact QR storage in A, every
     * other entry is zero.
     */
    k = ae_minint(m, n, _state);
    ae_matrix_set_length(r, m, n, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i<=k-1&&j>=i )
            {
                r->ptr.pp_double[i][j] = a->ptr.pp_double[i][j];
            }
            else
            {
                r->ptr.pp_double[i][j] = (double)(0);
            }
        }
    }
}
20016 
20017 
20018 /*************************************************************************
20019 Partial unpacking of matrix Q from the LQ decomposition of a matrix A
20020 
20021 Input parameters:
20022     A       -   matrices L and Q in compact form.
20023                 Output of RMatrixLQ subroutine.
20024     M       -   number of rows in given matrix A. M>=0.
20025     N       -   number of columns in given matrix A. N>=0.
20026     Tau     -   scalar factors which are used to form Q.
20027                 Output of the RMatrixLQ subroutine.
20028     QRows   -   required number of rows in matrix Q. N>=QRows>=0.
20029 
20030 Output parameters:
20031     Q       -   first QRows rows of matrix Q. Array whose indexes range
20032                 within [0..QRows-1, 0..N-1]. If QRows=0, the array remains
20033                 unchanged.
20034 
20035   ! FREE EDITION OF ALGLIB:
20036   !
20037   ! Free Edition of ALGLIB supports following important features for  this
20038   ! function:
20039   ! * C++ version: x64 SIMD support using C++ intrinsics
20040   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
20041   !
20042   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
20043   ! Reference Manual in order  to  find  out  how to activate SIMD support
20044   ! in ALGLIB.
20045 
20046   ! COMMERCIAL EDITION OF ALGLIB:
20047   !
20048   ! Commercial Edition of ALGLIB includes following important improvements
20049   ! of this function:
20050   ! * high-performance native backend with same C# interface (C# version)
20051   ! * multithreading support (C++ and C# versions)
20052   ! * hardware vendor (Intel) implementations of linear algebra primitives
20053   !   (C++ and C# versions, x86/x64 platform)
20054   !
20055   ! We recommend you to read 'Working with commercial version' section  of
20056   ! ALGLIB Reference Manual in order to find out how to  use  performance-
20057   ! related features provided by commercial edition of ALGLIB.
20058 
20059   -- ALGLIB routine --
20060      17.02.2010
20061      Bochkanov Sergey
20062 *************************************************************************/
void rmatrixlqunpackq(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_int_t qrows,
     /* Real    */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_vector t;
    ae_vector taubuf;
    ae_int_t minmn;
    ae_int_t refcnt;
    ae_matrix tmpa;
    ae_matrix tmpt;
    ae_matrix tmpr;
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t columnscount;
    ae_int_t i;
    ae_int_t j;
    ae_int_t ts;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_matrix_clear(q);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);

    /*
     * Q of an LQ factorization is N*N, so at most N of its rows can be
     * requested. Degenerate sizes (including QRows=0, for which Q must
     * stay unchanged) exit before any allocation is performed.
     */
    ae_assert(qrows<=n, "RMatrixLQUnpackQ: QRows>N!", _state);
    if( (m<=0||n<=0)||qrows<=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * init
     *
     * RefCnt is the number of elementary reflectors which actually
     * influence the first QRows rows of Q. TmpA holds one ts-row panel of
     * reflector vectors, TmpT the triangular factor T of the block
     * reflector, and TmpR two side-by-side QRows*ts scratch panels used
     * by the three-GEMM update below.
     */
    ts = matrixtilesizeb(_state);
    minmn = ae_minint(m, n, _state);
    refcnt = ae_minint(minmn, qrows, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, ts, n, _state);
    ae_matrix_set_length(&tmpt, ts, 2*ts, _state);
    ae_matrix_set_length(&tmpr, qrows, 2*ts, _state);
    ae_matrix_set_length(q, qrows, n, _state);
    /*
     * Start from the identity: reflectors are accumulated into it below.
     */
    for(i=0; i<=qrows-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_double[i][j] = (double)(1);
            }
            else
            {
                q->ptr.pp_double[i][j] = (double)(0);
            }
        }
    }

    /*
     * Blocked code
     *
     * Reflector blocks are applied in reverse order, starting from the
     * last (possibly partial) block and stepping back by ts rows.
     */
    blockstart = ts*(refcnt/ts);
    blocksize = refcnt-blockstart;
    while(blockstart>=0)
    {
        columnscount = n-blockstart;
        if( blocksize>0 )
        {

            /*
             * Copy submatrix: the current panel of row-stored reflector
             * vectors and the matching Tau entries are staged in
             * contiguous buffers.
             */
            rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
            ae_v_move(&taubuf.ptr.p_double[0], 1, &tau->ptr.p_double[blockstart], 1, ae_v_len(0,blocksize-1));

            /*
             * Update matrix, choose between:
             * a) Level 2 algorithm (when the rest of the matrix is small enough)
             * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
             *    representation for products of Householder transformations',
             *    by R. Schreiber and C. Van Loan.
             */
            if( qrows>=2*ts )
            {

                /*
                 * Prepare block reflector (column-reflector form, hence
                 * the ae_false flag)
                 */
                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q'.
                 *
                 * Q'  = E + Y*T'*Y'  = E + TmpA'*TmpT'*TmpA
                 *
                 * Steps: TmpR[:,0:bs) = Q*Y', then TmpR[:,bs:2bs) = that
                 * times T', then accumulate the rank-bs correction into Q.
                 */
                rmatrixgemm(qrows, blocksize, columnscount, 1.0, q, 0, blockstart, 0, &tmpa, 0, 0, 1, 0.0, &tmpr, 0, 0, _state);
                rmatrixgemm(qrows, blocksize, blocksize, 1.0, &tmpr, 0, 0, 0, &tmpt, 0, 0, 1, 0.0, &tmpr, 0, blocksize, _state);
                rmatrixgemm(qrows, columnscount, blocksize, 1.0, &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, 1.0, q, 0, blockstart, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply the reflectors of this block
                 * one at a time from the right, last reflector first.
                 */
                for(i=blocksize-1; i>=0; i--)
                {
                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], 1, ae_v_len(1,columnscount-i));
                    t.ptr.p_double[1] = (double)(1);
                    applyreflectionfromtheright(q, taubuf.ptr.p_double[i], &t, 0, qrows-1, blockstart+i, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance to the previous (always full-sized) block.
         */
        blockstart = blockstart-ts;
        blocksize = ts;
    }
    ae_frame_leave(_state);
}
20201 
20202 
20203 /*************************************************************************
20204 Unpacking of matrix L from the LQ decomposition of a matrix A
20205 
20206 Input parameters:
20207     A       -   matrices Q and L in compact form.
20208                 Output of RMatrixLQ subroutine.
20209     M       -   number of rows in given matrix A. M>=0.
20210     N       -   number of columns in given matrix A. N>=0.
20211 
20212 Output parameters:
20213     L       -   matrix L, array[0..M-1, 0..N-1].
20214 
20215   -- ALGLIB routine --
20216      17.02.2010
20217      Bochkanov Sergey
20218 *************************************************************************/
void rmatrixlqunpackl(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_matrix* l,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;

    ae_matrix_clear(l);

    if( m<=0||n<=0 )
    {
        return;
    }

    /*
     * L is M*N: entries on or below the diagonal are taken from the
     * compact LQ storage in A, every other entry is zero.
     */
    ae_matrix_set_length(l, m, n, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( j<=i )
            {
                l->ptr.pp_double[i][j] = a->ptr.pp_double[i][j];
            }
            else
            {
                l->ptr.pp_double[i][j] = (double)(0);
            }
        }
    }
}
20249 
20250 
20251 /*************************************************************************
20252 Partial unpacking of matrix Q from QR decomposition of a complex matrix A.
20253 
20254 Input parameters:
20255     A           -   matrices Q and R in compact form.
20256                     Output of CMatrixQR subroutine .
20257     M           -   number of rows in matrix A. M>=0.
20258     N           -   number of columns in matrix A. N>=0.
20259     Tau         -   scalar factors which are used to form Q.
20260                     Output of CMatrixQR subroutine .
20261     QColumns    -   required number of columns in matrix Q. M>=QColumns>=0.
20262 
20263 Output parameters:
20264     Q           -   first QColumns columns of matrix Q.
20265                     Array whose index ranges within [0..M-1, 0..QColumns-1].
20266                     If QColumns=0, array isn't changed.
20267 
20268   ! FREE EDITION OF ALGLIB:
20269   !
20270   ! Free Edition of ALGLIB supports following important features for  this
20271   ! function:
20272   ! * C++ version: x64 SIMD support using C++ intrinsics
20273   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
20274   !
20275   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
20276   ! Reference Manual in order  to  find  out  how to activate SIMD support
20277   ! in ALGLIB.
20278 
20279   ! COMMERCIAL EDITION OF ALGLIB:
20280   !
20281   ! Commercial Edition of ALGLIB includes following important improvements
20282   ! of this function:
20283   ! * high-performance native backend with same C# interface (C# version)
20284   ! * multithreading support (C++ and C# versions)
20285   ! * hardware vendor (Intel) implementations of linear algebra primitives
20286   !   (C++ and C# versions, x86/x64 platform)
20287   !
20288   ! We recommend you to read 'Working with commercial version' section  of
20289   ! ALGLIB Reference Manual in order to find out how to  use  performance-
20290   ! related features provided by commercial edition of ALGLIB.
20291 
20292   -- ALGLIB routine --
20293      17.02.2010
20294      Bochkanov Sergey
20295 *************************************************************************/
void cmatrixqrunpackq(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* tau,
     ae_int_t qcolumns,
     /* Complex */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_vector t;
    ae_vector taubuf;
    ae_int_t minmn;
    ae_int_t refcnt;
    ae_matrix tmpa;
    ae_matrix tmpt;
    ae_matrix tmpr;
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t rowscount;
    ae_int_t i;
    ae_int_t j;
    ae_int_t ts;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_matrix_clear(q);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Q of a QR factorization is M*M, so at most M columns can be
     * requested.
     *
     * NOTE(review): unlike the real-valued sibling, there is no early
     * exit for QColumns<=0 here, so QColumns=0 still resizes Q to M*0
     * although the header says the array should stay unchanged --
     * confirm whether this is intentional.
     */
    ae_assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!", _state);
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * init
     *
     * The tile size is halved relative to the real case (presumably
     * because complex entries are twice as large -- matches the other
     * complex routines in this file). RefCnt is the number of
     * reflectors affecting the first QColumns columns of Q; TmpA stages
     * one reflector panel, TmpT the block-reflector triangular factor,
     * TmpR two stacked blocksize*QColumns scratch panels.
     */
    ts = matrixtilesizeb(_state)/2;
    minmn = ae_minint(m, n, _state);
    refcnt = ae_minint(minmn, qcolumns, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, m, ts, _state);
    ae_matrix_set_length(&tmpt, ts, ts, _state);
    ae_matrix_set_length(&tmpr, 2*ts, qcolumns, _state);
    ae_matrix_set_length(q, m, qcolumns, _state);
    /*
     * Start from the identity: reflectors are accumulated into it below.
     */
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=qcolumns-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(1);
            }
            else
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }

    /*
     * Blocked code
     *
     * Reflector blocks are applied in reverse order, starting from the
     * last (possibly partial) block and stepping back by ts columns.
     */
    blockstart = ts*(refcnt/ts);
    blocksize = refcnt-blockstart;
    while(blockstart>=0)
    {
        rowscount = m-blockstart;
        if( blocksize>0 )
        {

            /*
             * QR decomposition of submatrix.
             * Matrix is copied to temporary storage to solve
             * some TLB issues arising from non-contiguous memory
             * access pattern.
             */
            cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
            ae_v_cmove(&taubuf.ptr.p_complex[0], 1, &tau->ptr.p_complex[blockstart], 1, "N", ae_v_len(0,blocksize-1));

            /*
             * Update matrix, choose between:
             * a) Level 2 algorithm (when the rest of the matrix is small enough)
             * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
             *    representation for products of Householder transformations',
             *    by R. Schreiber and C. Van Loan.
             */
            if( qcolumns>=2*ts )
            {

                /*
                 * Prepare block reflector (column-reflector form, ae_true)
                 */
                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                 *
                 * OpType 2 on TmpA below is conjugate-transpose (ALGLIB
                 * GEMM convention): rows [0,bs) of TmpR get Y'*Q, rows
                 * [bs,2bs) get T*(Y'*Q), then the correction is added
                 * back into Q.
                 */
                cmatrixgemm(blocksize, qcolumns, rowscount, ae_complex_from_d(1.0), &tmpa, 0, 0, 2, q, blockstart, 0, 0, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
                cmatrixgemm(blocksize, qcolumns, blocksize, ae_complex_from_d(1.0), &tmpt, 0, 0, 0, &tmpr, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, blocksize, 0, _state);
                cmatrixgemm(rowscount, qcolumns, blocksize, ae_complex_from_d(1.0), &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, ae_complex_from_d(1.0), q, blockstart, 0, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply reflectors one at a time from
                 * the left, last reflector first. The reflector vector is
                 * gathered from a column of TmpA, hence the stride.
                 */
                for(i=blocksize-1; i>=0; i--)
                {
                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], tmpa.stride, "N", ae_v_len(1,rowscount-i));
                    t.ptr.p_complex[1] = ae_complex_from_i(1);
                    complexapplyreflectionfromtheleft(q, taubuf.ptr.p_complex[i], &t, blockstart+i, m-1, 0, qcolumns-1, &work, _state);
                }
            }
        }

        /*
         * Advance to the previous (always full-sized) block.
         */
        blockstart = blockstart-ts;
        blocksize = ts;
    }
    ae_frame_leave(_state);
}
20437 
20438 
20439 /*************************************************************************
20440 Unpacking of matrix R from the QR decomposition of a matrix A
20441 
20442 Input parameters:
20443     A       -   matrices Q and R in compact form.
20444                 Output of CMatrixQR subroutine.
20445     M       -   number of rows in given matrix A. M>=0.
20446     N       -   number of columns in given matrix A. N>=0.
20447 
20448 Output parameters:
20449     R       -   matrix R, array[0..M-1, 0..N-1].
20450 
20451   -- ALGLIB routine --
20452      17.02.2010
20453      Bochkanov Sergey
20454 *************************************************************************/
void cmatrixqrunpackr(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_matrix* r,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;

    ae_matrix_clear(r);

    if( m<=0||n<=0 )
    {
        return;
    }

    /*
     * R is M*N: entries on or above the diagonal within the leading
     * min(M,N) rows are taken from the compact QR storage in A, every
     * other entry is zero.
     */
    k = ae_minint(m, n, _state);
    ae_matrix_set_length(r, m, n, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i<=k-1&&j>=i )
            {
                r->ptr.pp_complex[i][j] = a->ptr.pp_complex[i][j];
            }
            else
            {
                r->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }
}
20485 
20486 
20487 /*************************************************************************
20488 Partial unpacking of matrix Q from LQ decomposition of a complex matrix A.
20489 
20490 Input parameters:
20491     A           -   matrices Q and R in compact form.
20492                     Output of CMatrixLQ subroutine .
20493     M           -   number of rows in matrix A. M>=0.
20494     N           -   number of columns in matrix A. N>=0.
20495     Tau         -   scalar factors which are used to form Q.
20496                     Output of CMatrixLQ subroutine .
    QRows       -   required number of rows in matrix Q. N>=QRows>=0.
20498 
20499 Output parameters:
20500     Q           -   first QRows rows of matrix Q.
20501                     Array whose index ranges within [0..QRows-1, 0..N-1].
20502                     If QRows=0, array isn't changed.
20503 
20504   ! FREE EDITION OF ALGLIB:
20505   !
20506   ! Free Edition of ALGLIB supports following important features for  this
20507   ! function:
20508   ! * C++ version: x64 SIMD support using C++ intrinsics
20509   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
20510   !
20511   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
20512   ! Reference Manual in order  to  find  out  how to activate SIMD support
20513   ! in ALGLIB.
20514 
20515   ! COMMERCIAL EDITION OF ALGLIB:
20516   !
20517   ! Commercial Edition of ALGLIB includes following important improvements
20518   ! of this function:
20519   ! * high-performance native backend with same C# interface (C# version)
20520   ! * multithreading support (C++ and C# versions)
20521   ! * hardware vendor (Intel) implementations of linear algebra primitives
20522   !   (C++ and C# versions, x86/x64 platform)
20523   !
20524   ! We recommend you to read 'Working with commercial version' section  of
20525   ! ALGLIB Reference Manual in order to find out how to  use  performance-
20526   ! related features provided by commercial edition of ALGLIB.
20527 
20528   -- ALGLIB routine --
20529      17.02.2010
20530      Bochkanov Sergey
20531 *************************************************************************/
void cmatrixlqunpackq(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* tau,
     ae_int_t qrows,
     /* Complex */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_vector t;
    ae_vector taubuf;
    ae_int_t minmn;
    ae_int_t refcnt;
    ae_matrix tmpa;
    ae_matrix tmpt;
    ae_matrix tmpr;
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t columnscount;
    ae_int_t i;
    ae_int_t j;
    ae_int_t ts;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_matrix_clear(q);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Q of an LQ factorization is N*N, so at most N rows can be
     * requested (documented contract: N>=QRows>=0). QRows<=0 is part of
     * the early exit so that Q is not resized when QRows=0, as the
     * header promises. Both checks mirror RMatrixLQUnpackQ; previously
     * this routine performed neither.
     */
    ae_assert(qrows<=n, "CMatrixLQUnpackQ: QRows>N!", _state);
    if( (m<=0||n<=0)||qrows<=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Init
     *
     * The tile size is halved relative to the real case (complex
     * entries are twice as large). RefCnt is the number of reflectors
     * affecting the first QRows rows of Q; TmpA stages one reflector
     * panel, TmpT the block-reflector triangular factor, TmpR two
     * side-by-side QRows*ts scratch panels for the blocked update.
     */
    ts = matrixtilesizeb(_state)/2;
    minmn = ae_minint(m, n, _state);
    refcnt = ae_minint(minmn, qrows, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, ts, n, _state);
    ae_matrix_set_length(&tmpt, ts, ts, _state);
    ae_matrix_set_length(&tmpr, qrows, 2*ts, _state);
    ae_matrix_set_length(q, qrows, n, _state);
    /*
     * Start from the identity: reflectors are accumulated into it below.
     */
    for(i=0; i<=qrows-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(1);
            }
            else
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }

    /*
     * Blocked code
     *
     * Reflector blocks are applied in reverse order, starting from the
     * last (possibly partial) block and stepping back by ts rows.
     */
    blockstart = ts*(refcnt/ts);
    blocksize = refcnt-blockstart;
    while(blockstart>=0)
    {
        columnscount = n-blockstart;
        if( blocksize>0 )
        {

            /*
             * LQ decomposition of submatrix.
             * Matrix is copied to temporary storage to solve
             * some TLB issues arising from non-contiguous memory
             * access pattern.
             */
            cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
            ae_v_cmove(&taubuf.ptr.p_complex[0], 1, &tau->ptr.p_complex[blockstart], 1, "N", ae_v_len(0,blocksize-1));

            /*
             * Update matrix, choose between:
             * a) Level 2 algorithm (when the rest of the matrix is small enough)
             * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
             *    representation for products of Householder transformations',
             *    by R. Schreiber and C. Van Loan.
             */
            if( qrows>=2*ts )
            {

                /*
                 * Prepare block reflector (row-reflector form, ae_false)
                 */
                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q'.
                 *
                 * Q'  = E + Y*T'*Y'  = E + TmpA'*TmpT'*TmpA
                 *
                 * OpType 2 is conjugate-transpose (ALGLIB GEMM
                 * convention): TmpR[:,0:bs) = Q*Y', TmpR[:,bs:2bs) =
                 * that times T', then the correction is added into Q.
                 */
                cmatrixgemm(qrows, blocksize, columnscount, ae_complex_from_d(1.0), q, 0, blockstart, 0, &tmpa, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
                cmatrixgemm(qrows, blocksize, blocksize, ae_complex_from_d(1.0), &tmpr, 0, 0, 0, &tmpt, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, blocksize, _state);
                cmatrixgemm(qrows, columnscount, blocksize, ae_complex_from_d(1.0), &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, ae_complex_from_d(1.0), q, 0, blockstart, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply reflectors one at a time from
                 * the right, last reflector first. The stored vector is
                 * conjugated and so is Tau, matching the Q' update above.
                 */
                for(i=blocksize-1; i>=0; i--)
                {
                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,columnscount-i));
                    t.ptr.p_complex[1] = ae_complex_from_i(1);
                    complexapplyreflectionfromtheright(q, ae_c_conj(taubuf.ptr.p_complex[i], _state), &t, 0, qrows-1, blockstart+i, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance to the previous (always full-sized) block.
         */
        blockstart = blockstart-ts;
        blocksize = ts;
    }
    ae_frame_leave(_state);
}
20672 
20673 
20674 /*************************************************************************
20675 Unpacking of matrix L from the LQ decomposition of a matrix A
20676 
20677 Input parameters:
20678     A       -   matrices Q and L in compact form.
20679                 Output of CMatrixLQ subroutine.
20680     M       -   number of rows in given matrix A. M>=0.
20681     N       -   number of columns in given matrix A. N>=0.
20682 
20683 Output parameters:
20684     L       -   matrix L, array[0..M-1, 0..N-1].
20685 
20686   -- ALGLIB routine --
20687      17.02.2010
20688      Bochkanov Sergey
20689 *************************************************************************/
void cmatrixlqunpackl(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_matrix* l,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;

    ae_matrix_clear(l);

    if( m<=0||n<=0 )
    {
        return;
    }

    /*
     * L is M*N: entries on or below the diagonal are taken from the
     * compact LQ storage in A, every other entry is zero.
     */
    ae_matrix_set_length(l, m, n, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( j<=i )
            {
                l->ptr.pp_complex[i][j] = a->ptr.pp_complex[i][j];
            }
            else
            {
                l->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }
}
20720 
20721 
20722 /*************************************************************************
20723 Base case for real QR
20724 
20725   -- LAPACK routine (version 3.0) --
20726      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
20727      Courant Institute, Argonne National Lab, and Rice University
20728      September 30, 1994.
20729      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
20730      pseudocode, 2007-2010.
20731 *************************************************************************/
void rmatrixqrbasecase(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* work,
     /* Real    */ ae_vector* t,
     /* Real    */ ae_vector* tau,
     ae_state *_state)
{
    ae_int_t idx;
    ae_int_t reflcnt;
    double beta;

    /*
     * One elementary reflector per column of the leading min(M,N) block.
     */
    reflcnt = ae_minint(m, n, _state);
    for(idx=0; idx<reflcnt; idx++)
    {

        /*
         * Generate reflector H(idx) annihilating A(idx+1:m-1,idx):
         * the column is gathered into T[1..m-idx] (note the row stride
         * of A), transformed, and scattered back in place.
         */
        ae_v_move(&t->ptr.p_double[1], 1, &a->ptr.pp_double[idx][idx], a->stride, ae_v_len(1,m-idx));
        generatereflection(t, m-idx, &beta, _state);
        tau->ptr.p_double[idx] = beta;
        ae_v_move(&a->ptr.pp_double[idx][idx], a->stride, &t->ptr.p_double[1], 1, ae_v_len(idx,m-1));
        t->ptr.p_double[1] = (double)(1);
        if( idx<n )
        {

            /*
             * Apply H(idx) to the trailing submatrix A(idx:m-1,idx+1:n-1)
             * from the left (condition is always true here since
             * idx<=reflcnt-1<=n-1; kept from the LAPACK original).
             */
            applyreflectionfromtheleft(a, tau->ptr.p_double[idx], t, idx, m-1, idx+1, n-1, work, _state);
        }
    }
}
20773 
20774 
20775 /*************************************************************************
20776 Base case for real LQ
20777 
20778   -- LAPACK routine (version 3.0) --
20779      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
20780      Courant Institute, Argonne National Lab, and Rice University
20781      September 30, 1994.
20782      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
20783      pseudocode, 2007-2010.
20784 *************************************************************************/
rmatrixlqbasecase(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * work,ae_vector * t,ae_vector * tau,ae_state * _state)20785 void rmatrixlqbasecase(/* Real    */ ae_matrix* a,
20786      ae_int_t m,
20787      ae_int_t n,
20788      /* Real    */ ae_vector* work,
20789      /* Real    */ ae_vector* t,
20790      /* Real    */ ae_vector* tau,
20791      ae_state *_state)
20792 {
20793     ae_int_t i;
20794     ae_int_t k;
20795     double tmp;
20796 
20797 
20798     k = ae_minint(m, n, _state);
20799     for(i=0; i<=k-1; i++)
20800     {
20801 
20802         /*
20803          * Generate elementary reflector H(i) to annihilate A(i,i+1:n-1)
20804          */
20805         ae_v_move(&t->ptr.p_double[1], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(1,n-i));
20806         generatereflection(t, n-i, &tmp, _state);
20807         tau->ptr.p_double[i] = tmp;
20808         ae_v_move(&a->ptr.pp_double[i][i], 1, &t->ptr.p_double[1], 1, ae_v_len(i,n-1));
20809         t->ptr.p_double[1] = (double)(1);
20810         if( i<n )
20811         {
20812 
20813             /*
20814              * Apply H(i) to A(i+1:m,i:n) from the right
20815              */
20816             applyreflectionfromtheright(a, tau->ptr.p_double[i], t, i+1, m-1, i, n-1, work, _state);
20817         }
20818     }
20819 }
20820 
20821 
20822 /*************************************************************************
20823 Reduction of a rectangular matrix to  bidiagonal form
20824 
20825 The algorithm reduces the rectangular matrix A to  bidiagonal form by
20826 orthogonal transformations P and Q: A = Q*B*(P^T).
20827 
20828   ! COMMERCIAL EDITION OF ALGLIB:
20829   !
20830   ! Commercial Edition of ALGLIB includes following important improvements
20831   ! of this function:
20832   ! * high-performance native backend with same C# interface (C# version)
20833   ! * hardware vendor (Intel) implementations of linear algebra primitives
20834   !   (C++ and C# versions, x86/x64 platform)
20835   !
20836   ! We recommend you to read 'Working with commercial version' section  of
20837   ! ALGLIB Reference Manual in order to find out how to  use  performance-
20838   ! related features provided by commercial edition of ALGLIB.
20839 
20840 Input parameters:
20841     A       -   source matrix. array[0..M-1, 0..N-1]
20842     M       -   number of rows in matrix A.
20843     N       -   number of columns in matrix A.
20844 
20845 Output parameters:
20846     A       -   matrices Q, B, P in compact form (see below).
20847     TauQ    -   scalar factors which are used to form matrix Q.
20848     TauP    -   scalar factors which are used to form matrix P.
20849 
20850 The main diagonal and one of the  secondary  diagonals  of  matrix  A  are
20851 replaced with bidiagonal  matrix  B.  Other  elements  contain  elementary
20852 reflections which form MxM matrix Q and NxN matrix P, respectively.
20853 
20854 If M>=N, B is the upper  bidiagonal  MxN  matrix  and  is  stored  in  the
20855 corresponding  elements  of  matrix  A.  Matrix  Q  is  represented  as  a
20856 product   of   elementary   reflections   Q = H(0)*H(1)*...*H(n-1),  where
20857 H(i) = 1-tau*v*v'. Here tau is a scalar which is stored  in  TauQ[i],  and
20858 vector v has the following  structure:  v(0:i-1)=0, v(i)=1, v(i+1:m-1)  is
20859 stored   in   elements   A(i+1:m-1,i).   Matrix   P  is  as  follows:  P =
20860 G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i],
20861 u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).
20862 
20863 If M<N, B is the  lower  bidiagonal  MxN  matrix  and  is  stored  in  the
20864 corresponding   elements  of  matrix  A.  Q = H(0)*H(1)*...*H(m-2),  where
20865 H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
20866 is    stored    in   elements   A(i+2:m-1,i).    P = G(0)*G(1)*...*G(m-1),
20867 G(i) = 1-tau*u*u', tau is stored in  TauP,  u(0:i-1)=0, u(i)=1, u(i+1:n-1)
20868 is stored in A(i,i+1:n-1).
20869 
20870 EXAMPLE:
20871 
20872 m=6, n=5 (m > n):               m=5, n=6 (m < n):
20873 
20874 (  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
20875 (  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
20876 (  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
20877 (  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
20878 (  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
20879 (  v1  v2  v3  v4  v5 )
20880 
20881 Here vi and ui are vectors which form H(i) and G(i), and d and e -
20882 are the diagonal and off-diagonal elements of matrix B.
20883 
20884   -- LAPACK routine (version 3.0) --
20885      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
20886      Courant Institute, Argonne National Lab, and Rice University
20887      September 30, 1994.
20888      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
20889      pseudocode, 2007-2010.
20890 *************************************************************************/
rmatrixbd(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * tauq,ae_vector * taup,ae_state * _state)20891 void rmatrixbd(/* Real    */ ae_matrix* a,
20892      ae_int_t m,
20893      ae_int_t n,
20894      /* Real    */ ae_vector* tauq,
20895      /* Real    */ ae_vector* taup,
20896      ae_state *_state)
20897 {
20898     ae_frame _frame_block;
20899     ae_vector work;
20900     ae_vector t;
20901     ae_int_t maxmn;
20902     ae_int_t i;
20903     double ltau;
20904 
20905     ae_frame_make(_state, &_frame_block);
20906     memset(&work, 0, sizeof(work));
20907     memset(&t, 0, sizeof(t));
20908     ae_vector_clear(tauq);
20909     ae_vector_clear(taup);
20910     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
20911     ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
20912 
20913 
20914     /*
20915      * Prepare
20916      */
20917     if( n<=0||m<=0 )
20918     {
20919         ae_frame_leave(_state);
20920         return;
20921     }
20922     maxmn = ae_maxint(m, n, _state);
20923     ae_vector_set_length(&work, maxmn+1, _state);
20924     ae_vector_set_length(&t, maxmn+1, _state);
20925     if( m>=n )
20926     {
20927         ae_vector_set_length(tauq, n, _state);
20928         ae_vector_set_length(taup, n, _state);
20929         for(i=0; i<=n-1; i++)
20930         {
20931             tauq->ptr.p_double[i] = 0.0;
20932             taup->ptr.p_double[i] = 0.0;
20933         }
20934     }
20935     else
20936     {
20937         ae_vector_set_length(tauq, m, _state);
20938         ae_vector_set_length(taup, m, _state);
20939         for(i=0; i<=m-1; i++)
20940         {
20941             tauq->ptr.p_double[i] = 0.0;
20942             taup->ptr.p_double[i] = 0.0;
20943         }
20944     }
20945 
20946     /*
20947      * Try to use MKL code
20948      *
20949      * NOTE: buffers Work[] and T[] are used for temporary storage of diagonals;
20950      * because they are present in A[], we do not use them.
20951      */
20952     if( rmatrixbdmkl(a, m, n, &work, &t, tauq, taup, _state) )
20953     {
20954         ae_frame_leave(_state);
20955         return;
20956     }
20957 
20958     /*
20959      * ALGLIB code
20960      */
20961     if( m>=n )
20962     {
20963 
20964         /*
20965          * Reduce to upper bidiagonal form
20966          */
20967         for(i=0; i<=n-1; i++)
20968         {
20969 
20970             /*
20971              * Generate elementary reflector H(i) to annihilate A(i+1:m-1,i)
20972              */
20973             ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i], a->stride, ae_v_len(1,m-i));
20974             generatereflection(&t, m-i, &ltau, _state);
20975             tauq->ptr.p_double[i] = ltau;
20976             ae_v_move(&a->ptr.pp_double[i][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i,m-1));
20977             t.ptr.p_double[1] = (double)(1);
20978 
20979             /*
20980              * Apply H(i) to A(i:m-1,i+1:n-1) from the left
20981              */
20982             applyreflectionfromtheleft(a, ltau, &t, i, m-1, i+1, n-1, &work, _state);
20983             if( i<n-1 )
20984             {
20985 
20986                 /*
20987                  * Generate elementary reflector G(i) to annihilate
20988                  * A(i,i+2:n-1)
20989                  */
20990                 ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(1,n-i-1));
20991                 generatereflection(&t, n-1-i, &ltau, _state);
20992                 taup->ptr.p_double[i] = ltau;
20993                 ae_v_move(&a->ptr.pp_double[i][i+1], 1, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1));
20994                 t.ptr.p_double[1] = (double)(1);
20995 
20996                 /*
20997                  * Apply G(i) to A(i+1:m-1,i+1:n-1) from the right
20998                  */
20999                 applyreflectionfromtheright(a, ltau, &t, i+1, m-1, i+1, n-1, &work, _state);
21000             }
21001             else
21002             {
21003                 taup->ptr.p_double[i] = (double)(0);
21004             }
21005         }
21006     }
21007     else
21008     {
21009 
21010         /*
21011          * Reduce to lower bidiagonal form
21012          */
21013         for(i=0; i<=m-1; i++)
21014         {
21015 
21016             /*
21017              * Generate elementary reflector G(i) to annihilate A(i,i+1:n-1)
21018              */
21019             ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(1,n-i));
21020             generatereflection(&t, n-i, &ltau, _state);
21021             taup->ptr.p_double[i] = ltau;
21022             ae_v_move(&a->ptr.pp_double[i][i], 1, &t.ptr.p_double[1], 1, ae_v_len(i,n-1));
21023             t.ptr.p_double[1] = (double)(1);
21024 
21025             /*
21026              * Apply G(i) to A(i+1:m-1,i:n-1) from the right
21027              */
21028             applyreflectionfromtheright(a, ltau, &t, i+1, m-1, i, n-1, &work, _state);
21029             if( i<m-1 )
21030             {
21031 
21032                 /*
21033                  * Generate elementary reflector H(i) to annihilate
21034                  * A(i+2:m-1,i)
21035                  */
21036                 ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,m-1-i));
21037                 generatereflection(&t, m-1-i, &ltau, _state);
21038                 tauq->ptr.p_double[i] = ltau;
21039                 ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,m-1));
21040                 t.ptr.p_double[1] = (double)(1);
21041 
21042                 /*
21043                  * Apply H(i) to A(i+1:m-1,i+1:n-1) from the left
21044                  */
21045                 applyreflectionfromtheleft(a, ltau, &t, i+1, m-1, i+1, n-1, &work, _state);
21046             }
21047             else
21048             {
21049                 tauq->ptr.p_double[i] = (double)(0);
21050             }
21051         }
21052     }
21053     ae_frame_leave(_state);
21054 }
21055 
21056 
21057 /*************************************************************************
21058 Unpacking matrix Q which reduces a matrix to bidiagonal form.
21059 
21060   ! COMMERCIAL EDITION OF ALGLIB:
21061   !
21062   ! Commercial Edition of ALGLIB includes following important improvements
21063   ! of this function:
21064   ! * high-performance native backend with same C# interface (C# version)
21065   ! * hardware vendor (Intel) implementations of linear algebra primitives
21066   !   (C++ and C# versions, x86/x64 platform)
21067   !
21068   ! We recommend you to read 'Working with commercial version' section  of
21069   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21070   ! related features provided by commercial edition of ALGLIB.
21071 
21072 Input parameters:
21073     QP          -   matrices Q and P in compact form.
21074                     Output of ToBidiagonal subroutine.
21075     M           -   number of rows in matrix A.
21076     N           -   number of columns in matrix A.
21077     TAUQ        -   scalar factors which are used to form Q.
21078                     Output of ToBidiagonal subroutine.
21079     QColumns    -   required number of columns in matrix Q.
21080                     M>=QColumns>=0.
21081 
21082 Output parameters:
21083     Q           -   first QColumns columns of matrix Q.
21084                     Array[0..M-1, 0..QColumns-1]
21085                     If QColumns=0, the array is not modified.
21086 
21087   -- ALGLIB --
21088      2005-2010
21089      Bochkanov Sergey
21090 *************************************************************************/
rmatrixbdunpackq(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * tauq,ae_int_t qcolumns,ae_matrix * q,ae_state * _state)21091 void rmatrixbdunpackq(/* Real    */ ae_matrix* qp,
21092      ae_int_t m,
21093      ae_int_t n,
21094      /* Real    */ ae_vector* tauq,
21095      ae_int_t qcolumns,
21096      /* Real    */ ae_matrix* q,
21097      ae_state *_state)
21098 {
21099     ae_int_t i;
21100     ae_int_t j;
21101 
21102     ae_matrix_clear(q);
21103 
21104     ae_assert(qcolumns<=m, "RMatrixBDUnpackQ: QColumns>M!", _state);
21105     ae_assert(qcolumns>=0, "RMatrixBDUnpackQ: QColumns<0!", _state);
21106     if( (m==0||n==0)||qcolumns==0 )
21107     {
21108         return;
21109     }
21110 
21111     /*
21112      * prepare Q
21113      */
21114     ae_matrix_set_length(q, m, qcolumns, _state);
21115     for(i=0; i<=m-1; i++)
21116     {
21117         for(j=0; j<=qcolumns-1; j++)
21118         {
21119             if( i==j )
21120             {
21121                 q->ptr.pp_double[i][j] = (double)(1);
21122             }
21123             else
21124             {
21125                 q->ptr.pp_double[i][j] = (double)(0);
21126             }
21127         }
21128     }
21129 
21130     /*
21131      * Calculate
21132      */
21133     rmatrixbdmultiplybyq(qp, m, n, tauq, q, m, qcolumns, ae_false, ae_false, _state);
21134 }
21135 
21136 
21137 /*************************************************************************
21138 Multiplication by matrix Q which reduces matrix A to  bidiagonal form.
21139 
21140 The algorithm allows pre- or post-multiply by Q or Q'.
21141 
21142   ! COMMERCIAL EDITION OF ALGLIB:
21143   !
21144   ! Commercial Edition of ALGLIB includes following important improvements
21145   ! of this function:
21146   ! * high-performance native backend with same C# interface (C# version)
21147   ! * hardware vendor (Intel) implementations of linear algebra primitives
21148   !   (C++ and C# versions, x86/x64 platform)
21149   !
21150   ! We recommend you to read 'Working with commercial version' section  of
21151   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21152   ! related features provided by commercial edition of ALGLIB.
21153 
21154 Input parameters:
21155     QP          -   matrices Q and P in compact form.
21156                     Output of ToBidiagonal subroutine.
21157     M           -   number of rows in matrix A.
21158     N           -   number of columns in matrix A.
21159     TAUQ        -   scalar factors which are used to form Q.
21160                     Output of ToBidiagonal subroutine.
21161     Z           -   multiplied matrix.
21162                     array[0..ZRows-1,0..ZColumns-1]
21163     ZRows       -   number of rows in matrix Z. If FromTheRight=False,
21164                     ZRows=M, otherwise ZRows can be arbitrary.
21165     ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
21166                     ZColumns=M, otherwise ZColumns can be arbitrary.
21167     FromTheRight -  pre- or post-multiply.
21168     DoTranspose -   multiply by Q or Q'.
21169 
21170 Output parameters:
21171     Z           -   product of Z and Q.
21172                     Array[0..ZRows-1,0..ZColumns-1]
21173                     If ZRows=0 or ZColumns=0, the array is not modified.
21174 
21175   -- ALGLIB --
21176      2005-2010
21177      Bochkanov Sergey
21178 *************************************************************************/
rmatrixbdmultiplybyq(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * tauq,ae_matrix * z,ae_int_t zrows,ae_int_t zcolumns,ae_bool fromtheright,ae_bool dotranspose,ae_state * _state)21179 void rmatrixbdmultiplybyq(/* Real    */ ae_matrix* qp,
21180      ae_int_t m,
21181      ae_int_t n,
21182      /* Real    */ ae_vector* tauq,
21183      /* Real    */ ae_matrix* z,
21184      ae_int_t zrows,
21185      ae_int_t zcolumns,
21186      ae_bool fromtheright,
21187      ae_bool dotranspose,
21188      ae_state *_state)
21189 {
21190     ae_frame _frame_block;
21191     ae_int_t i;
21192     ae_int_t i1;
21193     ae_int_t i2;
21194     ae_int_t istep;
21195     ae_vector v;
21196     ae_vector work;
21197     ae_vector dummy;
21198     ae_int_t mx;
21199 
21200     ae_frame_make(_state, &_frame_block);
21201     memset(&v, 0, sizeof(v));
21202     memset(&work, 0, sizeof(work));
21203     memset(&dummy, 0, sizeof(dummy));
21204     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
21205     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
21206     ae_vector_init(&dummy, 0, DT_REAL, _state, ae_true);
21207 
21208     if( ((m<=0||n<=0)||zrows<=0)||zcolumns<=0 )
21209     {
21210         ae_frame_leave(_state);
21211         return;
21212     }
21213     ae_assert((fromtheright&&zcolumns==m)||(!fromtheright&&zrows==m), "RMatrixBDMultiplyByQ: incorrect Z size!", _state);
21214 
21215     /*
21216      * Try to use MKL code
21217      */
21218     if( rmatrixbdmultiplybymkl(qp, m, n, tauq, &dummy, z, zrows, zcolumns, ae_true, fromtheright, dotranspose, _state) )
21219     {
21220         ae_frame_leave(_state);
21221         return;
21222     }
21223 
21224     /*
21225      * init
21226      */
21227     mx = ae_maxint(m, n, _state);
21228     mx = ae_maxint(mx, zrows, _state);
21229     mx = ae_maxint(mx, zcolumns, _state);
21230     ae_vector_set_length(&v, mx+1, _state);
21231     ae_vector_set_length(&work, mx+1, _state);
21232     if( m>=n )
21233     {
21234 
21235         /*
21236          * setup
21237          */
21238         if( fromtheright )
21239         {
21240             i1 = 0;
21241             i2 = n-1;
21242             istep = 1;
21243         }
21244         else
21245         {
21246             i1 = n-1;
21247             i2 = 0;
21248             istep = -1;
21249         }
21250         if( dotranspose )
21251         {
21252             i = i1;
21253             i1 = i2;
21254             i2 = i;
21255             istep = -istep;
21256         }
21257 
21258         /*
21259          * Process
21260          */
21261         i = i1;
21262         do
21263         {
21264             ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i], qp->stride, ae_v_len(1,m-i));
21265             v.ptr.p_double[1] = (double)(1);
21266             if( fromtheright )
21267             {
21268                 applyreflectionfromtheright(z, tauq->ptr.p_double[i], &v, 0, zrows-1, i, m-1, &work, _state);
21269             }
21270             else
21271             {
21272                 applyreflectionfromtheleft(z, tauq->ptr.p_double[i], &v, i, m-1, 0, zcolumns-1, &work, _state);
21273             }
21274             i = i+istep;
21275         }
21276         while(i!=i2+istep);
21277     }
21278     else
21279     {
21280 
21281         /*
21282          * setup
21283          */
21284         if( fromtheright )
21285         {
21286             i1 = 0;
21287             i2 = m-2;
21288             istep = 1;
21289         }
21290         else
21291         {
21292             i1 = m-2;
21293             i2 = 0;
21294             istep = -1;
21295         }
21296         if( dotranspose )
21297         {
21298             i = i1;
21299             i1 = i2;
21300             i2 = i;
21301             istep = -istep;
21302         }
21303 
21304         /*
21305          * Process
21306          */
21307         if( m-1>0 )
21308         {
21309             i = i1;
21310             do
21311             {
21312                 ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i+1][i], qp->stride, ae_v_len(1,m-i-1));
21313                 v.ptr.p_double[1] = (double)(1);
21314                 if( fromtheright )
21315                 {
21316                     applyreflectionfromtheright(z, tauq->ptr.p_double[i], &v, 0, zrows-1, i+1, m-1, &work, _state);
21317                 }
21318                 else
21319                 {
21320                     applyreflectionfromtheleft(z, tauq->ptr.p_double[i], &v, i+1, m-1, 0, zcolumns-1, &work, _state);
21321                 }
21322                 i = i+istep;
21323             }
21324             while(i!=i2+istep);
21325         }
21326     }
21327     ae_frame_leave(_state);
21328 }
21329 
21330 
21331 /*************************************************************************
21332 Unpacking matrix P which reduces matrix A to bidiagonal form.
21333 The subroutine returns transposed matrix P.
21334 
21335 Input parameters:
21336     QP      -   matrices Q and P in compact form.
21337                 Output of ToBidiagonal subroutine.
21338     M       -   number of rows in matrix A.
21339     N       -   number of columns in matrix A.
21340     TAUP    -   scalar factors which are used to form P.
21341                 Output of ToBidiagonal subroutine.
21342     PTRows  -   required number of rows of matrix P^T. N >= PTRows >= 0.
21343 
21344 Output parameters:
21345     PT      -   first PTRows columns of matrix P^T
21346                 Array[0..PTRows-1, 0..N-1]
21347                 If PTRows=0, the array is not modified.
21348 
21349   -- ALGLIB --
21350      2005-2010
21351      Bochkanov Sergey
21352 *************************************************************************/
rmatrixbdunpackpt(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * taup,ae_int_t ptrows,ae_matrix * pt,ae_state * _state)21353 void rmatrixbdunpackpt(/* Real    */ ae_matrix* qp,
21354      ae_int_t m,
21355      ae_int_t n,
21356      /* Real    */ ae_vector* taup,
21357      ae_int_t ptrows,
21358      /* Real    */ ae_matrix* pt,
21359      ae_state *_state)
21360 {
21361     ae_int_t i;
21362     ae_int_t j;
21363 
21364     ae_matrix_clear(pt);
21365 
21366     ae_assert(ptrows<=n, "RMatrixBDUnpackPT: PTRows>N!", _state);
21367     ae_assert(ptrows>=0, "RMatrixBDUnpackPT: PTRows<0!", _state);
21368     if( (m==0||n==0)||ptrows==0 )
21369     {
21370         return;
21371     }
21372 
21373     /*
21374      * prepare PT
21375      */
21376     ae_matrix_set_length(pt, ptrows, n, _state);
21377     for(i=0; i<=ptrows-1; i++)
21378     {
21379         for(j=0; j<=n-1; j++)
21380         {
21381             if( i==j )
21382             {
21383                 pt->ptr.pp_double[i][j] = (double)(1);
21384             }
21385             else
21386             {
21387                 pt->ptr.pp_double[i][j] = (double)(0);
21388             }
21389         }
21390     }
21391 
21392     /*
21393      * Calculate
21394      */
21395     rmatrixbdmultiplybyp(qp, m, n, taup, pt, ptrows, n, ae_true, ae_true, _state);
21396 }
21397 
21398 
21399 /*************************************************************************
21400 Multiplication by matrix P which reduces matrix A to  bidiagonal form.
21401 
21402 The algorithm allows pre- or post-multiply by P or P'.
21403 
21404 Input parameters:
21405     QP          -   matrices Q and P in compact form.
21406                     Output of RMatrixBD subroutine.
21407     M           -   number of rows in matrix A.
21408     N           -   number of columns in matrix A.
21409     TAUP        -   scalar factors which are used to form P.
21410                     Output of RMatrixBD subroutine.
21411     Z           -   multiplied matrix.
21412                     Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
21413     ZRows       -   number of rows in matrix Z. If FromTheRight=False,
21414                     ZRows=N, otherwise ZRows can be arbitrary.
21415     ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
21416                     ZColumns=N, otherwise ZColumns can be arbitrary.
21417     FromTheRight -  pre- or post-multiply.
21418     DoTranspose -   multiply by P or P'.
21419 
21420 Output parameters:
21421     Z - product of Z and P.
21422                 Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
21423                 If ZRows=0 or ZColumns=0, the array is not modified.
21424 
21425   -- ALGLIB --
21426      2005-2010
21427      Bochkanov Sergey
21428 *************************************************************************/
rmatrixbdmultiplybyp(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * taup,ae_matrix * z,ae_int_t zrows,ae_int_t zcolumns,ae_bool fromtheright,ae_bool dotranspose,ae_state * _state)21429 void rmatrixbdmultiplybyp(/* Real    */ ae_matrix* qp,
21430      ae_int_t m,
21431      ae_int_t n,
21432      /* Real    */ ae_vector* taup,
21433      /* Real    */ ae_matrix* z,
21434      ae_int_t zrows,
21435      ae_int_t zcolumns,
21436      ae_bool fromtheright,
21437      ae_bool dotranspose,
21438      ae_state *_state)
21439 {
21440     ae_frame _frame_block;
21441     ae_int_t i;
21442     ae_vector v;
21443     ae_vector work;
21444     ae_vector dummy;
21445     ae_int_t mx;
21446     ae_int_t i1;
21447     ae_int_t i2;
21448     ae_int_t istep;
21449 
21450     ae_frame_make(_state, &_frame_block);
21451     memset(&v, 0, sizeof(v));
21452     memset(&work, 0, sizeof(work));
21453     memset(&dummy, 0, sizeof(dummy));
21454     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
21455     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
21456     ae_vector_init(&dummy, 0, DT_REAL, _state, ae_true);
21457 
21458     if( ((m<=0||n<=0)||zrows<=0)||zcolumns<=0 )
21459     {
21460         ae_frame_leave(_state);
21461         return;
21462     }
21463     ae_assert((fromtheright&&zcolumns==n)||(!fromtheright&&zrows==n), "RMatrixBDMultiplyByP: incorrect Z size!", _state);
21464 
21465     /*
21466      * init
21467      */
21468     mx = ae_maxint(m, n, _state);
21469     mx = ae_maxint(mx, zrows, _state);
21470     mx = ae_maxint(mx, zcolumns, _state);
21471     ae_vector_set_length(&v, mx+1, _state);
21472     ae_vector_set_length(&work, mx+1, _state);
21473     if( m>=n )
21474     {
21475 
21476         /*
21477          * setup
21478          */
21479         if( fromtheright )
21480         {
21481             i1 = n-2;
21482             i2 = 0;
21483             istep = -1;
21484         }
21485         else
21486         {
21487             i1 = 0;
21488             i2 = n-2;
21489             istep = 1;
21490         }
21491         if( !dotranspose )
21492         {
21493             i = i1;
21494             i1 = i2;
21495             i2 = i;
21496             istep = -istep;
21497         }
21498 
21499         /*
21500          * Process
21501          */
21502         if( n-1>0 )
21503         {
21504             i = i1;
21505             do
21506             {
21507                 ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i+1], 1, ae_v_len(1,n-1-i));
21508                 v.ptr.p_double[1] = (double)(1);
21509                 if( fromtheright )
21510                 {
21511                     applyreflectionfromtheright(z, taup->ptr.p_double[i], &v, 0, zrows-1, i+1, n-1, &work, _state);
21512                 }
21513                 else
21514                 {
21515                     applyreflectionfromtheleft(z, taup->ptr.p_double[i], &v, i+1, n-1, 0, zcolumns-1, &work, _state);
21516                 }
21517                 i = i+istep;
21518             }
21519             while(i!=i2+istep);
21520         }
21521     }
21522     else
21523     {
21524 
21525         /*
21526          * setup
21527          */
21528         if( fromtheright )
21529         {
21530             i1 = m-1;
21531             i2 = 0;
21532             istep = -1;
21533         }
21534         else
21535         {
21536             i1 = 0;
21537             i2 = m-1;
21538             istep = 1;
21539         }
21540         if( !dotranspose )
21541         {
21542             i = i1;
21543             i1 = i2;
21544             i2 = i;
21545             istep = -istep;
21546         }
21547 
21548         /*
21549          * Process
21550          */
21551         i = i1;
21552         do
21553         {
21554             ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i], 1, ae_v_len(1,n-i));
21555             v.ptr.p_double[1] = (double)(1);
21556             if( fromtheright )
21557             {
21558                 applyreflectionfromtheright(z, taup->ptr.p_double[i], &v, 0, zrows-1, i, n-1, &work, _state);
21559             }
21560             else
21561             {
21562                 applyreflectionfromtheleft(z, taup->ptr.p_double[i], &v, i, n-1, 0, zcolumns-1, &work, _state);
21563             }
21564             i = i+istep;
21565         }
21566         while(i!=i2+istep);
21567     }
21568     ae_frame_leave(_state);
21569 }
21570 
21571 
21572 /*************************************************************************
21573 Unpacking of the main and secondary diagonals of bidiagonal decomposition
21574 of matrix A.
21575 
21576 Input parameters:
21577     B   -   output of RMatrixBD subroutine.
21578     M   -   number of rows in matrix B.
21579     N   -   number of columns in matrix B.
21580 
21581 Output parameters:
21582     IsUpper -   True, if the matrix is upper bidiagonal.
21583                 otherwise IsUpper is False.
21584     D       -   the main diagonal.
21585                 Array whose index ranges within [0..Min(M,N)-1].
21586     E       -   the secondary diagonal (upper or lower, depending on
21587                 the value of IsUpper).
21588                 Array index ranges within [0..Min(M,N)-1], the last
21589                 element is not used.
21590 
21591   -- ALGLIB --
21592      2005-2010
21593      Bochkanov Sergey
21594 *************************************************************************/
rmatrixbdunpackdiagonals(ae_matrix * b,ae_int_t m,ae_int_t n,ae_bool * isupper,ae_vector * d,ae_vector * e,ae_state * _state)21595 void rmatrixbdunpackdiagonals(/* Real    */ ae_matrix* b,
21596      ae_int_t m,
21597      ae_int_t n,
21598      ae_bool* isupper,
21599      /* Real    */ ae_vector* d,
21600      /* Real    */ ae_vector* e,
21601      ae_state *_state)
21602 {
21603     ae_int_t i;
21604 
21605     *isupper = ae_false;
21606     ae_vector_clear(d);
21607     ae_vector_clear(e);
21608 
21609     *isupper = m>=n;
21610     if( m<=0||n<=0 )
21611     {
21612         return;
21613     }
21614     if( *isupper )
21615     {
21616         ae_vector_set_length(d, n, _state);
21617         ae_vector_set_length(e, n, _state);
21618         for(i=0; i<=n-2; i++)
21619         {
21620             d->ptr.p_double[i] = b->ptr.pp_double[i][i];
21621             e->ptr.p_double[i] = b->ptr.pp_double[i][i+1];
21622         }
21623         d->ptr.p_double[n-1] = b->ptr.pp_double[n-1][n-1];
21624     }
21625     else
21626     {
21627         ae_vector_set_length(d, m, _state);
21628         ae_vector_set_length(e, m, _state);
21629         for(i=0; i<=m-2; i++)
21630         {
21631             d->ptr.p_double[i] = b->ptr.pp_double[i][i];
21632             e->ptr.p_double[i] = b->ptr.pp_double[i+1][i];
21633         }
21634         d->ptr.p_double[m-1] = b->ptr.pp_double[m-1][m-1];
21635     }
21636 }
21637 
21638 
21639 /*************************************************************************
21640 Reduction of a square matrix to  upper Hessenberg form: Q'*A*Q = H,
21641 where Q is an orthogonal matrix, H - Hessenberg matrix.
21642 
21643   ! COMMERCIAL EDITION OF ALGLIB:
21644   !
21645   ! Commercial Edition of ALGLIB includes following important improvements
21646   ! of this function:
21647   ! * high-performance native backend with same C# interface (C# version)
21648   ! * hardware vendor (Intel) implementations of linear algebra primitives
21649   !   (C++ and C# versions, x86/x64 platform)
21650   !
21651   ! We recommend you to read 'Working with commercial version' section  of
21652   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21653   ! related features provided by commercial edition of ALGLIB.
21654 
21655 Input parameters:
21656     A       -   matrix A with elements [0..N-1, 0..N-1]
21657     N       -   size of matrix A.
21658 
21659 Output parameters:
21660     A       -   matrices Q and P in  compact form (see below).
21661     Tau     -   array of scalar factors which are used to form matrix Q.
21662                 Array whose index ranges within [0..N-2]
21663 
21664 Matrix H is located on the main diagonal, on the lower secondary  diagonal
21665 and above the main diagonal of matrix A. The elements which are used to
21666 form matrix Q are situated in array Tau and below the lower secondary
21667 diagonal of matrix A as follows:
21668 
21669 Matrix Q is represented as a product of elementary reflections
21670 
21671 Q = H(0)*H(1)*...*H(n-2),
21672 
21673 where each H(i) is given by
21674 
21675 H(i) = 1 - tau * v * (v^T)
21676 
21677 where tau is a scalar stored in Tau[I]; v - is a real vector,
21678 so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i).
21679 
21680   -- LAPACK routine (version 3.0) --
21681      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
21682      Courant Institute, Argonne National Lab, and Rice University
21683      October 31, 1992
21684 *************************************************************************/
void rmatrixhessenberg(/* Real    */ ae_matrix* a,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    double v;                   /* scalar factor Tau[i] of the current reflector */
    ae_vector t;                /* 1-based buffer holding the reflector vector */
    ae_vector work;             /* scratch used by ApplyReflectionFrom* routines */

    ae_frame_make(_state, &_frame_block);
    memset(&t, 0, sizeof(t));
    memset(&work, 0, sizeof(work));
    ae_vector_clear(tau);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=0, "RMatrixHessenberg: incorrect N!", _state);

    /*
     * Quick return if possible: 0x0 and 1x1 matrices are trivially Hessenberg,
     * and Tau has no elements in those cases.
     */
    if( n<=1 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Allocate place
     */
    ae_vector_set_length(tau, n-2+1, _state);
    ae_vector_set_length(&t, n+1, _state);
    ae_vector_set_length(&work, n-1+1, _state);

    /*
     * MKL version: if the optimized backend handles this size, we are done
     */
    if( rmatrixhessenbergmkl(a, n, tau, _state) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * ALGLIB version
     */
    for(i=0; i<=n-2; i++)
    {

        /*
         * Compute elementary reflector H(i) to annihilate A(i+2:ihi,i).
         * Column i below the diagonal is copied into the 1-based buffer T,
         * the reflector is generated, and the (modified) vector is written
         * back so V is stored in A(i+2:n-1,i).
         */
        ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
        generatereflection(&t, n-i-1, &v, _state);
        ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1));
        tau->ptr.p_double[i] = v;
        t.ptr.p_double[1] = (double)(1);

        /*
         * Apply H(i) to A(1:ihi,i+1:ihi) from the right
         */
        applyreflectionfromtheright(a, v, &t, 0, n-1, i+1, n-1, &work, _state);

        /*
         * Apply H(i) to A(i+1:ihi,i+1:n) from the left
         */
        applyreflectionfromtheleft(a, v, &t, i+1, n-1, i+1, n-1, &work, _state);
    }
    ae_frame_leave(_state);
}
21757 
21758 
21759 /*************************************************************************
21760 Unpacking matrix Q which reduces matrix A to upper Hessenberg form
21761 
21762   ! COMMERCIAL EDITION OF ALGLIB:
21763   !
21764   ! Commercial Edition of ALGLIB includes following important improvements
21765   ! of this function:
21766   ! * high-performance native backend with same C# interface (C# version)
21767   ! * hardware vendor (Intel) implementations of linear algebra primitives
21768   !   (C++ and C# versions, x86/x64 platform)
21769   !
21770   ! We recommend you to read 'Working with commercial version' section  of
21771   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21772   ! related features provided by commercial edition of ALGLIB.
21773 
21774 Input parameters:
21775     A   -   output of RMatrixHessenberg subroutine.
21776     N   -   size of matrix A.
21777     Tau -   scalar factors which are used to form Q.
21778             Output of RMatrixHessenberg subroutine.
21779 
21780 Output parameters:
21781     Q   -   matrix Q.
21782             Array whose indexes range within [0..N-1, 0..N-1].
21783 
21784   -- ALGLIB --
21785      2005-2010
21786      Bochkanov Sergey
21787 *************************************************************************/
rmatrixhessenbergunpackq(ae_matrix * a,ae_int_t n,ae_vector * tau,ae_matrix * q,ae_state * _state)21788 void rmatrixhessenbergunpackq(/* Real    */ ae_matrix* a,
21789      ae_int_t n,
21790      /* Real    */ ae_vector* tau,
21791      /* Real    */ ae_matrix* q,
21792      ae_state *_state)
21793 {
21794     ae_frame _frame_block;
21795     ae_int_t i;
21796     ae_int_t j;
21797     ae_vector v;
21798     ae_vector work;
21799 
21800     ae_frame_make(_state, &_frame_block);
21801     memset(&v, 0, sizeof(v));
21802     memset(&work, 0, sizeof(work));
21803     ae_matrix_clear(q);
21804     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
21805     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
21806 
21807     if( n==0 )
21808     {
21809         ae_frame_leave(_state);
21810         return;
21811     }
21812 
21813     /*
21814      * init
21815      */
21816     ae_matrix_set_length(q, n-1+1, n-1+1, _state);
21817     ae_vector_set_length(&v, n-1+1, _state);
21818     ae_vector_set_length(&work, n-1+1, _state);
21819     for(i=0; i<=n-1; i++)
21820     {
21821         for(j=0; j<=n-1; j++)
21822         {
21823             if( i==j )
21824             {
21825                 q->ptr.pp_double[i][j] = (double)(1);
21826             }
21827             else
21828             {
21829                 q->ptr.pp_double[i][j] = (double)(0);
21830             }
21831         }
21832     }
21833 
21834     /*
21835      * MKL version
21836      */
21837     if( rmatrixhessenbergunpackqmkl(a, n, tau, q, _state) )
21838     {
21839         ae_frame_leave(_state);
21840         return;
21841     }
21842 
21843     /*
21844      * ALGLIB version: unpack Q
21845      */
21846     for(i=0; i<=n-2; i++)
21847     {
21848 
21849         /*
21850          * Apply H(i)
21851          */
21852         ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
21853         v.ptr.p_double[1] = (double)(1);
21854         applyreflectionfromtheright(q, tau->ptr.p_double[i], &v, 0, n-1, i+1, n-1, &work, _state);
21855     }
21856     ae_frame_leave(_state);
21857 }
21858 
21859 
21860 /*************************************************************************
21861 Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form)
21862 
21863 Input parameters:
21864     A   -   output of RMatrixHessenberg subroutine.
21865     N   -   size of matrix A.
21866 
21867 Output parameters:
21868     H   -   matrix H. Array whose indexes range within [0..N-1, 0..N-1].
21869 
21870   -- ALGLIB --
21871      2005-2010
21872      Bochkanov Sergey
21873 *************************************************************************/
rmatrixhessenbergunpackh(ae_matrix * a,ae_int_t n,ae_matrix * h,ae_state * _state)21874 void rmatrixhessenbergunpackh(/* Real    */ ae_matrix* a,
21875      ae_int_t n,
21876      /* Real    */ ae_matrix* h,
21877      ae_state *_state)
21878 {
21879     ae_frame _frame_block;
21880     ae_int_t i;
21881     ae_int_t j;
21882     ae_vector v;
21883     ae_vector work;
21884 
21885     ae_frame_make(_state, &_frame_block);
21886     memset(&v, 0, sizeof(v));
21887     memset(&work, 0, sizeof(work));
21888     ae_matrix_clear(h);
21889     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
21890     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
21891 
21892     if( n==0 )
21893     {
21894         ae_frame_leave(_state);
21895         return;
21896     }
21897     ae_matrix_set_length(h, n-1+1, n-1+1, _state);
21898     for(i=0; i<=n-1; i++)
21899     {
21900         for(j=0; j<=i-2; j++)
21901         {
21902             h->ptr.pp_double[i][j] = (double)(0);
21903         }
21904         j = ae_maxint(0, i-1, _state);
21905         ae_v_move(&h->ptr.pp_double[i][j], 1, &a->ptr.pp_double[i][j], 1, ae_v_len(j,n-1));
21906     }
21907     ae_frame_leave(_state);
21908 }
21909 
21910 
21911 /*************************************************************************
21912 Reduction of a symmetric matrix which is given by its higher or lower
21913 triangular part to a tridiagonal matrix using orthogonal similarity
21914 transformation: Q'*A*Q=T.
21915 
21916   ! COMMERCIAL EDITION OF ALGLIB:
21917   !
21918   ! Commercial Edition of ALGLIB includes following important improvements
21919   ! of this function:
21920   ! * high-performance native backend with same C# interface (C# version)
21921   ! * hardware vendor (Intel) implementations of linear algebra primitives
21922   !   (C++ and C# versions, x86/x64 platform)
21923   !
21924   ! We recommend you to read 'Working with commercial version' section  of
21925   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21926   ! related features provided by commercial edition of ALGLIB.
21927 
21928 Input parameters:
21929     A       -   matrix to be transformed
21930                 array with elements [0..N-1, 0..N-1].
21931     N       -   size of matrix A.
21932     IsUpper -   storage format. If IsUpper = True, then matrix A is given
21933                 by its upper triangle, and the lower triangle is not used
21934                 and not modified by the algorithm, and vice versa
21935                 if IsUpper = False.
21936 
21937 Output parameters:
21938     A       -   matrices T and Q in  compact form (see lower)
21939     Tau     -   array of factors which are forming matrices H(i)
21940                 array with elements [0..N-2].
21941     D       -   main diagonal of symmetric matrix T.
21942                 array with elements [0..N-1].
21943     E       -   secondary diagonal of symmetric matrix T.
21944                 array with elements [0..N-2].
21945 
21946 
21947   If IsUpper=True, the matrix Q is represented as a product of elementary
21948   reflectors
21949 
21950      Q = H(n-2) . . . H(1) H(0).
21951 
21952   Each H(i) has the form
21953 
21954      H(i) = I - tau * v * v'
21955 
21956   where tau is a real scalar, and v is a real vector with
21957   v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
21958   A(0:i-1,i+1), and tau in TAU(i).
21959 
21960   If IsUpper=False, the matrix Q is represented as a product of elementary
21961   reflectors
21962 
21963      Q = H(0) H(1) . . . H(n-2).
21964 
21965   Each H(i) has the form
21966 
21967      H(i) = I - tau * v * v'
21968 
21969   where tau is a real scalar, and v is a real vector with
21970   v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
21971   and tau in TAU(i).
21972 
21973   The contents of A on exit are illustrated by the following examples
21974   with n = 5:
21975 
21976   if UPLO = 'U':                       if UPLO = 'L':
21977 
21978     (  d   e   v1  v2  v3 )              (  d                  )
21979     (      d   e   v2  v3 )              (  e   d              )
21980     (          d   e   v3 )              (  v0  e   d          )
21981     (              d   e  )              (  v0  v1  e   d      )
21982     (                  d  )              (  v0  v1  v2  e   d  )
21983 
21984   where d and e denote diagonal and off-diagonal elements of T, and vi
21985   denotes an element of the vector defining H(i).
21986 
21987   -- LAPACK routine (version 3.0) --
21988      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
21989      Courant Institute, Argonne National Lab, and Rice University
21990      October 31, 1992
21991 *************************************************************************/
void smatrixtd(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     /* Real    */ ae_vector* tau,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    double alpha;               /* coefficient -1/2*tau*(x'*v) of the w correction */
    double taui;                /* scalar factor of the current reflector */
    double v;                   /* dot product x'*v */
    ae_vector t;                /* 1-based copy of the reflector vector */
    ae_vector t2;               /* scratch for matrix-vector products / rank-2 update */
    ae_vector t3;               /* scratch for matrix-vector products / rank-2 update */

    ae_frame_make(_state, &_frame_block);
    memset(&t, 0, sizeof(t));
    memset(&t2, 0, sizeof(t2));
    memset(&t3, 0, sizeof(t3));
    ae_vector_clear(tau);
    ae_vector_clear(d);
    ae_vector_clear(e);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t3, 0, DT_REAL, _state, ae_true);

    if( n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    ae_vector_set_length(&t, n+1, _state);
    ae_vector_set_length(&t2, n+1, _state);
    ae_vector_set_length(&t3, n+1, _state);
    /* Tau and E have N-1 elements and only exist for N>1; D always has N */
    if( n>1 )
    {
        ae_vector_set_length(tau, n-2+1, _state);
    }
    ae_vector_set_length(d, n-1+1, _state);
    if( n>1 )
    {
        ae_vector_set_length(e, n-2+1, _state);
    }

    /*
     * Try to use MKL
     */
    if( smatrixtdmkl(a, n, isupper, tau, d, e, _state) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * ALGLIB version
     */
    if( isupper )
    {

        /*
         * Reduce the upper triangle of A, processing columns right-to-left
         */
        for(i=n-2; i>=0; i--)
        {

            /*
             * Generate elementary reflector H() = E - tau * v * v'
             * from column i+1 above the diagonal (copied into 1-based T)
             */
            if( i>=1 )
            {
                ae_v_move(&t.ptr.p_double[2], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(2,i+1));
            }
            t.ptr.p_double[1] = a->ptr.pp_double[i][i+1];
            generatereflection(&t, i+1, &taui, _state);
            if( i>=1 )
            {
                ae_v_move(&a->ptr.pp_double[0][i+1], a->stride, &t.ptr.p_double[2], 1, ae_v_len(0,i-1));
            }
            a->ptr.pp_double[i][i+1] = t.ptr.p_double[1];
            e->ptr.p_double[i] = a->ptr.pp_double[i][i+1];
            if( ae_fp_neq(taui,(double)(0)) )
            {

                /*
                 * Apply H from both sides to A
                 * (temporarily set the implicit v(i)=1 element)
                 */
                a->ptr.pp_double[i][i+1] = (double)(1);

                /*
                 * Compute  x := tau * A * v  storing x in TAU
                 */
                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1));
                symmetricmatrixvectormultiply(a, isupper, 0, i, &t, taui, &t3, _state);
                ae_v_move(&tau->ptr.p_double[0], 1, &t3.ptr.p_double[1], 1, ae_v_len(0,i));

                /*
                 * Compute  w := x - 1/2 * tau * (x'*v) * v
                 */
                v = ae_v_dotproduct(&tau->ptr.p_double[0], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(0,i));
                alpha = -0.5*taui*v;
                ae_v_addd(&tau->ptr.p_double[0], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(0,i), alpha);

                /*
                 * Apply the transformation as a rank-2 update:
                 *    A := A - v * w' - w * v'
                 */
                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1));
                ae_v_move(&t3.ptr.p_double[1], 1, &tau->ptr.p_double[0], 1, ae_v_len(1,i+1));
                symmetricrank2update(a, isupper, 0, i, &t, &t3, &t2, (double)(-1), _state);
                a->ptr.pp_double[i][i+1] = e->ptr.p_double[i];
            }
            d->ptr.p_double[i+1] = a->ptr.pp_double[i+1][i+1];
            tau->ptr.p_double[i] = taui;
        }
        d->ptr.p_double[0] = a->ptr.pp_double[0][0];
    }
    else
    {

        /*
         * Reduce the lower triangle of A, processing columns left-to-right
         */
        for(i=0; i<=n-2; i++)
        {

            /*
             * Generate elementary reflector H = E - tau * v * v'
             * from column i below the diagonal (copied into 1-based T)
             */
            ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
            generatereflection(&t, n-i-1, &taui, _state);
            ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1));
            e->ptr.p_double[i] = a->ptr.pp_double[i+1][i];
            if( ae_fp_neq(taui,(double)(0)) )
            {

                /*
                 * Apply H from both sides to A
                 * (temporarily set the implicit v(i+1)=1 element)
                 */
                a->ptr.pp_double[i+1][i] = (double)(1);

                /*
                 * Compute  x := tau * A * v  storing y in TAU
                 */
                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
                symmetricmatrixvectormultiply(a, isupper, i+1, n-1, &t, taui, &t2, _state);
                ae_v_move(&tau->ptr.p_double[i], 1, &t2.ptr.p_double[1], 1, ae_v_len(i,n-2));

                /*
                 * Compute  w := x - 1/2 * tau * (x'*v) * v
                 */
                v = ae_v_dotproduct(&tau->ptr.p_double[i], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(i,n-2));
                alpha = -0.5*taui*v;
                ae_v_addd(&tau->ptr.p_double[i], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(i,n-2), alpha);

                /*
                 * Apply the transformation as a rank-2 update:
                 *     A := A - v * w' - w * v'
                 *
                 */
                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
                ae_v_move(&t2.ptr.p_double[1], 1, &tau->ptr.p_double[i], 1, ae_v_len(1,n-i-1));
                symmetricrank2update(a, isupper, i+1, n-1, &t, &t2, &t3, (double)(-1), _state);
                a->ptr.pp_double[i+1][i] = e->ptr.p_double[i];
            }
            d->ptr.p_double[i] = a->ptr.pp_double[i][i];
            tau->ptr.p_double[i] = taui;
        }
        d->ptr.p_double[n-1] = a->ptr.pp_double[n-1][n-1];
    }
    ae_frame_leave(_state);
}
22165 
22166 
22167 /*************************************************************************
22168 Unpacking matrix Q which reduces symmetric matrix to a tridiagonal
22169 form.
22170 
22171   ! COMMERCIAL EDITION OF ALGLIB:
22172   !
22173   ! Commercial Edition of ALGLIB includes following important improvements
22174   ! of this function:
22175   ! * high-performance native backend with same C# interface (C# version)
22176   ! * hardware vendor (Intel) implementations of linear algebra primitives
22177   !   (C++ and C# versions, x86/x64 platform)
22178   !
22179   ! We recommend you to read 'Working with commercial version' section  of
22180   ! ALGLIB Reference Manual in order to find out how to  use  performance-
22181   ! related features provided by commercial edition of ALGLIB.
22182 
22183 Input parameters:
22184     A       -   the result of a SMatrixTD subroutine
22185     N       -   size of matrix A.
22186     IsUpper -   storage format (a parameter of SMatrixTD subroutine)
22187     Tau     -   the result of a SMatrixTD subroutine
22188 
22189 Output parameters:
22190     Q       -   transformation matrix.
22191                 array with elements [0..N-1, 0..N-1].
22192 
22193   -- ALGLIB --
22194      Copyright 2005-2010 by Bochkanov Sergey
22195 *************************************************************************/
smatrixtdunpackq(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_matrix * q,ae_state * _state)22196 void smatrixtdunpackq(/* Real    */ ae_matrix* a,
22197      ae_int_t n,
22198      ae_bool isupper,
22199      /* Real    */ ae_vector* tau,
22200      /* Real    */ ae_matrix* q,
22201      ae_state *_state)
22202 {
22203     ae_frame _frame_block;
22204     ae_int_t i;
22205     ae_int_t j;
22206     ae_vector v;
22207     ae_vector work;
22208 
22209     ae_frame_make(_state, &_frame_block);
22210     memset(&v, 0, sizeof(v));
22211     memset(&work, 0, sizeof(work));
22212     ae_matrix_clear(q);
22213     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
22214     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
22215 
22216     if( n==0 )
22217     {
22218         ae_frame_leave(_state);
22219         return;
22220     }
22221 
22222     /*
22223      * init
22224      */
22225     ae_matrix_set_length(q, n-1+1, n-1+1, _state);
22226     ae_vector_set_length(&v, n+1, _state);
22227     ae_vector_set_length(&work, n-1+1, _state);
22228     for(i=0; i<=n-1; i++)
22229     {
22230         for(j=0; j<=n-1; j++)
22231         {
22232             if( i==j )
22233             {
22234                 q->ptr.pp_double[i][j] = (double)(1);
22235             }
22236             else
22237             {
22238                 q->ptr.pp_double[i][j] = (double)(0);
22239             }
22240         }
22241     }
22242 
22243     /*
22244      * MKL version
22245      */
22246     if( smatrixtdunpackqmkl(a, n, isupper, tau, q, _state) )
22247     {
22248         ae_frame_leave(_state);
22249         return;
22250     }
22251 
22252     /*
22253      * ALGLIB version: unpack Q
22254      */
22255     if( isupper )
22256     {
22257         for(i=0; i<=n-2; i++)
22258         {
22259 
22260             /*
22261              * Apply H(i)
22262              */
22263             ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1));
22264             v.ptr.p_double[i+1] = (double)(1);
22265             applyreflectionfromtheleft(q, tau->ptr.p_double[i], &v, 0, i, 0, n-1, &work, _state);
22266         }
22267     }
22268     else
22269     {
22270         for(i=n-2; i>=0; i--)
22271         {
22272 
22273             /*
22274              * Apply H(i)
22275              */
22276             ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
22277             v.ptr.p_double[1] = (double)(1);
22278             applyreflectionfromtheleft(q, tau->ptr.p_double[i], &v, i+1, n-1, 0, n-1, &work, _state);
22279         }
22280     }
22281     ae_frame_leave(_state);
22282 }
22283 
22284 
22285 /*************************************************************************
22286 Reduction of a Hermitian matrix which is given  by  its  higher  or  lower
22287 triangular part to a real  tridiagonal  matrix  using  unitary  similarity
22288 transformation: Q'*A*Q = T.
22289 
22290   ! COMMERCIAL EDITION OF ALGLIB:
22291   !
22292   ! Commercial Edition of ALGLIB includes following important improvements
22293   ! of this function:
22294   ! * high-performance native backend with same C# interface (C# version)
22295   ! * hardware vendor (Intel) implementations of linear algebra primitives
22296   !   (C++ and C# versions, x86/x64 platform)
22297   !
22298   ! We recommend you to read 'Working with commercial version' section  of
22299   ! ALGLIB Reference Manual in order to find out how to  use  performance-
22300   ! related features provided by commercial edition of ALGLIB.
22301 
22302 Input parameters:
22303     A       -   matrix to be transformed
22304                 array with elements [0..N-1, 0..N-1].
22305     N       -   size of matrix A.
22306     IsUpper -   storage format. If IsUpper = True, then matrix A is  given
22307                 by its upper triangle, and the lower triangle is not  used
22308                 and not modified by the algorithm, and vice versa
22309                 if IsUpper = False.
22310 
22311 Output parameters:
22312     A       -   matrices T and Q in  compact form (see lower)
22313     Tau     -   array of factors which are forming matrices H(i)
22314                 array with elements [0..N-2].
22315     D       -   main diagonal of real symmetric matrix T.
22316                 array with elements [0..N-1].
22317     E       -   secondary diagonal of real symmetric matrix T.
22318                 array with elements [0..N-2].
22319 
22320 
22321   If IsUpper=True, the matrix Q is represented as a product of elementary
22322   reflectors
22323 
22324      Q = H(n-2) . . . H(1) H(0).
22325 
22326   Each H(i) has the form
22327 
22328      H(i) = I - tau * v * v'
22329 
22330   where tau is a complex scalar, and v is a complex vector with
22331   v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
22332   A(0:i-1,i+1), and tau in TAU(i).
22333 
22334   If IsUpper=False, the matrix Q is represented as a product of elementary
22335   reflectors
22336 
22337      Q = H(0) H(1) . . . H(n-2).
22338 
22339   Each H(i) has the form
22340 
22341      H(i) = I - tau * v * v'
22342 
22343   where tau is a complex scalar, and v is a complex vector with
22344   v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
22345   and tau in TAU(i).
22346 
22347   The contents of A on exit are illustrated by the following examples
22348   with n = 5:
22349 
22350   if UPLO = 'U':                       if UPLO = 'L':
22351 
22352     (  d   e   v1  v2  v3 )              (  d                  )
22353     (      d   e   v2  v3 )              (  e   d              )
22354     (          d   e   v3 )              (  v0  e   d          )
22355     (              d   e  )              (  v0  v1  e   d      )
22356     (                  d  )              (  v0  v1  v2  e   d  )
22357 
22358 where d and e denote diagonal and off-diagonal elements of T, and vi
22359 denotes an element of the vector defining H(i).
22360 
22361   -- LAPACK routine (version 3.0) --
22362      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
22363      Courant Institute, Argonne National Lab, and Rice University
22364      October 31, 1992
22365 *************************************************************************/
hmatrixtd(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_vector * d,ae_vector * e,ae_state * _state)22366 void hmatrixtd(/* Complex */ ae_matrix* a,
22367      ae_int_t n,
22368      ae_bool isupper,
22369      /* Complex */ ae_vector* tau,
22370      /* Real    */ ae_vector* d,
22371      /* Real    */ ae_vector* e,
22372      ae_state *_state)
22373 {
22374     ae_frame _frame_block;
22375     ae_int_t i;
22376     ae_complex alpha;
22377     ae_complex taui;
22378     ae_complex v;
22379     ae_vector t;
22380     ae_vector t2;
22381     ae_vector t3;
22382 
22383     ae_frame_make(_state, &_frame_block);
22384     memset(&t, 0, sizeof(t));
22385     memset(&t2, 0, sizeof(t2));
22386     memset(&t3, 0, sizeof(t3));
22387     ae_vector_clear(tau);
22388     ae_vector_clear(d);
22389     ae_vector_clear(e);
22390     ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
22391     ae_vector_init(&t2, 0, DT_COMPLEX, _state, ae_true);
22392     ae_vector_init(&t3, 0, DT_COMPLEX, _state, ae_true);
22393 
22394 
22395     /*
22396      * Init and test
22397      */
22398     if( n<=0 )
22399     {
22400         ae_frame_leave(_state);
22401         return;
22402     }
22403     for(i=0; i<=n-1; i++)
22404     {
22405         ae_assert(ae_fp_eq(a->ptr.pp_complex[i][i].y,(double)(0)), "Assertion failed", _state);
22406     }
22407     if( n>1 )
22408     {
22409         ae_vector_set_length(tau, n-2+1, _state);
22410         ae_vector_set_length(e, n-2+1, _state);
22411     }
22412     ae_vector_set_length(d, n-1+1, _state);
22413     ae_vector_set_length(&t, n-1+1, _state);
22414     ae_vector_set_length(&t2, n-1+1, _state);
22415     ae_vector_set_length(&t3, n-1+1, _state);
22416 
22417     /*
22418      * MKL version
22419      */
22420     if( hmatrixtdmkl(a, n, isupper, tau, d, e, _state) )
22421     {
22422         ae_frame_leave(_state);
22423         return;
22424     }
22425 
22426     /*
22427      * ALGLIB version
22428      */
22429     if( isupper )
22430     {
22431 
22432         /*
22433          * Reduce the upper triangle of A
22434          */
22435         a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(a->ptr.pp_complex[n-1][n-1].x);
22436         for(i=n-2; i>=0; i--)
22437         {
22438 
22439             /*
22440              * Generate elementary reflector H = I+1 - tau * v * v'
22441              */
22442             alpha = a->ptr.pp_complex[i][i+1];
22443             t.ptr.p_complex[1] = alpha;
22444             if( i>=1 )
22445             {
22446                 ae_v_cmove(&t.ptr.p_complex[2], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(2,i+1));
22447             }
22448             complexgeneratereflection(&t, i+1, &taui, _state);
22449             if( i>=1 )
22450             {
22451                 ae_v_cmove(&a->ptr.pp_complex[0][i+1], a->stride, &t.ptr.p_complex[2], 1, "N", ae_v_len(0,i-1));
22452             }
22453             alpha = t.ptr.p_complex[1];
22454             e->ptr.p_double[i] = alpha.x;
22455             if( ae_c_neq_d(taui,(double)(0)) )
22456             {
22457 
22458                 /*
22459                  * Apply H(I+1) from both sides to A
22460                  */
22461                 a->ptr.pp_complex[i][i+1] = ae_complex_from_i(1);
22462 
22463                 /*
22464                  * Compute  x := tau * A * v  storing x in TAU
22465                  */
22466                 ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1));
22467                 hermitianmatrixvectormultiply(a, isupper, 0, i, &t, taui, &t2, _state);
22468                 ae_v_cmove(&tau->ptr.p_complex[0], 1, &t2.ptr.p_complex[1], 1, "N", ae_v_len(0,i));
22469 
22470                 /*
22471                  * Compute  w := x - 1/2 * tau * (x'*v) * v
22472                  */
22473                 v = ae_v_cdotproduct(&tau->ptr.p_complex[0], 1, "Conj", &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(0,i));
22474                 alpha = ae_c_neg(ae_c_mul(ae_c_mul_d(taui,0.5),v));
22475                 ae_v_caddc(&tau->ptr.p_complex[0], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(0,i), alpha);
22476 
22477                 /*
22478                  * Apply the transformation as a rank-2 update:
22479                  *    A := A - v * w' - w * v'
22480                  */
22481                 ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1));
22482                 ae_v_cmove(&t3.ptr.p_complex[1], 1, &tau->ptr.p_complex[0], 1, "N", ae_v_len(1,i+1));
22483                 hermitianrank2update(a, isupper, 0, i, &t, &t3, &t2, ae_complex_from_i(-1), _state);
22484             }
22485             else
22486             {
22487                 a->ptr.pp_complex[i][i] = ae_complex_from_d(a->ptr.pp_complex[i][i].x);
22488             }
22489             a->ptr.pp_complex[i][i+1] = ae_complex_from_d(e->ptr.p_double[i]);
22490             d->ptr.p_double[i+1] = a->ptr.pp_complex[i+1][i+1].x;
22491             tau->ptr.p_complex[i] = taui;
22492         }
22493         d->ptr.p_double[0] = a->ptr.pp_complex[0][0].x;
22494     }
22495     else
22496     {
22497 
22498         /*
22499          * Reduce the lower triangle of A
22500          */
22501         a->ptr.pp_complex[0][0] = ae_complex_from_d(a->ptr.pp_complex[0][0].x);
22502         for(i=0; i<=n-2; i++)
22503         {
22504 
22505             /*
22506              * Generate elementary reflector H = I - tau * v * v'
22507              */
22508             ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
22509             complexgeneratereflection(&t, n-i-1, &taui, _state);
22510             ae_v_cmove(&a->ptr.pp_complex[i+1][i], a->stride, &t.ptr.p_complex[1], 1, "N", ae_v_len(i+1,n-1));
22511             e->ptr.p_double[i] = a->ptr.pp_complex[i+1][i].x;
22512             if( ae_c_neq_d(taui,(double)(0)) )
22513             {
22514 
22515                 /*
22516                  * Apply H(i) from both sides to A(i+1:n,i+1:n)
22517                  */
22518                 a->ptr.pp_complex[i+1][i] = ae_complex_from_i(1);
22519 
22520                 /*
22521                  * Compute  x := tau * A * v  storing y in TAU
22522                  */
22523                 ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
22524                 hermitianmatrixvectormultiply(a, isupper, i+1, n-1, &t, taui, &t2, _state);
22525                 ae_v_cmove(&tau->ptr.p_complex[i], 1, &t2.ptr.p_complex[1], 1, "N", ae_v_len(i,n-2));
22526 
22527                 /*
22528                  * Compute  w := x - 1/2 * tau * (x'*v) * v
22529                  */
22530                 v = ae_v_cdotproduct(&tau->ptr.p_complex[i], 1, "Conj", &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(i,n-2));
22531                 alpha = ae_c_neg(ae_c_mul(ae_c_mul_d(taui,0.5),v));
22532                 ae_v_caddc(&tau->ptr.p_complex[i], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(i,n-2), alpha);
22533 
22534                 /*
22535                  * Apply the transformation as a rank-2 update:
22536                  * A := A - v * w' - w * v'
22537                  */
22538                 ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
22539                 ae_v_cmove(&t2.ptr.p_complex[1], 1, &tau->ptr.p_complex[i], 1, "N", ae_v_len(1,n-i-1));
22540                 hermitianrank2update(a, isupper, i+1, n-1, &t, &t2, &t3, ae_complex_from_i(-1), _state);
22541             }
22542             else
22543             {
22544                 a->ptr.pp_complex[i+1][i+1] = ae_complex_from_d(a->ptr.pp_complex[i+1][i+1].x);
22545             }
22546             a->ptr.pp_complex[i+1][i] = ae_complex_from_d(e->ptr.p_double[i]);
22547             d->ptr.p_double[i] = a->ptr.pp_complex[i][i].x;
22548             tau->ptr.p_complex[i] = taui;
22549         }
22550         d->ptr.p_double[n-1] = a->ptr.pp_complex[n-1][n-1].x;
22551     }
22552     ae_frame_leave(_state);
22553 }
22554 
22555 
22556 /*************************************************************************
22557 Unpacking matrix Q which reduces a Hermitian matrix to a real  tridiagonal
22558 form.
22559 
22560   ! COMMERCIAL EDITION OF ALGLIB:
22561   !
22562   ! Commercial Edition of ALGLIB includes following important improvements
22563   ! of this function:
22564   ! * high-performance native backend with same C# interface (C# version)
22565   ! * hardware vendor (Intel) implementations of linear algebra primitives
22566   !   (C++ and C# versions, x86/x64 platform)
22567   !
22568   ! We recommend you to read 'Working with commercial version' section  of
22569   ! ALGLIB Reference Manual in order to find out how to  use  performance-
22570   ! related features provided by commercial edition of ALGLIB.
22571 
22572 Input parameters:
22573     A       -   the result of a HMatrixTD subroutine
22574     N       -   size of matrix A.
22575     IsUpper -   storage format (a parameter of HMatrixTD subroutine)
22576     Tau     -   the result of a HMatrixTD subroutine
22577 
22578 Output parameters:
22579     Q       -   transformation matrix.
22580                 array with elements [0..N-1, 0..N-1].
22581 
22582   -- ALGLIB --
22583      Copyright 2005-2010 by Bochkanov Sergey
22584 *************************************************************************/
void hmatrixtdunpackq(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     /* Complex */ ae_vector* tau,
     /* Complex */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_vector v;                /* current elementary reflector (1-based storage) */
    ae_vector work;             /* scratch buffer used by the reflection routine  */

    ae_frame_make(_state, &_frame_block);
    memset(&v, 0, sizeof(v));
    memset(&work, 0, sizeof(work));
    ae_matrix_clear(q);
    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Degenerate case: empty matrix, nothing to unpack
     */
    if( n==0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * init
     */
    ae_matrix_set_length(q, n-1+1, n-1+1, _state);
    ae_vector_set_length(&v, n+1, _state);
    ae_vector_set_length(&work, n-1+1, _state);

    /*
     * MKL version (returns ae_true and fills Q when the MKL backend is
     * available; otherwise falls through to the generic code below)
     */
    if( hmatrixtdunpackqmkl(a, n, isupper, tau, q, _state) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * ALGLIB version: start from the identity and accumulate the
     * elementary reflectors stored in A/Tau by HMatrixTD.
     */
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(1);
            }
            else
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }
    if( isupper )
    {
        /*
         * Upper storage: reflector I lives in column I+1, rows 0..I.
         * Reflectors are applied in forward order, each acting on
         * rows 0..I of Q (all columns).
         */
        for(i=0; i<=n-2; i++)
        {

            /*
             * Apply H(i): copy A(0:I, I+1) into V[1..I+1] and overwrite
             * the last element with the implicit unit entry.
             */
            ae_v_cmove(&v.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1));
            v.ptr.p_complex[i+1] = ae_complex_from_i(1);
            complexapplyreflectionfromtheleft(q, tau->ptr.p_complex[i], &v, 0, i, 0, n-1, &work, _state);
        }
    }
    else
    {
        /*
         * Lower storage: reflector I lives in column I, rows I+1..N-1.
         * Reflectors are applied in reverse order, each acting on
         * rows I+1..N-1 of Q (all columns).
         */
        for(i=n-2; i>=0; i--)
        {

            /*
             * Apply H(i): copy A(I+1:N-1, I) into V[1..N-I-1]; the first
             * element is the implicit unit entry.
             */
            ae_v_cmove(&v.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
            v.ptr.p_complex[1] = ae_complex_from_i(1);
            complexapplyreflectionfromtheleft(q, tau->ptr.p_complex[i], &v, i+1, n-1, 0, n-1, &work, _state);
        }
    }
    ae_frame_leave(_state);
}
22672 
22673 
22674 /*************************************************************************
22675 Base case for complex QR
22676 
22677   -- LAPACK routine (version 3.0) --
22678      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
22679      Courant Institute, Argonne National Lab, and Rice University
22680      September 30, 1994.
22681      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
22682      pseudocode, 2007-2010.
22683 *************************************************************************/
static void ortfac_cmatrixqrbasecase(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* work,
     /* Complex */ ae_vector* t,
     /* Complex */ ae_vector* tau,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_int_t mmi;
    ae_int_t minmn;
    ae_complex tmp;


    minmn = ae_minint(m, n, _state);
    if( minmn<=0 )
    {
        return;
    }

    /*
     * Reduce columns 0..MinMN-1 one by one; column I is annihilated
     * below the diagonal by the elementary reflector H(I).
     *
     * (Fix: the old code recomputed min(M,N) here with a second
     * ae_minint() call; reuse the MinMN value computed above.)
     */
    k = minmn;
    for(i=0; i<=k-1; i++)
    {

        /*
         * Generate elementary reflector H(i) to annihilate A(i+1:m,i).
         * T uses 1-based storage: the column tail A(i:m-1,i) is copied
         * into T[1..MMI] before the reflector is generated, and the
         * transformed column is copied back afterwards.
         */
        mmi = m-i;
        ae_v_cmove(&t->ptr.p_complex[1], 1, &a->ptr.pp_complex[i][i], a->stride, "N", ae_v_len(1,mmi));
        complexgeneratereflection(t, mmi, &tmp, _state);
        tau->ptr.p_complex[i] = tmp;
        ae_v_cmove(&a->ptr.pp_complex[i][i], a->stride, &t->ptr.p_complex[1], 1, "N", ae_v_len(i,m-1));
        t->ptr.p_complex[1] = ae_complex_from_i(1);
        if( i<n-1 )
        {

            /*
             * Apply H'(i) to A(i:m,i+1:n) from the left
             */
            complexapplyreflectionfromtheleft(a, ae_c_conj(tau->ptr.p_complex[i], _state), t, i, m-1, i+1, n-1, work, _state);
        }
    }
}
22731 
22732 
22733 /*************************************************************************
22734 Base case for complex LQ
22735 
22736   -- LAPACK routine (version 3.0) --
22737      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
22738      Courant Institute, Argonne National Lab, and Rice University
22739      September 30, 1994.
22740      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
22741      pseudocode, 2007-2010.
22742 *************************************************************************/
static void ortfac_cmatrixlqbasecase(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* work,
     /* Complex */ ae_vector* t,
     /* Complex */ ae_vector* tau,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t minmn;
    ae_complex tmp;


    minmn = ae_minint(m, n, _state);
    if( minmn<=0 )
    {
        return;
    }

    /*
     * Reduce rows 0..MinMN-1 one by one; row I is annihilated to the
     * right of the diagonal by the (right) elementary reflector H(I).
     */
    for(i=0; i<=minmn-1; i++)
    {

        /*
         * Generate elementary reflector H(i)
         *
         * NOTE: ComplexGenerateReflection() generates left reflector,
         * i.e. H which reduces x by applying from the left, but we
         * need RIGHT reflector. So we replace H=E-tau*v*v' by H^H,
         * which changes v to conj(v).
         */
        ae_v_cmove(&t->ptr.p_complex[1], 1, &a->ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,n-i));
        complexgeneratereflection(t, n-i, &tmp, _state);
        tau->ptr.p_complex[i] = tmp;
        /* store conj(v) back into the row and set the implicit unit entry */
        ae_v_cmove(&a->ptr.pp_complex[i][i], 1, &t->ptr.p_complex[1], 1, "Conj", ae_v_len(i,n-1));
        t->ptr.p_complex[1] = ae_complex_from_i(1);
        if( i<m-1 )
        {

            /*
             * Apply H'(i) to A(i+1:m, i:n) from the right
             */
            complexapplyreflectionfromtheright(a, tau->ptr.p_complex[i], t, i+1, m-1, i, n-1, work, _state);
        }
    }
}
22791 
22792 
22793 /*************************************************************************
22794 Generate block reflector:
22795 * fill unused parts of reflectors matrix by zeros
22796 * fill diagonal of reflectors matrix by ones
22797 * generate triangular factor T
22798 
22799 PARAMETERS:
22800     A           -   either LengthA*BlockSize (if ColumnwiseA) or
22801                     BlockSize*LengthA (if not ColumnwiseA) matrix of
22802                     elementary reflectors.
22803                     Modified on exit.
22804     Tau         -   scalar factors
22805     ColumnwiseA -   reflectors are stored in rows or in columns
22806     LengthA     -   length of largest reflector
22807     BlockSize   -   number of reflectors
22808     T           -   array[BlockSize,2*BlockSize]. Left BlockSize*BlockSize
22809                     submatrix stores triangular factor on exit.
22810     WORK        -   array[BlockSize]
22811 
22812   -- ALGLIB routine --
22813      17.02.2010
22814      Bochkanov Sergey
22815 *************************************************************************/
static void ortfac_rmatrixblockreflector(/* Real    */ ae_matrix* a,
     /* Real    */ ae_vector* tau,
     ae_bool columnwisea,
     ae_int_t lengtha,
     ae_int_t blocksize,
     /* Real    */ ae_matrix* t,
     /* Real    */ ae_vector* work,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;



    /*
     * Normalize the stored reflectors into the Y factor:
     * fill beginning of new column with zeros,
     * load 1.0 in the first non-zero element
     */
    for(k=0; k<=blocksize-1; k++)
    {
        if( columnwisea )
        {
            for(i=0; i<=k-1; i++)
            {
                a->ptr.pp_double[i][k] = (double)(0);
            }
        }
        else
        {
            for(i=0; i<=k-1; i++)
            {
                a->ptr.pp_double[k][i] = (double)(0);
            }
        }
        a->ptr.pp_double[k][k] = (double)(1);
    }

    /*
     * Calculate Gram matrix of A.
     *
     * The right half of T (columns BlockSize..2*BlockSize-1) is used as
     * scratch: after this loop T[j][BlockSize+i] holds the dot product of
     * reflectors J and I (I<J), accumulated over all LengthA elements.
     * Zero reflector elements are skipped to save work.
     */
    for(i=0; i<=blocksize-1; i++)
    {
        for(j=0; j<=blocksize-1; j++)
        {
            t->ptr.pp_double[i][blocksize+j] = (double)(0);
        }
    }
    for(k=0; k<=lengtha-1; k++)
    {
        for(j=1; j<=blocksize-1; j++)
        {
            if( columnwisea )
            {
                v = a->ptr.pp_double[k][j];
                if( ae_fp_neq(v,(double)(0)) )
                {
                    ae_v_addd(&t->ptr.pp_double[j][blocksize], 1, &a->ptr.pp_double[k][0], 1, ae_v_len(blocksize,blocksize+j-1), v);
                }
            }
            else
            {
                v = a->ptr.pp_double[j][k];
                if( ae_fp_neq(v,(double)(0)) )
                {
                    ae_v_addd(&t->ptr.pp_double[j][blocksize], 1, &a->ptr.pp_double[0][k], a->stride, ae_v_len(blocksize,blocksize+j-1), v);
                }
            }
        }
    }

    /*
     * Prepare Y (stored in TmpA) and T (stored in TmpT)
     */
    for(k=0; k<=blocksize-1; k++)
    {

        /*
         * fill non-zero part of T, use pre-calculated Gram matrix:
         * column K of the triangular factor is
         *     T[i][k] = -Tau[k] * sum(T[i][i..k-1]*G[k][i..k-1]),  i<k
         * with -Tau[k] on the diagonal.
         */
        ae_v_move(&work->ptr.p_double[0], 1, &t->ptr.pp_double[k][blocksize], 1, ae_v_len(0,k-1));
        for(i=0; i<=k-1; i++)
        {
            v = ae_v_dotproduct(&t->ptr.pp_double[i][i], 1, &work->ptr.p_double[i], 1, ae_v_len(i,k-1));
            t->ptr.pp_double[i][k] = -tau->ptr.p_double[k]*v;
        }
        t->ptr.pp_double[k][k] = -tau->ptr.p_double[k];

        /*
         * Rest of T is filled by zeros
         */
        for(i=k+1; i<=blocksize-1; i++)
        {
            t->ptr.pp_double[i][k] = (double)(0);
        }
    }
}
22914 
22915 
22916 /*************************************************************************
22917 Generate block reflector (complex):
22918 * fill unused parts of reflectors matrix by zeros
22919 * fill diagonal of reflectors matrix by ones
22920 * generate triangular factor T
22921 
22922 
22923   -- ALGLIB routine --
22924      17.02.2010
22925      Bochkanov Sergey
22926 *************************************************************************/
static void ortfac_cmatrixblockreflector(/* Complex */ ae_matrix* a,
     /* Complex */ ae_vector* tau,
     ae_bool columnwisea,
     ae_int_t lengtha,
     ae_int_t blocksize,
     /* Complex */ ae_matrix* t,
     /* Complex */ ae_vector* work,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_complex v;



    /*
     * Prepare Y (stored in TmpA) and T (stored in TmpT).
     *
     * Unlike the real version, the Gram-like inner products are computed
     * on the fly for each new reflector K instead of being pre-calculated.
     */
    for(k=0; k<=blocksize-1; k++)
    {

        /*
         * fill beginning of new column with zeros,
         * load 1.0 in the first non-zero element
         */
        if( columnwisea )
        {
            for(i=0; i<=k-1; i++)
            {
                a->ptr.pp_complex[i][k] = ae_complex_from_i(0);
            }
        }
        else
        {
            for(i=0; i<=k-1; i++)
            {
                a->ptr.pp_complex[k][i] = ae_complex_from_i(0);
            }
        }
        a->ptr.pp_complex[k][k] = ae_complex_from_i(1);

        /*
         * fill non-zero part of T:
         * WORK[i] receives the (conjugated) inner product of reflector I
         * with the new reflector K over elements K..LengthA-1; the side
         * carrying the conjugation depends on the storage orientation.
         */
        for(i=0; i<=k-1; i++)
        {
            if( columnwisea )
            {
                v = ae_v_cdotproduct(&a->ptr.pp_complex[k][i], a->stride, "Conj", &a->ptr.pp_complex[k][k], a->stride, "N", ae_v_len(k,lengtha-1));
            }
            else
            {
                v = ae_v_cdotproduct(&a->ptr.pp_complex[i][k], 1, "N", &a->ptr.pp_complex[k][k], 1, "Conj", ae_v_len(k,lengtha-1));
            }
            work->ptr.p_complex[i] = v;
        }
        /*
         * Column K of the triangular factor:
         *     T[i][k] = -Tau[k] * sum(T[i][i..k-1]*WORK[i..k-1]),  i<k
         * with -Tau[k] on the diagonal.
         */
        for(i=0; i<=k-1; i++)
        {
            v = ae_v_cdotproduct(&t->ptr.pp_complex[i][i], 1, "N", &work->ptr.p_complex[i], 1, "N", ae_v_len(i,k-1));
            t->ptr.pp_complex[i][k] = ae_c_neg(ae_c_mul(tau->ptr.p_complex[k],v));
        }
        t->ptr.pp_complex[k][k] = ae_c_neg(tau->ptr.p_complex[k]);

        /*
         * Rest of T is filled by zeros
         */
        for(i=k+1; i<=blocksize-1; i++)
        {
            t->ptr.pp_complex[i][k] = ae_complex_from_i(0);
        }
    }
}
22999 
23000 
23001 #endif
23002 #if defined(AE_COMPILE_MATGEN) || !defined(AE_PARTIAL_BUILD)
23003 
23004 
23005 /*************************************************************************
23006 Generation of a random uniformly distributed (Haar) orthogonal matrix
23007 
23008 INPUT PARAMETERS:
23009     N   -   matrix size, N>=1
23010 
23011 OUTPUT PARAMETERS:
23012     A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
23013 
23014 NOTE: this function uses algorithm  described  in  Stewart, G. W.  (1980),
23015       "The Efficient Generation of  Random  Orthogonal  Matrices  with  an
23016       Application to Condition Estimators".
23017 
23018       Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it:
23019       * takes an NxN one
23020       * takes uniformly distributed unit vector of dimension N+1.
23021       * constructs a Householder reflection from the vector, then applies
23022         it to the smaller matrix (embedded in the larger size with a 1 at
23023         the bottom right corner).
23024 
23025   -- ALGLIB routine --
23026      04.12.2009
23027      Bochkanov Sergey
23028 *************************************************************************/
void rmatrixrndorthogonal(ae_int_t n,
     /* Real    */ ae_matrix* a,
     ae_state *_state)
{
    ae_int_t row;
    ae_int_t col;

    ae_matrix_clear(a);

    ae_assert(n>=1, "RMatrixRndOrthogonal: N<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * Start from the NxN identity, then multiply it from the right by a
     * random Haar-distributed orthogonal matrix; the product is itself
     * Haar-distributed.
     */
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_double[row][col] = row==col ? (double)(1) : (double)(0);
        }
    }
    rmatrixrndorthogonalfromtheright(a, n, n, _state);
}
23056 
23057 
23058 /*************************************************************************
23059 Generation of random NxN matrix with given condition number and norm2(A)=1
23060 
23061 INPUT PARAMETERS:
23062     N   -   matrix size
23063     C   -   condition number (in 2-norm)
23064 
23065 OUTPUT PARAMETERS:
23066     A   -   random matrix with norm2(A)=1 and cond(A)=C
23067 
23068   -- ALGLIB routine --
23069      04.12.2009
23070      Bochkanov Sergey
23071 *************************************************************************/
void rmatrixrndcond(ae_int_t n,
     double c,
     /* Real    */ ae_matrix* a,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    double logsmax;             /* log of largest singular value  (=0)      */
    double logsmin;             /* log of smallest singular value (=log 1/C)*/
    hqrndstate rng;

    ae_frame_make(_state, &_frame_block);
    memset(&rng, 0, sizeof(rng));
    ae_matrix_clear(a);
    _hqrndstate_init(&rng, _state, ae_true);

    ae_assert(n>=1&&ae_fp_greater_eq(c,(double)(1)), "RMatrixRndCond: N<1 or C<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * 1x1 case: a random sign; condition number is always 1
     */
    if( n==1 )
    {
        a->ptr.pp_double[0][0] = (double)(2*ae_randominteger(2, _state)-1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Build a diagonal matrix with singular values 1 and 1/C at the ends
     * and log-uniform values in between, then hide the diagonal structure
     * by multiplying with random orthogonal matrices from both sides
     * (orthogonal factors preserve the singular values).
     */
    hqrndrandomize(&rng, _state);
    logsmax = (double)(0);
    logsmin = ae_log(1/c, _state);
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_double[row][col] = (double)(0);
        }
    }
    a->ptr.pp_double[0][0] = ae_exp(logsmax, _state);
    for(row=1; row<=n-2; row++)
    {
        a->ptr.pp_double[row][row] = ae_exp(hqrnduniformr(&rng, _state)*(logsmin-logsmax)+logsmax, _state);
    }
    a->ptr.pp_double[n-1][n-1] = ae_exp(logsmin, _state);
    rmatrixrndorthogonalfromtheleft(a, n, n, _state);
    rmatrixrndorthogonalfromtheright(a, n, n, _state);
    ae_frame_leave(_state);
}
23121 
23122 
23123 /*************************************************************************
23124 Generation of a random Haar distributed orthogonal complex matrix
23125 
23126 INPUT PARAMETERS:
23127     N   -   matrix size, N>=1
23128 
23129 OUTPUT PARAMETERS:
23130     A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
23131 
23132 NOTE: this function uses algorithm  described  in  Stewart, G. W.  (1980),
23133       "The Efficient Generation of  Random  Orthogonal  Matrices  with  an
23134       Application to Condition Estimators".
23135 
23136       Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it:
23137       * takes an NxN one
23138       * takes uniformly distributed unit vector of dimension N+1.
23139       * constructs a Householder reflection from the vector, then applies
23140         it to the smaller matrix (embedded in the larger size with a 1 at
23141         the bottom right corner).
23142 
23143   -- ALGLIB routine --
23144      04.12.2009
23145      Bochkanov Sergey
23146 *************************************************************************/
void cmatrixrndorthogonal(ae_int_t n,
     /* Complex */ ae_matrix* a,
     ae_state *_state)
{
    ae_int_t row;
    ae_int_t col;

    ae_matrix_clear(a);

    ae_assert(n>=1, "CMatrixRndOrthogonal: N<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * Start from the NxN identity, then multiply it from the right by a
     * random Haar-distributed unitary matrix; the product is itself
     * Haar-distributed.
     */
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_complex[row][col] = row==col ? ae_complex_from_i(1) : ae_complex_from_i(0);
        }
    }
    cmatrixrndorthogonalfromtheright(a, n, n, _state);
}
23174 
23175 
23176 /*************************************************************************
23177 Generation of random NxN complex matrix with given condition number C and
23178 norm2(A)=1
23179 
23180 INPUT PARAMETERS:
23181     N   -   matrix size
23182     C   -   condition number (in 2-norm)
23183 
23184 OUTPUT PARAMETERS:
23185     A   -   random matrix with norm2(A)=1 and cond(A)=C
23186 
23187   -- ALGLIB routine --
23188      04.12.2009
23189      Bochkanov Sergey
23190 *************************************************************************/
void cmatrixrndcond(ae_int_t n,
     double c,
     /* Complex */ ae_matrix* a,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    double logsmax;             /* log of largest singular value  (=0)      */
    double logsmin;             /* log of smallest singular value (=log 1/C)*/
    hqrndstate rng;
    ae_complex unitval;

    ae_frame_make(_state, &_frame_block);
    memset(&rng, 0, sizeof(rng));
    ae_matrix_clear(a);
    _hqrndstate_init(&rng, _state, ae_true);

    ae_assert(n>=1&&ae_fp_greater_eq(c,(double)(1)), "CMatrixRndCond: N<1 or C<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * 1x1 case: a random point on the unit circle; condition number is 1
     */
    if( n==1 )
    {
        hqrndrandomize(&rng, _state);
        hqrndunit2(&rng, &unitval.x, &unitval.y, _state);
        a->ptr.pp_complex[0][0] = unitval;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Build a real diagonal matrix with singular values 1 and 1/C at the
     * ends and log-uniform values in between, then hide the diagonal
     * structure by multiplying with random unitary matrices from both
     * sides (unitary factors preserve the singular values).
     */
    hqrndrandomize(&rng, _state);
    logsmax = (double)(0);
    logsmin = ae_log(1/c, _state);
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_complex[row][col] = ae_complex_from_i(0);
        }
    }
    a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(logsmax, _state));
    for(row=1; row<=n-2; row++)
    {
        a->ptr.pp_complex[row][row] = ae_complex_from_d(ae_exp(hqrnduniformr(&rng, _state)*(logsmin-logsmax)+logsmax, _state));
    }
    a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(logsmin, _state));
    cmatrixrndorthogonalfromtheleft(a, n, n, _state);
    cmatrixrndorthogonalfromtheright(a, n, n, _state);
    ae_frame_leave(_state);
}
23243 
23244 
23245 /*************************************************************************
23246 Generation of random NxN symmetric matrix with given condition number  and
23247 norm2(A)=1
23248 
23249 INPUT PARAMETERS:
23250     N   -   matrix size
23251     C   -   condition number (in 2-norm)
23252 
23253 OUTPUT PARAMETERS:
23254     A   -   random matrix with norm2(A)=1 and cond(A)=C
23255 
23256   -- ALGLIB routine --
23257      04.12.2009
23258      Bochkanov Sergey
23259 *************************************************************************/
void smatrixrndcond(ae_int_t n,
     double c,
     /* Real    */ ae_matrix* a,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    double logsmax;             /* log of largest |eigenvalue|  (=0)        */
    double logsmin;             /* log of smallest |eigenvalue| (=log 1/C)  */
    hqrndstate rng;

    ae_frame_make(_state, &_frame_block);
    memset(&rng, 0, sizeof(rng));
    ae_matrix_clear(a);
    _hqrndstate_init(&rng, _state, ae_true);

    ae_assert(n>=1&&ae_fp_greater_eq(c,(double)(1)), "SMatrixRndCond: N<1 or C<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * 1x1 case: a random sign; condition number is always 1
     */
    if( n==1 )
    {
        a->ptr.pp_double[0][0] = (double)(2*ae_randominteger(2, _state)-1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Prepare matrix: diagonal with eigenvalues of magnitude 1 and 1/C at
     * the ends, log-uniform magnitudes with random signs in between.
     *
     * NOTE: the diagonal expression is kept as one statement because it
     * draws two random numbers; splitting it could change the order in
     * which the generator is consumed.
     */
    hqrndrandomize(&rng, _state);
    logsmax = (double)(0);
    logsmin = ae_log(1/c, _state);
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_double[row][col] = (double)(0);
        }
    }
    a->ptr.pp_double[0][0] = ae_exp(logsmax, _state);
    for(row=1; row<=n-2; row++)
    {
        a->ptr.pp_double[row][row] = (2*hqrnduniformi(&rng, 2, _state)-1)*ae_exp(hqrnduniformr(&rng, _state)*(logsmin-logsmax)+logsmax, _state);
    }
    a->ptr.pp_double[n-1][n-1] = ae_exp(logsmin, _state);

    /*
     * Multiply by a random orthogonal similarity transformation, which
     * preserves symmetry and the spectrum.
     */
    smatrixrndmultiply(a, n, _state);
    ae_frame_leave(_state);
}
23316 
23317 
23318 /*************************************************************************
23319 Generation of random NxN symmetric positive definite matrix with given
23320 condition number and norm2(A)=1
23321 
23322 INPUT PARAMETERS:
23323     N   -   matrix size
23324     C   -   condition number (in 2-norm)
23325 
23326 OUTPUT PARAMETERS:
23327     A   -   random SPD matrix with norm2(A)=1 and cond(A)=C
23328 
23329   -- ALGLIB routine --
23330      04.12.2009
23331      Bochkanov Sergey
23332 *************************************************************************/
spdmatrixrndcond(ae_int_t n,double c,ae_matrix * a,ae_state * _state)23333 void spdmatrixrndcond(ae_int_t n,
23334      double c,
23335      /* Real    */ ae_matrix* a,
23336      ae_state *_state)
23337 {
23338     ae_frame _frame_block;
23339     ae_int_t i;
23340     ae_int_t j;
23341     double l1;
23342     double l2;
23343     hqrndstate rs;
23344 
23345     ae_frame_make(_state, &_frame_block);
23346     memset(&rs, 0, sizeof(rs));
23347     ae_matrix_clear(a);
23348     _hqrndstate_init(&rs, _state, ae_true);
23349 
23350 
23351     /*
23352      * Special cases
23353      */
23354     if( n<=0||ae_fp_less(c,(double)(1)) )
23355     {
23356         ae_frame_leave(_state);
23357         return;
23358     }
23359     ae_matrix_set_length(a, n, n, _state);
23360     if( n==1 )
23361     {
23362         a->ptr.pp_double[0][0] = (double)(1);
23363         ae_frame_leave(_state);
23364         return;
23365     }
23366 
23367     /*
23368      * Prepare matrix
23369      */
23370     hqrndrandomize(&rs, _state);
23371     l1 = (double)(0);
23372     l2 = ae_log(1/c, _state);
23373     for(i=0; i<=n-1; i++)
23374     {
23375         for(j=0; j<=n-1; j++)
23376         {
23377             a->ptr.pp_double[i][j] = (double)(0);
23378         }
23379     }
23380     a->ptr.pp_double[0][0] = ae_exp(l1, _state);
23381     for(i=1; i<=n-2; i++)
23382     {
23383         a->ptr.pp_double[i][i] = ae_exp(hqrnduniformr(&rs, _state)*(l2-l1)+l1, _state);
23384     }
23385     a->ptr.pp_double[n-1][n-1] = ae_exp(l2, _state);
23386 
23387     /*
23388      * Multiply
23389      */
23390     smatrixrndmultiply(a, n, _state);
23391     ae_frame_leave(_state);
23392 }
23393 
23394 
23395 /*************************************************************************
23396 Generation of random NxN Hermitian matrix with given condition number  and
23397 norm2(A)=1
23398 
23399 INPUT PARAMETERS:
23400     N   -   matrix size
23401     C   -   condition number (in 2-norm)
23402 
23403 OUTPUT PARAMETERS:
23404     A   -   random matrix with norm2(A)=1 and cond(A)=C
23405 
23406   -- ALGLIB routine --
23407      04.12.2009
23408      Bochkanov Sergey
23409 *************************************************************************/
hmatrixrndcond(ae_int_t n,double c,ae_matrix * a,ae_state * _state)23410 void hmatrixrndcond(ae_int_t n,
23411      double c,
23412      /* Complex */ ae_matrix* a,
23413      ae_state *_state)
23414 {
23415     ae_frame _frame_block;
23416     ae_int_t i;
23417     ae_int_t j;
23418     double l1;
23419     double l2;
23420     hqrndstate rs;
23421 
23422     ae_frame_make(_state, &_frame_block);
23423     memset(&rs, 0, sizeof(rs));
23424     ae_matrix_clear(a);
23425     _hqrndstate_init(&rs, _state, ae_true);
23426 
23427     ae_assert(n>=1&&ae_fp_greater_eq(c,(double)(1)), "HMatrixRndCond: N<1 or C<1!", _state);
23428     ae_matrix_set_length(a, n, n, _state);
23429     if( n==1 )
23430     {
23431 
23432         /*
23433          * special case
23434          */
23435         a->ptr.pp_complex[0][0] = ae_complex_from_i(2*ae_randominteger(2, _state)-1);
23436         ae_frame_leave(_state);
23437         return;
23438     }
23439 
23440     /*
23441      * Prepare matrix
23442      */
23443     hqrndrandomize(&rs, _state);
23444     l1 = (double)(0);
23445     l2 = ae_log(1/c, _state);
23446     for(i=0; i<=n-1; i++)
23447     {
23448         for(j=0; j<=n-1; j++)
23449         {
23450             a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
23451         }
23452     }
23453     a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state));
23454     for(i=1; i<=n-2; i++)
23455     {
23456         a->ptr.pp_complex[i][i] = ae_complex_from_d((2*hqrnduniformi(&rs, 2, _state)-1)*ae_exp(hqrnduniformr(&rs, _state)*(l2-l1)+l1, _state));
23457     }
23458     a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state));
23459 
23460     /*
23461      * Multiply
23462      */
23463     hmatrixrndmultiply(a, n, _state);
23464 
23465     /*
23466      * post-process to ensure that matrix diagonal is real
23467      */
23468     for(i=0; i<=n-1; i++)
23469     {
23470         a->ptr.pp_complex[i][i].y = (double)(0);
23471     }
23472     ae_frame_leave(_state);
23473 }
23474 
23475 
23476 /*************************************************************************
23477 Generation of random NxN Hermitian positive definite matrix with given
23478 condition number and norm2(A)=1
23479 
23480 INPUT PARAMETERS:
23481     N   -   matrix size
23482     C   -   condition number (in 2-norm)
23483 
23484 OUTPUT PARAMETERS:
23485     A   -   random HPD matrix with norm2(A)=1 and cond(A)=C
23486 
23487   -- ALGLIB routine --
23488      04.12.2009
23489      Bochkanov Sergey
23490 *************************************************************************/
hpdmatrixrndcond(ae_int_t n,double c,ae_matrix * a,ae_state * _state)23491 void hpdmatrixrndcond(ae_int_t n,
23492      double c,
23493      /* Complex */ ae_matrix* a,
23494      ae_state *_state)
23495 {
23496     ae_frame _frame_block;
23497     ae_int_t i;
23498     ae_int_t j;
23499     double l1;
23500     double l2;
23501     hqrndstate rs;
23502 
23503     ae_frame_make(_state, &_frame_block);
23504     memset(&rs, 0, sizeof(rs));
23505     ae_matrix_clear(a);
23506     _hqrndstate_init(&rs, _state, ae_true);
23507 
23508 
23509     /*
23510      * Special cases
23511      */
23512     if( n<=0||ae_fp_less(c,(double)(1)) )
23513     {
23514         ae_frame_leave(_state);
23515         return;
23516     }
23517     ae_matrix_set_length(a, n, n, _state);
23518     if( n==1 )
23519     {
23520         a->ptr.pp_complex[0][0] = ae_complex_from_i(1);
23521         ae_frame_leave(_state);
23522         return;
23523     }
23524 
23525     /*
23526      * Prepare matrix
23527      */
23528     hqrndrandomize(&rs, _state);
23529     l1 = (double)(0);
23530     l2 = ae_log(1/c, _state);
23531     for(i=0; i<=n-1; i++)
23532     {
23533         for(j=0; j<=n-1; j++)
23534         {
23535             a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
23536         }
23537     }
23538     a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state));
23539     for(i=1; i<=n-2; i++)
23540     {
23541         a->ptr.pp_complex[i][i] = ae_complex_from_d(ae_exp(hqrnduniformr(&rs, _state)*(l2-l1)+l1, _state));
23542     }
23543     a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state));
23544 
23545     /*
23546      * Multiply
23547      */
23548     hmatrixrndmultiply(a, n, _state);
23549 
23550     /*
23551      * post-process to ensure that matrix diagonal is real
23552      */
23553     for(i=0; i<=n-1; i++)
23554     {
23555         a->ptr.pp_complex[i][i].y = (double)(0);
23556     }
23557     ae_frame_leave(_state);
23558 }
23559 
23560 
23561 /*************************************************************************
23562 Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix
23563 
23564 INPUT PARAMETERS:
23565     A   -   matrix, array[0..M-1, 0..N-1]
23566     M, N-   matrix size
23567 
23568 OUTPUT PARAMETERS:
23569     A   -   A*Q, where Q is random NxN orthogonal matrix
23570 
23571   -- ALGLIB routine --
23572      04.12.2009
23573      Bochkanov Sergey
23574 *************************************************************************/
void rmatrixrndorthogonalfromtheright(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    double tau;
    double lambdav;
    ae_int_t s;
    ae_int_t i;
    double u1;
    double u2;
    ae_vector w;
    ae_vector v;
    hqrndstate state;

    ae_frame_make(_state, &_frame_block);
    memset(&w, 0, sizeof(w));
    memset(&v, 0, sizeof(v));
    memset(&state, 0, sizeof(state));
    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
    _hqrndstate_init(&state, _state, ae_true);

    ae_assert(n>=1&&m>=1, "RMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
    if( n==1 )
    {

        /*
         * Special case: a 1x1 orthogonal matrix is just +1 or -1,
         * so the single column is multiplied by a random sign.
         */
        tau = (double)(2*ae_randominteger(2, _state)-1);
        for(i=0; i<=m-1; i++)
        {
            a->ptr.pp_double[i][0] = a->ptr.pp_double[i][0]*tau;
        }
        ae_frame_leave(_state);
        return;
    }

    /*
     * General case.
     * First pass: apply a sequence of Householder-type reflections built
     * from random normal vectors (Stewart's construction of a Haar-
     * distributed orthogonal Q, applied implicitly to A from the right).
     * NOTE: v is filled using 1-based indexing, hence length n+1;
     *       w is scratch storage for the reflection routine.
     */
    ae_vector_set_length(&w, m, _state);
    ae_vector_set_length(&v, n+1, _state);
    hqrndrandomize(&state, _state);
    for(s=2; s<=n; s++)
    {

        /*
         * Prepare random normal v: entries v[1..s] are drawn N(0,1) two
         * at a time; the outer do-loop retries in the (practically
         * impossible) event that v came out exactly zero.
         */
        do
        {
            i = 1;
            while(i<=s)
            {
                hqrndnormal2(&state, &u1, &u2, _state);
                v.ptr.p_double[i] = u1;
                if( i+1<=s )
                {
                    v.ptr.p_double[i+1] = u2;
                }
                i = i+2;
            }
            lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s));
        }
        while(ae_fp_eq(lambdav,(double)(0)));

        /*
         * Prepare and apply reflection to columns n-s..n-1 of all M rows.
         * v[1] is reset to 1 per the reflection-storage convention used
         * by applyreflectionfromtheright.
         */
        generatereflection(&v, s, &tau, _state);
        v.ptr.p_double[1] = (double)(1);
        applyreflectionfromtheright(a, tau, &v, 0, m-1, n-s, n-1, &w, _state);
    }

    /*
     * Second pass: multiply each column by an independent random sign
     * to complete the construction.
     */
    for(i=0; i<=n-1; i++)
    {
        tau = (double)(2*hqrnduniformi(&state, 2, _state)-1);
        ae_v_muld(&a->ptr.pp_double[0][i], a->stride, ae_v_len(0,m-1), tau);
    }
    ae_frame_leave(_state);
}
23663 
23664 
23665 /*************************************************************************
23666 Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix
23667 
23668 INPUT PARAMETERS:
23669     A   -   matrix, array[0..M-1, 0..N-1]
23670     M, N-   matrix size
23671 
23672 OUTPUT PARAMETERS:
23673     A   -   Q*A, where Q is random MxM orthogonal matrix
23674 
23675   -- ALGLIB routine --
23676      04.12.2009
23677      Bochkanov Sergey
23678 *************************************************************************/
rmatrixrndorthogonalfromtheleft(ae_matrix * a,ae_int_t m,ae_int_t n,ae_state * _state)23679 void rmatrixrndorthogonalfromtheleft(/* Real    */ ae_matrix* a,
23680      ae_int_t m,
23681      ae_int_t n,
23682      ae_state *_state)
23683 {
23684     ae_frame _frame_block;
23685     double tau;
23686     double lambdav;
23687     ae_int_t s;
23688     ae_int_t i;
23689     ae_int_t j;
23690     double u1;
23691     double u2;
23692     ae_vector w;
23693     ae_vector v;
23694     hqrndstate state;
23695 
23696     ae_frame_make(_state, &_frame_block);
23697     memset(&w, 0, sizeof(w));
23698     memset(&v, 0, sizeof(v));
23699     memset(&state, 0, sizeof(state));
23700     ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
23701     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
23702     _hqrndstate_init(&state, _state, ae_true);
23703 
23704     ae_assert(n>=1&&m>=1, "RMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
23705     if( m==1 )
23706     {
23707 
23708         /*
23709          * special case
23710          */
23711         tau = (double)(2*ae_randominteger(2, _state)-1);
23712         for(j=0; j<=n-1; j++)
23713         {
23714             a->ptr.pp_double[0][j] = a->ptr.pp_double[0][j]*tau;
23715         }
23716         ae_frame_leave(_state);
23717         return;
23718     }
23719 
23720     /*
23721      * General case.
23722      * First pass.
23723      */
23724     ae_vector_set_length(&w, n, _state);
23725     ae_vector_set_length(&v, m+1, _state);
23726     hqrndrandomize(&state, _state);
23727     for(s=2; s<=m; s++)
23728     {
23729 
23730         /*
23731          * Prepare random normal v
23732          */
23733         do
23734         {
23735             i = 1;
23736             while(i<=s)
23737             {
23738                 hqrndnormal2(&state, &u1, &u2, _state);
23739                 v.ptr.p_double[i] = u1;
23740                 if( i+1<=s )
23741                 {
23742                     v.ptr.p_double[i+1] = u2;
23743                 }
23744                 i = i+2;
23745             }
23746             lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s));
23747         }
23748         while(ae_fp_eq(lambdav,(double)(0)));
23749 
23750         /*
23751          * Prepare and apply reflection
23752          */
23753         generatereflection(&v, s, &tau, _state);
23754         v.ptr.p_double[1] = (double)(1);
23755         applyreflectionfromtheleft(a, tau, &v, m-s, m-1, 0, n-1, &w, _state);
23756     }
23757 
23758     /*
23759      * Second pass.
23760      */
23761     for(i=0; i<=m-1; i++)
23762     {
23763         tau = (double)(2*hqrnduniformi(&state, 2, _state)-1);
23764         ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), tau);
23765     }
23766     ae_frame_leave(_state);
23767 }
23768 
23769 
23770 /*************************************************************************
23771 Multiplication of MxN complex matrix by NxN random Haar distributed
23772 complex orthogonal matrix
23773 
23774 INPUT PARAMETERS:
23775     A   -   matrix, array[0..M-1, 0..N-1]
23776     M, N-   matrix size
23777 
23778 OUTPUT PARAMETERS:
23779     A   -   A*Q, where Q is random NxN orthogonal matrix
23780 
23781   -- ALGLIB routine --
23782      04.12.2009
23783      Bochkanov Sergey
23784 *************************************************************************/
void cmatrixrndorthogonalfromtheright(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_complex lambdav;
    ae_complex tau;
    ae_int_t s;
    ae_int_t i;
    ae_vector w;
    ae_vector v;
    hqrndstate state;

    ae_frame_make(_state, &_frame_block);
    memset(&w, 0, sizeof(w));
    memset(&v, 0, sizeof(v));
    memset(&state, 0, sizeof(state));
    ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
    _hqrndstate_init(&state, _state, ae_true);

    ae_assert(n>=1&&m>=1, "CMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
    if( n==1 )
    {

        /*
         * Special case: a 1x1 unitary matrix is a random point on the
         * complex unit circle; scale the single column by it.
         */
        hqrndrandomize(&state, _state);
        hqrndunit2(&state, &tau.x, &tau.y, _state);
        for(i=0; i<=m-1; i++)
        {
            a->ptr.pp_complex[i][0] = ae_c_mul(a->ptr.pp_complex[i][0],tau);
        }
        ae_frame_leave(_state);
        return;
    }

    /*
     * General case.
     * First pass: apply a sequence of complex Householder-type
     * reflections built from random complex normal vectors (Haar-
     * distributed unitary Q applied implicitly from the right).
     * NOTE: v is filled using 1-based indexing, hence length n+1;
     *       w is scratch storage for the reflection routine.
     */
    ae_vector_set_length(&w, m, _state);
    ae_vector_set_length(&v, n+1, _state);
    hqrndrandomize(&state, _state);
    for(s=2; s<=n; s++)
    {

        /*
         * Prepare random normal v: tau is reused as scratch holding one
         * complex N(0,1) sample per entry; the conjugated dot product
         * yields |v|^2, and the do-loop retries if v is exactly zero.
         */
        do
        {
            for(i=1; i<=s; i++)
            {
                hqrndnormal2(&state, &tau.x, &tau.y, _state);
                v.ptr.p_complex[i] = tau;
            }
            lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s));
        }
        while(ae_c_eq_d(lambdav,(double)(0)));

        /*
         * Prepare and apply reflection to columns n-s..n-1 of all M rows.
         * v[1] is reset to 1 per the reflection-storage convention.
         */
        complexgeneratereflection(&v, s, &tau, _state);
        v.ptr.p_complex[1] = ae_complex_from_i(1);
        complexapplyreflectionfromtheright(a, tau, &v, 0, m-1, n-s, n-1, &w, _state);
    }

    /*
     * Second pass: multiply each column by an independent random
     * unit-modulus complex factor to complete the construction.
     */
    for(i=0; i<=n-1; i++)
    {
        hqrndunit2(&state, &tau.x, &tau.y, _state);
        ae_v_cmulc(&a->ptr.pp_complex[0][i], a->stride, ae_v_len(0,m-1), tau);
    }
    ae_frame_leave(_state);
}
23866 
23867 
23868 /*************************************************************************
23869 Multiplication of MxN complex matrix by MxM random Haar distributed
23870 complex orthogonal matrix
23871 
23872 INPUT PARAMETERS:
23873     A   -   matrix, array[0..M-1, 0..N-1]
23874     M, N-   matrix size
23875 
23876 OUTPUT PARAMETERS:
23877     A   -   Q*A, where Q is random MxM orthogonal matrix
23878 
23879   -- ALGLIB routine --
23880      04.12.2009
23881      Bochkanov Sergey
23882 *************************************************************************/
cmatrixrndorthogonalfromtheleft(ae_matrix * a,ae_int_t m,ae_int_t n,ae_state * _state)23883 void cmatrixrndorthogonalfromtheleft(/* Complex */ ae_matrix* a,
23884      ae_int_t m,
23885      ae_int_t n,
23886      ae_state *_state)
23887 {
23888     ae_frame _frame_block;
23889     ae_complex tau;
23890     ae_complex lambdav;
23891     ae_int_t s;
23892     ae_int_t i;
23893     ae_int_t j;
23894     ae_vector w;
23895     ae_vector v;
23896     hqrndstate state;
23897 
23898     ae_frame_make(_state, &_frame_block);
23899     memset(&w, 0, sizeof(w));
23900     memset(&v, 0, sizeof(v));
23901     memset(&state, 0, sizeof(state));
23902     ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true);
23903     ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
23904     _hqrndstate_init(&state, _state, ae_true);
23905 
23906     ae_assert(n>=1&&m>=1, "CMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
23907     if( m==1 )
23908     {
23909 
23910         /*
23911          * special case
23912          */
23913         hqrndrandomize(&state, _state);
23914         hqrndunit2(&state, &tau.x, &tau.y, _state);
23915         for(j=0; j<=n-1; j++)
23916         {
23917             a->ptr.pp_complex[0][j] = ae_c_mul(a->ptr.pp_complex[0][j],tau);
23918         }
23919         ae_frame_leave(_state);
23920         return;
23921     }
23922 
23923     /*
23924      * General case.
23925      * First pass.
23926      */
23927     ae_vector_set_length(&w, n, _state);
23928     ae_vector_set_length(&v, m+1, _state);
23929     hqrndrandomize(&state, _state);
23930     for(s=2; s<=m; s++)
23931     {
23932 
23933         /*
23934          * Prepare random normal v
23935          */
23936         do
23937         {
23938             for(i=1; i<=s; i++)
23939             {
23940                 hqrndnormal2(&state, &tau.x, &tau.y, _state);
23941                 v.ptr.p_complex[i] = tau;
23942             }
23943             lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s));
23944         }
23945         while(ae_c_eq_d(lambdav,(double)(0)));
23946 
23947         /*
23948          * Prepare and apply reflection
23949          */
23950         complexgeneratereflection(&v, s, &tau, _state);
23951         v.ptr.p_complex[1] = ae_complex_from_i(1);
23952         complexapplyreflectionfromtheleft(a, tau, &v, m-s, m-1, 0, n-1, &w, _state);
23953     }
23954 
23955     /*
23956      * Second pass.
23957      */
23958     for(i=0; i<=m-1; i++)
23959     {
23960         hqrndunit2(&state, &tau.x, &tau.y, _state);
23961         ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), tau);
23962     }
23963     ae_frame_leave(_state);
23964 }
23965 
23966 
23967 /*************************************************************************
23968 Symmetric multiplication of NxN matrix by random Haar distributed
23969 orthogonal  matrix
23970 
23971 INPUT PARAMETERS:
23972     A   -   matrix, array[0..N-1, 0..N-1]
23973     N   -   matrix size
23974 
23975 OUTPUT PARAMETERS:
23976     A   -   Q'*A*Q, where Q is random NxN orthogonal matrix
23977 
23978   -- ALGLIB routine --
23979      04.12.2009
23980      Bochkanov Sergey
23981 *************************************************************************/
void smatrixrndmultiply(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    double tau;
    double lambdav;
    ae_int_t s;
    ae_int_t i;
    double u1;
    double u2;
    ae_vector w;
    ae_vector v;
    hqrndstate state;

    ae_frame_make(_state, &_frame_block);
    memset(&w, 0, sizeof(w));
    memset(&v, 0, sizeof(v));
    memset(&state, 0, sizeof(state));
    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
    _hqrndstate_init(&state, _state, ae_true);


    /*
     * General case.
     * Each random reflection is applied from BOTH sides (right, then
     * left, with the same v and tau), which realizes the similarity
     * transform Q'*A*Q one reflection at a time.
     * NOTE: v is filled using 1-based indexing, hence length n+1;
     *       w is scratch storage for the reflection routines.
     */
    ae_vector_set_length(&w, n, _state);
    ae_vector_set_length(&v, n+1, _state);
    hqrndrandomize(&state, _state);
    for(s=2; s<=n; s++)
    {

        /*
         * Prepare random normal v: entries v[1..s] are drawn N(0,1) two
         * at a time; the outer do-loop retries in the (practically
         * impossible) event that v came out exactly zero.
         */
        do
        {
            i = 1;
            while(i<=s)
            {
                hqrndnormal2(&state, &u1, &u2, _state);
                v.ptr.p_double[i] = u1;
                if( i+1<=s )
                {
                    v.ptr.p_double[i+1] = u2;
                }
                i = i+2;
            }
            lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s));
        }
        while(ae_fp_eq(lambdav,(double)(0)));

        /*
         * Prepare and apply reflection two-sidedly.
         * v[1] is reset to 1 per the reflection-storage convention.
         */
        generatereflection(&v, s, &tau, _state);
        v.ptr.p_double[1] = (double)(1);
        applyreflectionfromtheright(a, tau, &v, 0, n-1, n-s, n-1, &w, _state);
        applyreflectionfromtheleft(a, tau, &v, n-s, n-1, 0, n-1, &w, _state);
    }

    /*
     * Second pass: scale column i AND row i by the same random sign,
     * which keeps the transform a similarity (D*A*D with D=diag(+-1)).
     */
    for(i=0; i<=n-1; i++)
    {
        tau = (double)(2*hqrnduniformi(&state, 2, _state)-1);
        ae_v_muld(&a->ptr.pp_double[0][i], a->stride, ae_v_len(0,n-1), tau);
        ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), tau);
    }

    /*
     * Copy upper triangle to lower, making the result exactly
     * symmetric despite floating-point round-off.
     */
    for(i=0; i<=n-2; i++)
    {
        ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1));
    }
    ae_frame_leave(_state);
}
24063 
24064 
24065 /*************************************************************************
24066 Hermitian multiplication of NxN matrix by random Haar distributed
24067 complex orthogonal matrix
24068 
24069 INPUT PARAMETERS:
24070     A   -   matrix, array[0..N-1, 0..N-1]
24071     N   -   matrix size
24072 
24073 OUTPUT PARAMETERS:
24074     A   -   Q^H*A*Q, where Q is random NxN orthogonal matrix
24075 
24076   -- ALGLIB routine --
24077      04.12.2009
24078      Bochkanov Sergey
24079 *************************************************************************/
void hmatrixrndmultiply(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_complex tau;
    ae_complex lambdav;
    ae_int_t s;
    ae_int_t i;
    ae_vector w;
    ae_vector v;
    hqrndstate state;

    ae_frame_make(_state, &_frame_block);
    memset(&w, 0, sizeof(w));
    memset(&v, 0, sizeof(v));
    memset(&state, 0, sizeof(state));
    ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
    _hqrndstate_init(&state, _state, ae_true);


    /*
     * General case.
     * Each random reflection is applied from the right with tau and from
     * the left with conj(tau), realizing the similarity transform
     * Q^H*A*Q one reflection at a time.
     * NOTE: v is filled using 1-based indexing, hence length n+1;
     *       w is scratch storage for the reflection routines.
     */
    ae_vector_set_length(&w, n, _state);
    ae_vector_set_length(&v, n+1, _state);
    hqrndrandomize(&state, _state);
    for(s=2; s<=n; s++)
    {

        /*
         * Prepare random normal v: tau is reused as scratch holding one
         * complex N(0,1) sample per entry; the conjugated dot product
         * yields |v|^2, and the do-loop retries if v is exactly zero.
         */
        do
        {
            for(i=1; i<=s; i++)
            {
                hqrndnormal2(&state, &tau.x, &tau.y, _state);
                v.ptr.p_complex[i] = tau;
            }
            lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s));
        }
        while(ae_c_eq_d(lambdav,(double)(0)));

        /*
         * Prepare and apply reflection two-sidedly.
         * v[1] is reset to 1 per the reflection-storage convention.
         */
        complexgeneratereflection(&v, s, &tau, _state);
        v.ptr.p_complex[1] = ae_complex_from_i(1);
        complexapplyreflectionfromtheright(a, tau, &v, 0, n-1, n-s, n-1, &w, _state);
        complexapplyreflectionfromtheleft(a, ae_c_conj(tau, _state), &v, n-s, n-1, 0, n-1, &w, _state);
    }

    /*
     * Second pass: scale column i by a random unit-modulus factor and
     * row i by its conjugate, keeping the transform D^H*A*D unitary-
     * similar.
     */
    for(i=0; i<=n-1; i++)
    {
        hqrndunit2(&state, &tau.x, &tau.y, _state);
        ae_v_cmulc(&a->ptr.pp_complex[0][i], a->stride, ae_v_len(0,n-1), tau);
        tau = ae_c_conj(tau, _state);
        ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), tau);
    }

    /*
     * Change all values from lower triangle by complex-conjugate values
     * from upper one: first copy the upper triangle down, then negate
     * the imaginary parts below the diagonal, making the result exactly
     * Hermitian despite floating-point round-off.
     */
    for(i=0; i<=n-2; i++)
    {
        ae_v_cmove(&a->ptr.pp_complex[i+1][i], a->stride, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1));
    }
    for(s=0; s<=n-2; s++)
    {
        for(i=s+1; i<=n-1; i++)
        {
            a->ptr.pp_complex[i][s].y = -a->ptr.pp_complex[i][s].y;
        }
    }
    ae_frame_leave(_state);
}
24162 
24163 
24164 #endif
24165 #if defined(AE_COMPILE_SPARSE) || !defined(AE_PARTIAL_BUILD)
24166 
24167 
24168 /*************************************************************************
24169 This function creates sparse matrix in a Hash-Table format.
24170 
This function creates Hash-Table matrix, which can be  converted  to  CRS
24172 format after its initialization is over. Typical  usage  scenario  for  a
24173 sparse matrix is:
24174 1. creation in a Hash-Table format
24175 2. insertion of the matrix elements
24176 3. conversion to the CRS representation
24177 4. matrix is passed to some linear algebra algorithm
24178 
24179 Some  information  about  different matrix formats can be found below, in
24180 the "NOTES" section.
24181 
24182 INPUT PARAMETERS
24183     M           -   number of rows in a matrix, M>=1
24184     N           -   number of columns in a matrix, N>=1
24185     K           -   K>=0, expected number of non-zero elements in a matrix.
24186                     K can be inexact approximation, can be less than actual
24187                     number  of  elements  (table will grow when needed) or
24188                     even zero).
24189                     It is important to understand that although hash-table
24190                     may grow automatically, it is better to  provide  good
24191                     estimate of data size.
24192 
24193 OUTPUT PARAMETERS
24194     S           -   sparse M*N matrix in Hash-Table representation.
24195                     All elements of the matrix are zero.
24196 
24197 NOTE 1
24198 
24199 Hash-tables use memory inefficiently, and they have to keep  some  amount
24200 of the "spare memory" in order to have good performance. Hash  table  for
24201 matrix with K non-zero elements will  need  C*K*(8+2*sizeof(int))  bytes,
24202 where C is a small constant, about 1.5-2 in magnitude.
24203 
24204 CRS storage, from the other side, is  more  memory-efficient,  and  needs
24205 just K*(8+sizeof(int))+M*sizeof(int) bytes, where M is a number  of  rows
24206 in a matrix.
24207 
24208 When you convert from the Hash-Table to CRS  representation, all unneeded
24209 memory will be freed.
24210 
24211 NOTE 2
24212 
24213 Comments of SparseMatrix structure outline  information  about  different
24214 sparse storage formats. We recommend you to read them before starting  to
24215 use ALGLIB sparse matrices.
24216 
24217 NOTE 3
24218 
24219 This function completely  overwrites S with new sparse matrix. Previously
24220 allocated storage is NOT reused. If you  want  to reuse already allocated
24221 memory, call SparseCreateBuf function.
24222 
24223   -- ALGLIB PROJECT --
24224      Copyright 14.10.2011 by Bochkanov Sergey
24225 *************************************************************************/
void sparsecreate(ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     sparsematrix* s,
     ae_state *_state)
{

    /* Discard any previously allocated storage in S ... */
    _sparsematrix_clear(s);

    /* ... then delegate to the buffered version, which performs the
     * actual hash-table initialization (and argument validation). */
    sparsecreatebuf(m, n, k, s, _state);
}
24237 
24238 
24239 /*************************************************************************
24240 This version of SparseCreate function creates sparse matrix in Hash-Table
24241 format, reusing previously allocated storage as much  as  possible.  Read
24242 comments for SparseCreate() for more information.
24243 
24244 INPUT PARAMETERS
24245     M           -   number of rows in a matrix, M>=1
24246     N           -   number of columns in a matrix, N>=1
24247     K           -   K>=0, expected number of non-zero elements in a matrix.
24248                     K can be inexact approximation, can be less than actual
24249                     number  of  elements  (table will grow when needed) or
24250                     even zero).
24251                     It is important to understand that although hash-table
24252                     may grow automatically, it is better to  provide  good
24253                     estimate of data size.
24254     S           -   SparseMatrix structure which MAY contain some  already
24255                     allocated storage.
24256 
24257 OUTPUT PARAMETERS
24258     S           -   sparse M*N matrix in Hash-Table representation.
24259                     All elements of the matrix are zero.
24260                     Previously allocated storage is reused, if  its  size
24261                     is compatible with expected number of non-zeros K.
24262 
24263   -- ALGLIB PROJECT --
24264      Copyright 14.01.2014 by Bochkanov Sergey
24265 *************************************************************************/
sparsecreatebuf(ae_int_t m,ae_int_t n,ae_int_t k,sparsematrix * s,ae_state * _state)24266 void sparsecreatebuf(ae_int_t m,
24267      ae_int_t n,
24268      ae_int_t k,
24269      sparsematrix* s,
24270      ae_state *_state)
24271 {
24272     ae_int_t i;
24273 
24274 
24275     ae_assert(m>0, "SparseCreateBuf: M<=0", _state);
24276     ae_assert(n>0, "SparseCreateBuf: N<=0", _state);
24277     ae_assert(k>=0, "SparseCreateBuf: K<0", _state);
24278 
24279     /*
24280      * Hash-table size is max(existing_size,requested_size)
24281      *
24282      * NOTE: it is important to use ALL available memory for hash table
24283      *       because it is impossible to efficiently reallocate table
24284      *       without temporary storage. So, if we want table with up to
24285      *       1.000.000 elements, we have to create such table from the
24286      *       very beginning. Otherwise, the very idea of memory reuse
24287      *       will be compromised.
24288      */
24289     s->tablesize = ae_round(k/sparse_desiredloadfactor+sparse_additional, _state);
24290     rvectorsetlengthatleast(&s->vals, s->tablesize, _state);
24291     s->tablesize = s->vals.cnt;
24292 
24293     /*
24294      * Initialize other fields
24295      */
24296     s->matrixtype = 0;
24297     s->m = m;
24298     s->n = n;
24299     s->nfree = s->tablesize;
24300     ivectorsetlengthatleast(&s->idx, 2*s->tablesize, _state);
24301     for(i=0; i<=s->tablesize-1; i++)
24302     {
24303         s->idx.ptr.p_int[2*i] = -1;
24304     }
24305 }
24306 
24307 
24308 /*************************************************************************
24309 This function creates sparse matrix in a CRS format (expert function for
24310 situations when you are running out of memory).
24311 
24312 This function creates CRS matrix. Typical usage scenario for a CRS matrix
24313 is:
24314 1. creation (you have to tell number of non-zero elements at each row  at
24315    this moment)
24316 2. insertion of the matrix elements (row by row, from left to right)
24317 3. matrix is passed to some linear algebra algorithm
24318 
24319 This function is a memory-efficient alternative to SparseCreate(), but it
24320 is more complex because it requires you to know in advance how large your
24321 matrix is. Some  information about  different matrix formats can be found
24322 in comments on SparseMatrix structure.  We recommend  you  to  read  them
24323 before starting to use ALGLIB sparse matrices..
24324 
24325 INPUT PARAMETERS
24326     M           -   number of rows in a matrix, M>=1
24327     N           -   number of columns in a matrix, N>=1
24328     NER         -   number of elements at each row, array[M], NER[I]>=0
24329 
24330 OUTPUT PARAMETERS
24331     S           -   sparse M*N matrix in CRS representation.
24332                     You have to fill ALL non-zero elements by calling
24333                     SparseSet() BEFORE you try to use this matrix.
24334 
24335 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
24336       Previously allocated storage is NOT reused. If you  want  to  reuse
24337       already allocated memory, call SparseCreateCRSBuf function.
24338 
24339   -- ALGLIB PROJECT --
24340      Copyright 14.10.2011 by Bochkanov Sergey
24341 *************************************************************************/
sparsecreatecrs(ae_int_t m,ae_int_t n,ae_vector * ner,sparsematrix * s,ae_state * _state)24342 void sparsecreatecrs(ae_int_t m,
24343      ae_int_t n,
24344      /* Integer */ ae_vector* ner,
24345      sparsematrix* s,
24346      ae_state *_state)
24347 {
24348     ae_int_t i;
24349 
24350     _sparsematrix_clear(s);
24351 
24352     ae_assert(m>0, "SparseCreateCRS: M<=0", _state);
24353     ae_assert(n>0, "SparseCreateCRS: N<=0", _state);
24354     ae_assert(ner->cnt>=m, "SparseCreateCRS: Length(NER)<M", _state);
24355     for(i=0; i<=m-1; i++)
24356     {
24357         ae_assert(ner->ptr.p_int[i]>=0, "SparseCreateCRS: NER[] contains negative elements", _state);
24358     }
24359     sparsecreatecrsbuf(m, n, ner, s, _state);
24360 }
24361 
24362 
24363 /*************************************************************************
24364 This function creates sparse matrix in a CRS format (expert function  for
24365 situations when you are running out  of  memory).  This  version  of  CRS
24366 matrix creation function may reuse memory already allocated in S.
24367 
24368 This function creates CRS matrix. Typical usage scenario for a CRS matrix
24369 is:
24370 1. creation (you have to tell number of non-zero elements at each row  at
24371    this moment)
24372 2. insertion of the matrix elements (row by row, from left to right)
24373 3. matrix is passed to some linear algebra algorithm
24374 
24375 This function is a memory-efficient alternative to SparseCreate(), but it
24376 is more complex because it requires you to know in advance how large your
24377 matrix is. Some  information about  different matrix formats can be found
24378 in comments on SparseMatrix structure.  We recommend  you  to  read  them
24379 before starting to use ALGLIB sparse matrices..
24380 
24381 INPUT PARAMETERS
24382     M           -   number of rows in a matrix, M>=1
24383     N           -   number of columns in a matrix, N>=1
24384     NER         -   number of elements at each row, array[M], NER[I]>=0
24385     S           -   sparse matrix structure with possibly preallocated
24386                     memory.
24387 
24388 OUTPUT PARAMETERS
24389     S           -   sparse M*N matrix in CRS representation.
24390                     You have to fill ALL non-zero elements by calling
24391                     SparseSet() BEFORE you try to use this matrix.
24392 
24393   -- ALGLIB PROJECT --
24394      Copyright 14.10.2011 by Bochkanov Sergey
24395 *************************************************************************/
sparsecreatecrsbuf(ae_int_t m,ae_int_t n,ae_vector * ner,sparsematrix * s,ae_state * _state)24396 void sparsecreatecrsbuf(ae_int_t m,
24397      ae_int_t n,
24398      /* Integer */ ae_vector* ner,
24399      sparsematrix* s,
24400      ae_state *_state)
24401 {
24402     ae_int_t i;
24403     ae_int_t noe;
24404 
24405 
24406     ae_assert(m>0, "SparseCreateCRSBuf: M<=0", _state);
24407     ae_assert(n>0, "SparseCreateCRSBuf: N<=0", _state);
24408     ae_assert(ner->cnt>=m, "SparseCreateCRSBuf: Length(NER)<M", _state);
24409     noe = 0;
24410     s->matrixtype = 1;
24411     s->ninitialized = 0;
24412     s->m = m;
24413     s->n = n;
24414     ivectorsetlengthatleast(&s->ridx, s->m+1, _state);
24415     s->ridx.ptr.p_int[0] = 0;
24416     for(i=0; i<=s->m-1; i++)
24417     {
24418         ae_assert(ner->ptr.p_int[i]>=0, "SparseCreateCRSBuf: NER[] contains negative elements", _state);
24419         noe = noe+ner->ptr.p_int[i];
24420         s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i]+ner->ptr.p_int[i];
24421     }
24422     rvectorsetlengthatleast(&s->vals, noe, _state);
24423     ivectorsetlengthatleast(&s->idx, noe, _state);
24424     if( noe==0 )
24425     {
24426         sparseinitduidx(s, _state);
24427     }
24428 }
24429 
24430 
24431 /*************************************************************************
24432 This function creates sparse matrix in  a  SKS  format  (skyline  storage
24433 format). In most cases you do not need this function - CRS format  better
24434 suits most use cases.
24435 
24436 INPUT PARAMETERS
24437     M, N        -   number of rows(M) and columns (N) in a matrix:
24438                     * M=N (as for now, ALGLIB supports only square SKS)
24439                     * N>=1
24440                     * M>=1
24441     D           -   "bottom" bandwidths, array[M], D[I]>=0.
24442                     I-th element stores number of non-zeros at I-th  row,
24443                     below the diagonal (diagonal itself is not  included)
24444     U           -   "top" bandwidths, array[N], U[I]>=0.
24445                     I-th element stores number of non-zeros  at I-th row,
24446                     above the diagonal (diagonal itself  is not included)
24447 
24448 OUTPUT PARAMETERS
24449     S           -   sparse M*N matrix in SKS representation.
24450                     All elements are filled by zeros.
24451                     You may use sparseset() to change their values.
24452 
24453 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
24454       Previously allocated storage is NOT reused. If you  want  to  reuse
24455       already allocated memory, call SparseCreateSKSBuf function.
24456 
24457   -- ALGLIB PROJECT --
24458      Copyright 13.01.2014 by Bochkanov Sergey
24459 *************************************************************************/
sparsecreatesks(ae_int_t m,ae_int_t n,ae_vector * d,ae_vector * u,sparsematrix * s,ae_state * _state)24460 void sparsecreatesks(ae_int_t m,
24461      ae_int_t n,
24462      /* Integer */ ae_vector* d,
24463      /* Integer */ ae_vector* u,
24464      sparsematrix* s,
24465      ae_state *_state)
24466 {
24467     ae_int_t i;
24468 
24469     _sparsematrix_clear(s);
24470 
24471     ae_assert(m>0, "SparseCreateSKS: M<=0", _state);
24472     ae_assert(n>0, "SparseCreateSKS: N<=0", _state);
24473     ae_assert(m==n, "SparseCreateSKS: M<>N", _state);
24474     ae_assert(d->cnt>=m, "SparseCreateSKS: Length(D)<M", _state);
24475     ae_assert(u->cnt>=n, "SparseCreateSKS: Length(U)<N", _state);
24476     for(i=0; i<=m-1; i++)
24477     {
24478         ae_assert(d->ptr.p_int[i]>=0, "SparseCreateSKS: D[] contains negative elements", _state);
24479         ae_assert(d->ptr.p_int[i]<=i, "SparseCreateSKS: D[I]>I for some I", _state);
24480     }
24481     for(i=0; i<=n-1; i++)
24482     {
24483         ae_assert(u->ptr.p_int[i]>=0, "SparseCreateSKS: U[] contains negative elements", _state);
24484         ae_assert(u->ptr.p_int[i]<=i, "SparseCreateSKS: U[I]>I for some I", _state);
24485     }
24486     sparsecreatesksbuf(m, n, d, u, s, _state);
24487 }
24488 
24489 
24490 /*************************************************************************
24491 This is "buffered"  version  of  SparseCreateSKS()  which  reuses  memory
24492 previously allocated in S (of course, memory is reallocated if needed).
24493 
24494 This function creates sparse matrix in  a  SKS  format  (skyline  storage
24495 format). In most cases you do not need this function - CRS format  better
24496 suits most use cases.
24497 
24498 INPUT PARAMETERS
24499     M, N        -   number of rows(M) and columns (N) in a matrix:
24500                     * M=N (as for now, ALGLIB supports only square SKS)
24501                     * N>=1
24502                     * M>=1
24503     D           -   "bottom" bandwidths, array[M], 0<=D[I]<=I.
24504                     I-th element stores number of non-zeros at I-th row,
24505                     below the diagonal (diagonal itself is not included)
24506     U           -   "top" bandwidths, array[N], 0<=U[I]<=I.
24507                     I-th element stores number of non-zeros at I-th row,
24508                     above the diagonal (diagonal itself is not included)
24509 
24510 OUTPUT PARAMETERS
24511     S           -   sparse M*N matrix in SKS representation.
24512                     All elements are filled by zeros.
24513                     You may use sparseset() to change their values.
24514 
24515   -- ALGLIB PROJECT --
24516      Copyright 13.01.2014 by Bochkanov Sergey
24517 *************************************************************************/
sparsecreatesksbuf(ae_int_t m,ae_int_t n,ae_vector * d,ae_vector * u,sparsematrix * s,ae_state * _state)24518 void sparsecreatesksbuf(ae_int_t m,
24519      ae_int_t n,
24520      /* Integer */ ae_vector* d,
24521      /* Integer */ ae_vector* u,
24522      sparsematrix* s,
24523      ae_state *_state)
24524 {
24525     ae_int_t i;
24526     ae_int_t minmn;
24527     ae_int_t nz;
24528     ae_int_t mxd;
24529     ae_int_t mxu;
24530 
24531 
24532     ae_assert(m>0, "SparseCreateSKSBuf: M<=0", _state);
24533     ae_assert(n>0, "SparseCreateSKSBuf: N<=0", _state);
24534     ae_assert(m==n, "SparseCreateSKSBuf: M<>N", _state);
24535     ae_assert(d->cnt>=m, "SparseCreateSKSBuf: Length(D)<M", _state);
24536     ae_assert(u->cnt>=n, "SparseCreateSKSBuf: Length(U)<N", _state);
24537     for(i=0; i<=m-1; i++)
24538     {
24539         ae_assert(d->ptr.p_int[i]>=0, "SparseCreateSKSBuf: D[] contains negative elements", _state);
24540         ae_assert(d->ptr.p_int[i]<=i, "SparseCreateSKSBuf: D[I]>I for some I", _state);
24541     }
24542     for(i=0; i<=n-1; i++)
24543     {
24544         ae_assert(u->ptr.p_int[i]>=0, "SparseCreateSKSBuf: U[] contains negative elements", _state);
24545         ae_assert(u->ptr.p_int[i]<=i, "SparseCreateSKSBuf: U[I]>I for some I", _state);
24546     }
24547     minmn = ae_minint(m, n, _state);
24548     s->matrixtype = 2;
24549     s->ninitialized = 0;
24550     s->m = m;
24551     s->n = n;
24552     ivectorsetlengthatleast(&s->ridx, minmn+1, _state);
24553     s->ridx.ptr.p_int[0] = 0;
24554     nz = 0;
24555     for(i=0; i<=minmn-1; i++)
24556     {
24557         nz = nz+1+d->ptr.p_int[i]+u->ptr.p_int[i];
24558         s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i]+1+d->ptr.p_int[i]+u->ptr.p_int[i];
24559     }
24560     rvectorsetlengthatleast(&s->vals, nz, _state);
24561     for(i=0; i<=nz-1; i++)
24562     {
24563         s->vals.ptr.p_double[i] = 0.0;
24564     }
24565     ivectorsetlengthatleast(&s->didx, m+1, _state);
24566     mxd = 0;
24567     for(i=0; i<=m-1; i++)
24568     {
24569         s->didx.ptr.p_int[i] = d->ptr.p_int[i];
24570         mxd = ae_maxint(mxd, d->ptr.p_int[i], _state);
24571     }
24572     s->didx.ptr.p_int[m] = mxd;
24573     ivectorsetlengthatleast(&s->uidx, n+1, _state);
24574     mxu = 0;
24575     for(i=0; i<=n-1; i++)
24576     {
24577         s->uidx.ptr.p_int[i] = u->ptr.p_int[i];
24578         mxu = ae_maxint(mxu, u->ptr.p_int[i], _state);
24579     }
24580     s->uidx.ptr.p_int[n] = mxu;
24581 }
24582 
24583 
24584 /*************************************************************************
24585 This function creates sparse matrix in  a  SKS  format  (skyline  storage
24586 format). Unlike more general  sparsecreatesks(),  this  function  creates
24587 sparse matrix with constant bandwidth.
24588 
24589 You may want to use this function instead of sparsecreatesks() when  your
24590 matrix has  constant  or  nearly-constant  bandwidth,  and  you  want  to
24591 simplify source code.
24592 
24593 INPUT PARAMETERS
24594     M, N        -   number of rows(M) and columns (N) in a matrix:
24595                     * M=N (as for now, ALGLIB supports only square SKS)
24596                     * N>=1
24597                     * M>=1
24598     BW          -   matrix bandwidth, BW>=0
24599 
24600 OUTPUT PARAMETERS
24601     S           -   sparse M*N matrix in SKS representation.
24602                     All elements are filled by zeros.
24603                     You may use sparseset() to  change  their values.
24604 
24605 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
24606       Previously allocated storage is NOT reused. If you  want  to  reuse
24607       already allocated memory, call sparsecreatesksbandbuf function.
24608 
24609   -- ALGLIB PROJECT --
24610      Copyright 25.12.2017 by Bochkanov Sergey
24611 *************************************************************************/
sparsecreatesksband(ae_int_t m,ae_int_t n,ae_int_t bw,sparsematrix * s,ae_state * _state)24612 void sparsecreatesksband(ae_int_t m,
24613      ae_int_t n,
24614      ae_int_t bw,
24615      sparsematrix* s,
24616      ae_state *_state)
24617 {
24618 
24619     _sparsematrix_clear(s);
24620 
24621     ae_assert(m>0, "SparseCreateSKSBand: M<=0", _state);
24622     ae_assert(n>0, "SparseCreateSKSBand: N<=0", _state);
24623     ae_assert(bw>=0, "SparseCreateSKSBand: BW<0", _state);
24624     ae_assert(m==n, "SparseCreateSKSBand: M!=N", _state);
24625     sparsecreatesksbandbuf(m, n, bw, s, _state);
24626 }
24627 
24628 
24629 /*************************************************************************
24630 This is "buffered" version  of  sparsecreatesksband() which reuses memory
24631 previously allocated in S (of course, memory is reallocated if needed).
24632 
24633 You may want to use this function instead  of  sparsecreatesksbuf()  when
24634 your matrix has  constant or nearly-constant  bandwidth,  and you want to
24635 simplify source code.
24636 
24637 INPUT PARAMETERS
24638     M, N        -   number of rows(M) and columns (N) in a matrix:
24639                     * M=N (as for now, ALGLIB supports only square SKS)
24640                     * N>=1
24641                     * M>=1
24642     BW          -   bandwidth, BW>=0
24643 
24644 OUTPUT PARAMETERS
24645     S           -   sparse M*N matrix in SKS representation.
24646                     All elements are filled by zeros.
24647                     You may use sparseset() to change their values.
24648 
24649   -- ALGLIB PROJECT --
24650      Copyright 13.01.2014 by Bochkanov Sergey
24651 *************************************************************************/
sparsecreatesksbandbuf(ae_int_t m,ae_int_t n,ae_int_t bw,sparsematrix * s,ae_state * _state)24652 void sparsecreatesksbandbuf(ae_int_t m,
24653      ae_int_t n,
24654      ae_int_t bw,
24655      sparsematrix* s,
24656      ae_state *_state)
24657 {
24658     ae_int_t i;
24659     ae_int_t minmn;
24660     ae_int_t nz;
24661     ae_int_t mxd;
24662     ae_int_t mxu;
24663     ae_int_t dui;
24664 
24665 
24666     ae_assert(m>0, "SparseCreateSKSBandBuf: M<=0", _state);
24667     ae_assert(n>0, "SparseCreateSKSBandBuf: N<=0", _state);
24668     ae_assert(m==n, "SparseCreateSKSBandBuf: M!=N", _state);
24669     ae_assert(bw>=0, "SparseCreateSKSBandBuf: BW<0", _state);
24670     minmn = ae_minint(m, n, _state);
24671     s->matrixtype = 2;
24672     s->ninitialized = 0;
24673     s->m = m;
24674     s->n = n;
24675     ivectorsetlengthatleast(&s->ridx, minmn+1, _state);
24676     s->ridx.ptr.p_int[0] = 0;
24677     nz = 0;
24678     for(i=0; i<=minmn-1; i++)
24679     {
24680         dui = ae_minint(i, bw, _state);
24681         nz = nz+1+2*dui;
24682         s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i]+1+2*dui;
24683     }
24684     rvectorsetlengthatleast(&s->vals, nz, _state);
24685     for(i=0; i<=nz-1; i++)
24686     {
24687         s->vals.ptr.p_double[i] = 0.0;
24688     }
24689     ivectorsetlengthatleast(&s->didx, m+1, _state);
24690     mxd = 0;
24691     for(i=0; i<=m-1; i++)
24692     {
24693         dui = ae_minint(i, bw, _state);
24694         s->didx.ptr.p_int[i] = dui;
24695         mxd = ae_maxint(mxd, dui, _state);
24696     }
24697     s->didx.ptr.p_int[m] = mxd;
24698     ivectorsetlengthatleast(&s->uidx, n+1, _state);
24699     mxu = 0;
24700     for(i=0; i<=n-1; i++)
24701     {
24702         dui = ae_minint(i, bw, _state);
24703         s->uidx.ptr.p_int[i] = dui;
24704         mxu = ae_maxint(mxu, dui, _state);
24705     }
24706     s->uidx.ptr.p_int[n] = mxu;
24707 }
24708 
24709 
24710 /*************************************************************************
24711 This function copies S0 to S1.
24712 This function completely deallocates memory owned by S1 before creating a
24713 copy of S0. If you want to reuse memory, use SparseCopyBuf.
24714 
24715 NOTE:  this  function  does  not verify its arguments, it just copies all
24716 fields of the structure.
24717 
24718   -- ALGLIB PROJECT --
24719      Copyright 14.10.2011 by Bochkanov Sergey
24720 *************************************************************************/
sparsecopy(sparsematrix * s0,sparsematrix * s1,ae_state * _state)24721 void sparsecopy(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
24722 {
24723 
24724     _sparsematrix_clear(s1);
24725 
24726     sparsecopybuf(s0, s1, _state);
24727 }
24728 
24729 
24730 /*************************************************************************
24731 This function copies S0 to S1.
24732 Memory already allocated in S1 is reused as much as possible.
24733 
24734 NOTE:  this  function  does  not verify its arguments, it just copies all
24735 fields of the structure.
24736 
24737   -- ALGLIB PROJECT --
24738      Copyright 14.10.2011 by Bochkanov Sergey
24739 *************************************************************************/
sparsecopybuf(sparsematrix * s0,sparsematrix * s1,ae_state * _state)24740 void sparsecopybuf(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
24741 {
24742     ae_int_t l;
24743     ae_int_t i;
24744 
24745 
24746     s1->matrixtype = s0->matrixtype;
24747     s1->m = s0->m;
24748     s1->n = s0->n;
24749     s1->nfree = s0->nfree;
24750     s1->ninitialized = s0->ninitialized;
24751     s1->tablesize = s0->tablesize;
24752 
24753     /*
24754      * Initialization for arrays
24755      */
24756     l = s0->vals.cnt;
24757     rvectorsetlengthatleast(&s1->vals, l, _state);
24758     for(i=0; i<=l-1; i++)
24759     {
24760         s1->vals.ptr.p_double[i] = s0->vals.ptr.p_double[i];
24761     }
24762     l = s0->ridx.cnt;
24763     ivectorsetlengthatleast(&s1->ridx, l, _state);
24764     for(i=0; i<=l-1; i++)
24765     {
24766         s1->ridx.ptr.p_int[i] = s0->ridx.ptr.p_int[i];
24767     }
24768     l = s0->idx.cnt;
24769     ivectorsetlengthatleast(&s1->idx, l, _state);
24770     for(i=0; i<=l-1; i++)
24771     {
24772         s1->idx.ptr.p_int[i] = s0->idx.ptr.p_int[i];
24773     }
24774 
24775     /*
24776      * Initalization for CRS-parameters
24777      */
24778     l = s0->uidx.cnt;
24779     ivectorsetlengthatleast(&s1->uidx, l, _state);
24780     for(i=0; i<=l-1; i++)
24781     {
24782         s1->uidx.ptr.p_int[i] = s0->uidx.ptr.p_int[i];
24783     }
24784     l = s0->didx.cnt;
24785     ivectorsetlengthatleast(&s1->didx, l, _state);
24786     for(i=0; i<=l-1; i++)
24787     {
24788         s1->didx.ptr.p_int[i] = s0->didx.ptr.p_int[i];
24789     }
24790 }
24791 
24792 
24793 /*************************************************************************
24794 This function efficiently swaps contents of S0 and S1.
24795 
24796   -- ALGLIB PROJECT --
24797      Copyright 16.01.2014 by Bochkanov Sergey
24798 *************************************************************************/
sparseswap(sparsematrix * s0,sparsematrix * s1,ae_state * _state)24799 void sparseswap(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
24800 {
24801 
24802 
24803     swapi(&s1->matrixtype, &s0->matrixtype, _state);
24804     swapi(&s1->m, &s0->m, _state);
24805     swapi(&s1->n, &s0->n, _state);
24806     swapi(&s1->nfree, &s0->nfree, _state);
24807     swapi(&s1->ninitialized, &s0->ninitialized, _state);
24808     swapi(&s1->tablesize, &s0->tablesize, _state);
24809     ae_swap_vectors(&s1->vals, &s0->vals);
24810     ae_swap_vectors(&s1->ridx, &s0->ridx);
24811     ae_swap_vectors(&s1->idx, &s0->idx);
24812     ae_swap_vectors(&s1->uidx, &s0->uidx);
24813     ae_swap_vectors(&s1->didx, &s0->didx);
24814 }
24815 
24816 
/*************************************************************************
This function adds value to S[i,j] - element of the sparse matrix. Matrix
must be in a Hash-Table mode.

In case S[i,j] already exists in the table, V i added to  its  value.  In
case  S[i,j]  is  non-existent,  it  is  inserted  in  the  table.  Table
automatically grows when necessary.

INPUT PARAMETERS
    S           -   sparse M*N matrix in Hash-Table representation.
                    Exception will be thrown for CRS matrix.
    I           -   row index of the element to modify, 0<=I<M
    J           -   column index of the element to modify, 0<=J<N
    V           -   value to add, must be finite number

OUTPUT PARAMETERS
    S           -   modified matrix

NOTE 1:  when  S[i,j]  is exactly zero after modification, it is  deleted
from the table.

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
*************************************************************************/
void sparseadd(sparsematrix* s,
     ae_int_t i,
     ae_int_t j,
     double v,
     ae_state *_state)
{
    ae_int_t hashcode;  /* current probe position in the hash table */
    ae_int_t tcode;     /* first "deleted" slot met while probing, -1 if none */
    ae_int_t k;         /* current table size (number of slots) */


    ae_assert(s->matrixtype==0, "SparseAdd: matrix must be in the Hash-Table mode to do this operation", _state);
    ae_assert(i>=0, "SparseAdd: I<0", _state);
    ae_assert(i<s->m, "SparseAdd: I>=M", _state);
    ae_assert(j>=0, "SparseAdd: J<0", _state);
    ae_assert(j<s->n, "SparseAdd: J>=N", _state);
    ae_assert(ae_isfinite(v, _state), "SparseAdd: V is not finite number", _state);

    /*
     * Adding exact zero changes nothing - return early
     */
    if( ae_fp_eq(v,(double)(0)) )
    {
        return;
    }

    /*
     * Grow the table first if its load factor exceeds the allowed
     * maximum (NFree counts never-used slots)
     */
    tcode = -1;
    k = s->tablesize;
    if( ae_fp_greater_eq((1-sparse_maxloadfactor)*k,(double)(s->nfree)) )
    {
        sparseresizematrix(s, _state);
        k = s->tablesize;
    }

    /*
     * Open addressing with linear probing. Idx[] stores (row,col) pairs
     * at positions 2*slot and 2*slot+1; row=-1 marks a never-used slot,
     * row=-2 marks a deleted slot (tombstone).
     */
    hashcode = sparse_hash(i, j, k, _state);
    for(;;)
    {
        if( s->idx.ptr.p_int[2*hashcode]==-1 )
        {
            /*
             * Never-used slot reached: (I,J) is not in the table.
             * Insert it, preferring an earlier tombstone if one was
             * seen; NFree decreases only when a never-used slot is
             * consumed (tombstone reuse does not reduce free space).
             */
            if( tcode!=-1 )
            {
                hashcode = tcode;
            }
            s->vals.ptr.p_double[hashcode] = v;
            s->idx.ptr.p_int[2*hashcode] = i;
            s->idx.ptr.p_int[2*hashcode+1] = j;
            if( tcode==-1 )
            {
                s->nfree = s->nfree-1;
            }
            return;
        }
        else
        {
            if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
            {
                /*
                 * Element found: accumulate V into it; an exactly-zero
                 * result deletes the element by turning its slot into
                 * a tombstone (see NOTE 1 above)
                 */
                s->vals.ptr.p_double[hashcode] = s->vals.ptr.p_double[hashcode]+v;
                if( ae_fp_eq(s->vals.ptr.p_double[hashcode],(double)(0)) )
                {
                    s->idx.ptr.p_int[2*hashcode] = -2;
                }
                return;
            }

            /*
             * Is it deleted element? Remember the first tombstone so it
             * can be reused for insertion if (I,J) turns out to be absent.
             */
            if( tcode==-1&&s->idx.ptr.p_int[2*hashcode]==-2 )
            {
                tcode = hashcode;
            }

            /*
             * Next step (wrap around the table end)
             */
            hashcode = (hashcode+1)%k;
        }
    }
}
24914 
24915 
/*************************************************************************
This function modifies S[i,j] - element of the sparse matrix.

For Hash-based storage format:
* this function can be called at any moment - during matrix initialization
  or later
* new value can be zero or non-zero.  In case new value of S[i,j] is zero,
  this element is deleted from the table.
* this  function  has  no  effect when called with zero V for non-existent
  element.

For CRS-bases storage format:
* this function can be called ONLY DURING MATRIX INITIALIZATION
* zero values are stored in the matrix similarly to non-zero ones
* elements must be initialized in correct order -  from top row to bottom,
  within row - from left to right.

For SKS storage:
* this function can be called at any moment - during matrix initialization
  or later
* zero values are stored in the matrix similarly to non-zero ones
* this function CAN NOT be called for non-existent (outside  of  the  band
  specified during SKS matrix creation) elements. Say, if you created  SKS
  matrix  with  bandwidth=2  and  tried to call sparseset(s,0,10,VAL),  an
  exception will be generated.

INPUT PARAMETERS
    S           -   sparse M*N matrix in Hash-Table, SKS or CRS format.
    I           -   row index of the element to modify, 0<=I<M
    J           -   column index of the element to modify, 0<=J<N
    V           -   value to set, must be finite number, can be zero

OUTPUT PARAMETERS
    S           -   modified matrix

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
*************************************************************************/
void sparseset(sparsematrix* s,
     ae_int_t i,
     ae_int_t j,
     double v,
     ae_state *_state)
{
    ae_int_t hashcode;  /* current probe position in the hash table */
    ae_int_t tcode;     /* first "deleted" slot met while probing, -1 if none */
    ae_int_t k;         /* current table size (number of slots) */
    ae_bool b;          /* result of in-band rewrite attempt (SKS mode) */


    ae_assert((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2, "SparseSet: unsupported matrix storage format", _state);
    ae_assert(i>=0, "SparseSet: I<0", _state);
    ae_assert(i<s->m, "SparseSet: I>=M", _state);
    ae_assert(j>=0, "SparseSet: J<0", _state);
    ae_assert(j<s->n, "SparseSet: J>=N", _state);
    ae_assert(ae_isfinite(v, _state), "SparseSet: V is not finite number", _state);

    /*
     * Hash-table matrix: open addressing with linear probing, same slot
     * layout as in SparseAdd() - Idx[] holds (row,col) pairs at 2*slot
     * and 2*slot+1, row=-1 means never-used, row=-2 means deleted.
     */
    if( s->matrixtype==0 )
    {
        tcode = -1;
        k = s->tablesize;
        /* grow the table when load factor exceeds the allowed maximum */
        if( ae_fp_greater_eq((1-sparse_maxloadfactor)*k,(double)(s->nfree)) )
        {
            sparseresizematrix(s, _state);
            k = s->tablesize;
        }
        hashcode = sparse_hash(i, j, k, _state);
        for(;;)
        {
            if( s->idx.ptr.p_int[2*hashcode]==-1 )
            {
                /*
                 * Never-used slot reached: (I,J) absent. Insert only a
                 * non-zero V (storing zero for a non-existent element
                 * would be a no-op by contract), preferring an earlier
                 * tombstone; NFree decreases only when a never-used
                 * slot is consumed.
                 */
                if( ae_fp_neq(v,(double)(0)) )
                {
                    if( tcode!=-1 )
                    {
                        hashcode = tcode;
                    }
                    s->vals.ptr.p_double[hashcode] = v;
                    s->idx.ptr.p_int[2*hashcode] = i;
                    s->idx.ptr.p_int[2*hashcode+1] = j;
                    if( tcode==-1 )
                    {
                        s->nfree = s->nfree-1;
                    }
                }
                return;
            }
            else
            {
                if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
                {
                    /*
                     * Element found: zero V deletes it (slot becomes a
                     * tombstone), non-zero V overwrites the stored value
                     */
                    if( ae_fp_eq(v,(double)(0)) )
                    {
                        s->idx.ptr.p_int[2*hashcode] = -2;
                    }
                    else
                    {
                        s->vals.ptr.p_double[hashcode] = v;
                    }
                    return;
                }
                /* remember the first tombstone for possible reuse */
                if( tcode==-1&&s->idx.ptr.p_int[2*hashcode]==-2 )
                {
                    tcode = hashcode;
                }

                /*
                 * Next step (wrap around the table end)
                 */
                hashcode = (hashcode+1)%k;
            }
        }
    }

    /*
     * CRS matrix: elements are appended sequentially at position
     * NInitialized, so the asserts below enforce the top-to-bottom,
     * left-to-right fill order promised at creation time
     */
    if( s->matrixtype==1 )
    {
        ae_assert(s->ridx.ptr.p_int[i]<=s->ninitialized, "SparseSet: too few initialized elements at some row (you have promised more when called SparceCreateCRS)", _state);
        ae_assert(s->ridx.ptr.p_int[i+1]>s->ninitialized, "SparseSet: too many initialized elements at some row (you have promised less when called SparceCreateCRS)", _state);
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[i]||s->idx.ptr.p_int[s->ninitialized-1]<j, "SparseSet: incorrect column order (you must fill every row from left to right)", _state);
        s->vals.ptr.p_double[s->ninitialized] = v;
        s->idx.ptr.p_int[s->ninitialized] = j;
        s->ninitialized = s->ninitialized+1;

        /*
         * If matrix has been created then
         * initiale 'S.UIdx' and 'S.DIdx'
         */
        if( s->ninitialized==s->ridx.ptr.p_int[s->m] )
        {
            sparseinitduidx(s, _state);
        }
        return;
    }

    /*
     * SKS matrix: rewrite in place; SparseRewriteExisting() returns
     * False for elements outside of the band, which is an error here
     */
    if( s->matrixtype==2 )
    {
        b = sparserewriteexisting(s, i, j, v, _state);
        ae_assert(b, "SparseSet: an attempt to initialize out-of-band element of the SKS matrix", _state);
        return;
    }
}
25066 
25067 
25068 /*************************************************************************
25069 This function returns S[i,j] - element of the sparse matrix.  Matrix  can
25070 be in any mode (Hash-Table, CRS, SKS), but this function is less efficient
25071 for CRS matrices. Hash-Table and SKS matrices can find  element  in  O(1)
25072 time, while  CRS  matrices need O(log(RS)) time, where RS is an number of
25073 non-zero elements in a row.
25074 
25075 INPUT PARAMETERS
25076     S           -   sparse M*N matrix
25077     I           -   row index of the element to modify, 0<=I<M
25078     J           -   column index of the element to modify, 0<=J<N
25079 
25080 RESULT
25081     value of S[I,J] or zero (in case no element with such index is found)
25082 
25083   -- ALGLIB PROJECT --
25084      Copyright 14.10.2011 by Bochkanov Sergey
25085 *************************************************************************/
double sparseget(sparsematrix* s,
     ae_int_t i,
     ae_int_t j,
     ae_state *_state)
{
    ae_int_t hashcode;
    ae_int_t k;
    ae_int_t k0;
    ae_int_t k1;
    double result;


    ae_assert(i>=0, "SparseGet: I<0", _state);
    ae_assert(i<s->m, "SparseGet: I>=M", _state);
    ae_assert(j>=0, "SparseGet: J<0", _state);
    ae_assert(j<s->n, "SparseGet: J>=N", _state);
    result = 0.0;
    if( s->matrixtype==0 )
    {

        /*
         * Hash-based storage: open addressing with linear probing.
         * Idx[2*HashCode] holds the row index of the entry stored in that
         * slot (-1 marks a never-used slot, which terminates the probe
         * chain), Idx[2*HashCode+1] holds the column index, and the value
         * itself lives in Vals[HashCode].
         */
        result = (double)(0);
        k = s->tablesize;
        hashcode = sparse_hash(i, j, k, _state);
        for(;;)
        {
            if( s->idx.ptr.p_int[2*hashcode]==-1 )
            {
                /* Never-used slot reached: element is absent, return 0 */
                return result;
            }
            if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
            {
                result = s->vals.ptr.p_double[hashcode];
                return result;
            }
            hashcode = (hashcode+1)%k;
        }
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS: row I occupies Vals/Idx[RIdx[i]..RIdx[i+1]-1], with column
         * indices sorted in ascending order, so we can binary-search for
         * column J inside the row (O(log(RS)) as promised above).
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGet: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        k0 = s->ridx.ptr.p_int[i];
        k1 = s->ridx.ptr.p_int[i+1]-1;
        result = (double)(0);
        while(k0<=k1)
        {
            k = (k0+k1)/2;
            if( s->idx.ptr.p_int[k]==j )
            {
                result = s->vals.ptr.p_double[k];
                return result;
            }
            if( s->idx.ptr.p_int[k]<j )
            {
                k0 = k+1;
            }
            else
            {
                k1 = k-1;
            }
        }
        /* Column J not present within the skyline of row I: structural zero */
        return result;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS: skyline storage. DIdx[i] is the subdiagonal band width of
         * row I, UIdx[j] is the superdiagonal band width of column J;
         * elements outside the band are structural zeros.
         */
        ae_assert(s->m==s->n, "SparseGet: non-square SKS matrix not supported", _state);
        result = (double)(0);
        if( i==j )
        {

            /*
             * Return diagonal element
             */
            result = s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+s->didx.ptr.p_int[i]];
            return result;
        }
        if( j<i )
        {

            /*
             * Return subdiagonal element at I-th "skyline block"
             * (row block stores S[i,i-k..i] consecutively from RIdx[i])
             */
            k = s->didx.ptr.p_int[i];
            if( i-j<=k )
            {
                result = s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+k+j-i];
            }
        }
        else
        {

            /*
             * Return superdiagonal element at J-th "skyline block"
             * (the last K slots before RIdx[j+1] hold S[j-k..j-1, j])
             */
            k = s->uidx.ptr.p_int[j];
            if( j-i<=k )
            {
                result = s->vals.ptr.p_double[s->ridx.ptr.p_int[j+1]-(j-i)];
            }
            return result;
        }
        return result;
    }
    ae_assert(ae_false, "SparseGet: unexpected matrix type", _state);
    return result;
}
25202 
25203 
25204 /*************************************************************************
25205 This function checks whether S[i,j] is present in the sparse  matrix.  It
25206 returns True even for elements  that  are  numerically  zero  (but  still
25207 have place allocated for them).
25208 
25209 The matrix  can be in any mode (Hash-Table, CRS, SKS), but this  function
25210 is less efficient for CRS matrices. Hash-Table and SKS matrices can  find
25211 element in O(1) time, while  CRS  matrices need O(log(RS)) time, where RS
25212 is an number of non-zero elements in a row.
25213 
25214 INPUT PARAMETERS
25215     S           -   sparse M*N matrix
25216     I           -   row index of the element to modify, 0<=I<M
25217     J           -   column index of the element to modify, 0<=J<N
25218 
25219 RESULT
25220     whether S[I,J] is present in the data structure or not
25221 
25222   -- ALGLIB PROJECT --
25223      Copyright 14.10.2020 by Bochkanov Sergey
25224 *************************************************************************/
sparseexists(sparsematrix * s,ae_int_t i,ae_int_t j,ae_state * _state)25225 ae_bool sparseexists(sparsematrix* s,
25226      ae_int_t i,
25227      ae_int_t j,
25228      ae_state *_state)
25229 {
25230     ae_int_t hashcode;
25231     ae_int_t k;
25232     ae_int_t k0;
25233     ae_int_t k1;
25234     ae_bool result;
25235 
25236 
25237     ae_assert(i>=0, "SparseExists: I<0", _state);
25238     ae_assert(i<s->m, "SparseExists: I>=M", _state);
25239     ae_assert(j>=0, "SparseExists: J<0", _state);
25240     ae_assert(j<s->n, "SparseExists: J>=N", _state);
25241     result = ae_false;
25242     if( s->matrixtype==0 )
25243     {
25244 
25245         /*
25246          * Hash-based storage
25247          */
25248         k = s->tablesize;
25249         hashcode = sparse_hash(i, j, k, _state);
25250         for(;;)
25251         {
25252             if( s->idx.ptr.p_int[2*hashcode]==-1 )
25253             {
25254                 return result;
25255             }
25256             if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
25257             {
25258                 result = ae_true;
25259                 return result;
25260             }
25261             hashcode = (hashcode+1)%k;
25262         }
25263     }
25264     if( s->matrixtype==1 )
25265     {
25266 
25267         /*
25268          * CRS
25269          */
25270         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseExists: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
25271         k0 = s->ridx.ptr.p_int[i];
25272         k1 = s->ridx.ptr.p_int[i+1]-1;
25273         while(k0<=k1)
25274         {
25275             k = (k0+k1)/2;
25276             if( s->idx.ptr.p_int[k]==j )
25277             {
25278                 result = ae_true;
25279                 return result;
25280             }
25281             if( s->idx.ptr.p_int[k]<j )
25282             {
25283                 k0 = k+1;
25284             }
25285             else
25286             {
25287                 k1 = k-1;
25288             }
25289         }
25290         return result;
25291     }
25292     if( s->matrixtype==2 )
25293     {
25294 
25295         /*
25296          * SKS
25297          */
25298         ae_assert(s->m==s->n, "SparseExists: non-square SKS matrix not supported", _state);
25299         if( i==j )
25300         {
25301 
25302             /*
25303              * Return diagonal element
25304              */
25305             result = ae_true;
25306             return result;
25307         }
25308         if( j<i )
25309         {
25310 
25311             /*
25312              * Return subdiagonal element at I-th "skyline block"
25313              */
25314             if( i-j<=s->didx.ptr.p_int[i] )
25315             {
25316                 result = ae_true;
25317             }
25318         }
25319         else
25320         {
25321 
25322             /*
25323              * Return superdiagonal element at J-th "skyline block"
25324              */
25325             if( j-i<=s->uidx.ptr.p_int[j] )
25326             {
25327                 result = ae_true;
25328             }
25329             return result;
25330         }
25331         return result;
25332     }
25333     ae_assert(ae_false, "SparseExists: unexpected matrix type", _state);
25334     return result;
25335 }
25336 
25337 
25338 /*************************************************************************
25339 This function returns I-th diagonal element of the sparse matrix.
25340 
25341 Matrix can be in any mode (Hash-Table or CRS storage), but this  function
25342 is most efficient for CRS matrices - it requires less than 50 CPU  cycles
25343 to extract diagonal element. For Hash-Table matrices we still  have  O(1)
25344 query time, but function is many times slower.
25345 
25346 INPUT PARAMETERS
25347     S           -   sparse M*N matrix in Hash-Table representation.
25348                     Exception will be thrown for CRS matrix.
25349     I           -   index of the element to modify, 0<=I<min(M,N)
25350 
25351 RESULT
25352     value of S[I,I] or zero (in case no element with such index is found)
25353 
25354   -- ALGLIB PROJECT --
25355      Copyright 14.10.2011 by Bochkanov Sergey
25356 *************************************************************************/
sparsegetdiagonal(sparsematrix * s,ae_int_t i,ae_state * _state)25357 double sparsegetdiagonal(sparsematrix* s, ae_int_t i, ae_state *_state)
25358 {
25359     double result;
25360 
25361 
25362     ae_assert(i>=0, "SparseGetDiagonal: I<0", _state);
25363     ae_assert(i<s->m, "SparseGetDiagonal: I>=M", _state);
25364     ae_assert(i<s->n, "SparseGetDiagonal: I>=N", _state);
25365     result = (double)(0);
25366     if( s->matrixtype==0 )
25367     {
25368         result = sparseget(s, i, i, _state);
25369         return result;
25370     }
25371     if( s->matrixtype==1 )
25372     {
25373         if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
25374         {
25375             result = s->vals.ptr.p_double[s->didx.ptr.p_int[i]];
25376         }
25377         return result;
25378     }
25379     if( s->matrixtype==2 )
25380     {
25381         ae_assert(s->m==s->n, "SparseGetDiagonal: non-square SKS matrix not supported", _state);
25382         result = s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+s->didx.ptr.p_int[i]];
25383         return result;
25384     }
25385     ae_assert(ae_false, "SparseGetDiagonal: unexpected matrix type", _state);
25386     return result;
25387 }
25388 
25389 
25390 /*************************************************************************
25391 This function calculates matrix-vector product  S*x.  Matrix  S  must  be
25392 stored in CRS or SKS format (exception will be thrown otherwise).
25393 
25394 INPUT PARAMETERS
25395     S           -   sparse M*N matrix in CRS or SKS format.
25396     X           -   array[N], input vector. For  performance  reasons  we
25397                     make only quick checks - we check that array size  is
25398                     at least N, but we do not check for NAN's or INF's.
25399     Y           -   output buffer, possibly preallocated. In case  buffer
25400                     size is too small to store  result,  this  buffer  is
25401                     automatically resized.
25402 
25403 OUTPUT PARAMETERS
25404     Y           -   array[M], S*x
25405 
25406 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
25407 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
25408 this function.
25409 
25410   -- ALGLIB PROJECT --
25411      Copyright 14.10.2011 by Bochkanov Sergey
25412 *************************************************************************/
void sparsemv(sparsematrix* s,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* y,
     ae_state *_state)
{
    double tval;
    double v;
    double vv;
    ae_int_t i;
    ae_int_t j;
    ae_int_t lt;
    ae_int_t rt;
    ae_int_t lt1;
    ae_int_t rt1;
    ae_int_t n;
    ae_int_t m;
    ae_int_t d;
    ae_int_t u;
    ae_int_t ri;
    ae_int_t ri1;


    ae_assert(x->cnt>=s->n, "SparseMV: length(X)<N", _state);
    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    rvectorsetlengthatleast(y, s->m, _state);
    n = s->n;
    m = s->m;
    if( s->matrixtype==1 )
    {

        /*
         * CRS format.
         * Perform integrity check.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);

        /*
         * Try vendor kernels
         */
        if( sparsegemvcrsmkl(0, s->m, s->n, 1.0, &s->vals, &s->idx, &s->ridx, x, 0, 0.0, y, 0, _state) )
        {
            return;
        }

        /*
         * Our own implementation:
         * y[i] = dot(row I, x), with row I stored in Vals/Idx[RIdx[i]..RIdx[i+1]-1]
         */
        for(i=0; i<=m-1; i++)
        {
            tval = (double)(0);
            lt = s->ridx.ptr.p_int[i];
            rt = s->ridx.ptr.p_int[i+1]-1;
            for(j=lt; j<=rt; j++)
            {
                tval = tval+x->ptr.p_double[s->idx.ptr.p_int[j]]*s->vals.ptr.p_double[j];
            }
            y->ptr.p_double[i] = tval;
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format.
         * For each index I: D is the subdiagonal band width of row I,
         * U is the superdiagonal band width of column I. Row block I
         * occupies Vals[RIdx[i]..RIdx[i+1]-1]: first D subdiagonal
         * elements, then the diagonal at offset D, then U superdiagonal
         * elements of column I at the end of the block.
         */
        ae_assert(s->m==s->n, "SparseMV: non-square SKS matrices are not supported", _state);
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];
            /* Diagonal contribution */
            v = s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i];
            if( d>0 )
            {
                /* Lower band of row I: y[i] += dot(S[i,i-d..i-1], x[i-d..i-1]) */
                lt = ri;
                rt = ri+d-1;
                lt1 = i-d;
                rt1 = i-1;
                vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
                v = v+vv;
            }
            y->ptr.p_double[i] = v;
            if( u>0 )
            {
                /* Upper band of column I: scatter x[i]*S[i-u..i-1,i] into y[i-u..i-1] */
                lt = ri1-u;
                rt = ri1-1;
                lt1 = i-u;
                rt1 = i-1;
                v = x->ptr.p_double[i];
                ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
            }
        }
        return;
    }
}
25510 
25511 
25512 /*************************************************************************
25513 This function calculates matrix-vector product  S^T*x. Matrix S  must  be
25514 stored in CRS or SKS format (exception will be thrown otherwise).
25515 
25516 INPUT PARAMETERS
25517     S           -   sparse M*N matrix in CRS or SKS format.
25518     X           -   array[M], input vector. For  performance  reasons  we
25519                     make only quick checks - we check that array size  is
25520                     at least M, but we do not check for NAN's or INF's.
25521     Y           -   output buffer, possibly preallocated. In case  buffer
25522                     size is too small to store  result,  this  buffer  is
25523                     automatically resized.
25524 
25525 OUTPUT PARAMETERS
25526     Y           -   array[N], S^T*x
25527 
25528 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
25529 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
25530 this function.
25531 
25532   -- ALGLIB PROJECT --
25533      Copyright 14.10.2011 by Bochkanov Sergey
25534 *************************************************************************/
sparsemtv(sparsematrix * s,ae_vector * x,ae_vector * y,ae_state * _state)25535 void sparsemtv(sparsematrix* s,
25536      /* Real    */ ae_vector* x,
25537      /* Real    */ ae_vector* y,
25538      ae_state *_state)
25539 {
25540     ae_int_t i;
25541     ae_int_t j;
25542     ae_int_t lt;
25543     ae_int_t rt;
25544     ae_int_t ct;
25545     ae_int_t lt1;
25546     ae_int_t rt1;
25547     double v;
25548     double vv;
25549     ae_int_t n;
25550     ae_int_t m;
25551     ae_int_t ri;
25552     ae_int_t ri1;
25553     ae_int_t d;
25554     ae_int_t u;
25555 
25556 
25557     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMTV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
25558     ae_assert(x->cnt>=s->m, "SparseMTV: Length(X)<M", _state);
25559     n = s->n;
25560     m = s->m;
25561     rvectorsetlengthatleast(y, n, _state);
25562     for(i=0; i<=n-1; i++)
25563     {
25564         y->ptr.p_double[i] = (double)(0);
25565     }
25566     if( s->matrixtype==1 )
25567     {
25568 
25569         /*
25570          * CRS format
25571          * Perform integrity check.
25572          */
25573         ae_assert(s->ninitialized==s->ridx.ptr.p_int[m], "SparseMTV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
25574 
25575         /*
25576          * Try vendor kernels
25577          */
25578         if( sparsegemvcrsmkl(1, s->m, s->n, 1.0, &s->vals, &s->idx, &s->ridx, x, 0, 0.0, y, 0, _state) )
25579         {
25580             return;
25581         }
25582 
25583         /*
25584          * Our own implementation
25585          */
25586         for(i=0; i<=m-1; i++)
25587         {
25588             lt = s->ridx.ptr.p_int[i];
25589             rt = s->ridx.ptr.p_int[i+1];
25590             v = x->ptr.p_double[i];
25591             for(j=lt; j<=rt-1; j++)
25592             {
25593                 ct = s->idx.ptr.p_int[j];
25594                 y->ptr.p_double[ct] = y->ptr.p_double[ct]+v*s->vals.ptr.p_double[j];
25595             }
25596         }
25597         return;
25598     }
25599     if( s->matrixtype==2 )
25600     {
25601 
25602         /*
25603          * SKS format
25604          */
25605         ae_assert(s->m==s->n, "SparseMV: non-square SKS matrices are not supported", _state);
25606         for(i=0; i<=n-1; i++)
25607         {
25608             ri = s->ridx.ptr.p_int[i];
25609             ri1 = s->ridx.ptr.p_int[i+1];
25610             d = s->didx.ptr.p_int[i];
25611             u = s->uidx.ptr.p_int[i];
25612             if( d>0 )
25613             {
25614                 lt = ri;
25615                 rt = ri+d-1;
25616                 lt1 = i-d;
25617                 rt1 = i-1;
25618                 v = x->ptr.p_double[i];
25619                 ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
25620             }
25621             v = s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i];
25622             if( u>0 )
25623             {
25624                 lt = ri1-u;
25625                 rt = ri1-1;
25626                 lt1 = i-u;
25627                 rt1 = i-1;
25628                 vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
25629                 v = v+vv;
25630             }
25631             y->ptr.p_double[i] = v;
25632         }
25633         return;
25634     }
25635 }
25636 
25637 
25638 /*************************************************************************
25639 This function calculates generalized sparse matrix-vector product
25640 
25641     y := alpha*op(S)*x + beta*y
25642 
25643 Matrix S must be stored in CRS or SKS format (exception  will  be  thrown
25644 otherwise). op(S) can be either S or S^T.
25645 
25646 NOTE: this  function  expects  Y  to  be  large enough to store result. No
25647       automatic preallocation happens for smaller arrays.
25648 
25649 INPUT PARAMETERS
25650     S           -   sparse matrix in CRS or SKS format.
25651     Alpha       -   source coefficient
25652     OpS         -   operation type:
25653                     * OpS=0     =>  op(S) = S
25654                     * OpS=1     =>  op(S) = S^T
25655     X           -   input vector, must have at least Cols(op(S))+IX elements
25656     IX          -   subvector offset
25657     Beta        -   destination coefficient
25658     Y           -   preallocated output array, must have at least Rows(op(S))+IY elements
25659     IY          -   subvector offset
25660 
25661 OUTPUT PARAMETERS
25662     Y           -   elements [IY...IY+Rows(op(S))-1] are replaced by result,
25663                     other elements are not modified
25664 
25665 HANDLING OF SPECIAL CASES:
25666 * below M=Rows(op(S)) and N=Cols(op(S)). Although current  ALGLIB  version
25667   does not allow you to  create  zero-sized  sparse  matrices,  internally
25668   ALGLIB  can  deal  with  such matrices. So, comments for M or N equal to
25669   zero are for internal use only.
25670 * if M=0, then subroutine does nothing. It does not even touch arrays.
25671 * if N=0 or Alpha=0.0, then:
25672   * if Beta=0, then Y is filled by zeros. S and X are  not  referenced  at
25673     all. Initial values of Y are ignored (we do not  multiply  Y by  zero,
25674     we just rewrite it by zeros)
25675   * if Beta<>0, then Y is replaced by Beta*Y
25676 * if M>0, N>0, Alpha<>0, but  Beta=0, then  Y is replaced by alpha*op(S)*x
25677   initial state of Y  is ignored (rewritten without initial multiplication
25678   by zeros).
25679 
25680 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
25681 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
25682 this function.
25683 
25684   -- ALGLIB PROJECT --
25685      Copyright 10.12.2019 by Bochkanov Sergey
25686 *************************************************************************/
sparsegemv(sparsematrix * s,double alpha,ae_int_t ops,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)25687 void sparsegemv(sparsematrix* s,
25688      double alpha,
25689      ae_int_t ops,
25690      /* Real    */ ae_vector* x,
25691      ae_int_t ix,
25692      double beta,
25693      /* Real    */ ae_vector* y,
25694      ae_int_t iy,
25695      ae_state *_state)
25696 {
25697     ae_int_t opm;
25698     ae_int_t opn;
25699     ae_int_t rawm;
25700     ae_int_t rawn;
25701     ae_int_t i;
25702     ae_int_t j;
25703     double tval;
25704     ae_int_t lt;
25705     ae_int_t rt;
25706     ae_int_t ct;
25707     ae_int_t d;
25708     ae_int_t u;
25709     ae_int_t ri;
25710     ae_int_t ri1;
25711     double v;
25712     double vv;
25713     ae_int_t lt1;
25714     ae_int_t rt1;
25715 
25716 
25717     ae_assert(ops==0||ops==1, "SparseGEMV: incorrect OpS", _state);
25718     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseGEMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
25719     if( ops==0 )
25720     {
25721         opm = s->m;
25722         opn = s->n;
25723     }
25724     else
25725     {
25726         opm = s->n;
25727         opn = s->m;
25728     }
25729     ae_assert(opm>=0&&opn>=0, "SparseGEMV: op(S) has negative size", _state);
25730     ae_assert(opn==0||x->cnt+ix>=opn, "SparseGEMV: X is too short", _state);
25731     ae_assert(opm==0||y->cnt+iy>=opm, "SparseGEMV: X is too short", _state);
25732     rawm = s->m;
25733     rawn = s->n;
25734 
25735     /*
25736      * Quick exit strategies
25737      */
25738     if( opm==0 )
25739     {
25740         return;
25741     }
25742     if( ae_fp_neq(beta,(double)(0)) )
25743     {
25744         for(i=0; i<=opm-1; i++)
25745         {
25746             y->ptr.p_double[iy+i] = beta*y->ptr.p_double[iy+i];
25747         }
25748     }
25749     else
25750     {
25751         for(i=0; i<=opm-1; i++)
25752         {
25753             y->ptr.p_double[iy+i] = 0.0;
25754         }
25755     }
25756     if( opn==0||ae_fp_eq(alpha,(double)(0)) )
25757     {
25758         return;
25759     }
25760 
25761     /*
25762      * Now we have OpM>=1, OpN>=1, Alpha<>0
25763      */
25764     if( ops==0 )
25765     {
25766 
25767         /*
25768          * Compute generalized product y := alpha*S*x + beta*y
25769          * (with "beta*y" part already computed).
25770          */
25771         if( s->matrixtype==1 )
25772         {
25773 
25774             /*
25775              * CRS format.
25776              * Perform integrity check.
25777              */
25778             ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGEMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
25779 
25780             /*
25781              * Try vendor kernels
25782              */
25783             if( sparsegemvcrsmkl(0, s->m, s->n, alpha, &s->vals, &s->idx, &s->ridx, x, ix, 1.0, y, iy, _state) )
25784             {
25785                 return;
25786             }
25787 
25788             /*
25789              * Our own implementation
25790              */
25791             for(i=0; i<=rawm-1; i++)
25792             {
25793                 tval = (double)(0);
25794                 lt = s->ridx.ptr.p_int[i];
25795                 rt = s->ridx.ptr.p_int[i+1]-1;
25796                 for(j=lt; j<=rt; j++)
25797                 {
25798                     tval = tval+x->ptr.p_double[s->idx.ptr.p_int[j]+ix]*s->vals.ptr.p_double[j];
25799                 }
25800                 y->ptr.p_double[i+iy] = alpha*tval+y->ptr.p_double[i+iy];
25801             }
25802             return;
25803         }
25804         if( s->matrixtype==2 )
25805         {
25806 
25807             /*
25808              * SKS format
25809              */
25810             ae_assert(s->m==s->n, "SparseMV: non-square SKS matrices are not supported", _state);
25811             for(i=0; i<=rawn-1; i++)
25812             {
25813                 ri = s->ridx.ptr.p_int[i];
25814                 ri1 = s->ridx.ptr.p_int[i+1];
25815                 d = s->didx.ptr.p_int[i];
25816                 u = s->uidx.ptr.p_int[i];
25817                 v = s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i+ix];
25818                 if( d>0 )
25819                 {
25820                     lt = ri;
25821                     rt = ri+d-1;
25822                     lt1 = i-d+ix;
25823                     rt1 = i-1+ix;
25824                     vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
25825                     v = v+vv;
25826                 }
25827                 y->ptr.p_double[i+iy] = alpha*v+y->ptr.p_double[i+iy];
25828                 if( u>0 )
25829                 {
25830                     lt = ri1-u;
25831                     rt = ri1-1;
25832                     lt1 = i-u+iy;
25833                     rt1 = i-1+iy;
25834                     v = alpha*x->ptr.p_double[i+ix];
25835                     ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
25836                 }
25837             }
25838             return;
25839         }
25840     }
25841     else
25842     {
25843 
25844         /*
25845          * Compute generalized product y := alpha*S^T*x + beta*y
25846          * (with "beta*y" part already computed).
25847          */
25848         if( s->matrixtype==1 )
25849         {
25850 
25851             /*
25852              * CRS format
25853              * Perform integrity check.
25854              */
25855             ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGEMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
25856 
25857             /*
25858              * Try vendor kernels
25859              */
25860             if( sparsegemvcrsmkl(1, s->m, s->n, alpha, &s->vals, &s->idx, &s->ridx, x, ix, 1.0, y, iy, _state) )
25861             {
25862                 return;
25863             }
25864 
25865             /*
25866              * Our own implementation
25867              */
25868             for(i=0; i<=rawm-1; i++)
25869             {
25870                 lt = s->ridx.ptr.p_int[i];
25871                 rt = s->ridx.ptr.p_int[i+1];
25872                 v = alpha*x->ptr.p_double[i+ix];
25873                 for(j=lt; j<=rt-1; j++)
25874                 {
25875                     ct = s->idx.ptr.p_int[j]+iy;
25876                     y->ptr.p_double[ct] = y->ptr.p_double[ct]+v*s->vals.ptr.p_double[j];
25877                 }
25878             }
25879             return;
25880         }
25881         if( s->matrixtype==2 )
25882         {
25883 
25884             /*
25885              * SKS format
25886              */
25887             ae_assert(s->m==s->n, "SparseGEMV: non-square SKS matrices are not supported", _state);
25888             for(i=0; i<=rawn-1; i++)
25889             {
25890                 ri = s->ridx.ptr.p_int[i];
25891                 ri1 = s->ridx.ptr.p_int[i+1];
25892                 d = s->didx.ptr.p_int[i];
25893                 u = s->uidx.ptr.p_int[i];
25894                 if( d>0 )
25895                 {
25896                     lt = ri;
25897                     rt = ri+d-1;
25898                     lt1 = i-d+iy;
25899                     rt1 = i-1+iy;
25900                     v = alpha*x->ptr.p_double[i+ix];
25901                     ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
25902                 }
25903                 v = alpha*s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i+ix];
25904                 if( u>0 )
25905                 {
25906                     lt = ri1-u;
25907                     rt = ri1-1;
25908                     lt1 = i-u+ix;
25909                     rt1 = i-1+ix;
25910                     vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
25911                     v = v+alpha*vv;
25912                 }
25913                 y->ptr.p_double[i+iy] = v+y->ptr.p_double[i+iy];
25914             }
25915             return;
25916         }
25917     }
25918 }
25919 
25920 
25921 /*************************************************************************
25922 This function simultaneously calculates two matrix-vector products:
25923     S*x and S^T*x.
25924 S must be square (non-rectangular) matrix stored in  CRS  or  SKS  format
25925 (exception will be thrown otherwise).
25926 
25927 INPUT PARAMETERS
25928     S           -   sparse N*N matrix in CRS or SKS format.
25929     X           -   array[N], input vector. For  performance  reasons  we
25930                     make only quick checks - we check that array size  is
25931                     at least N, but we do not check for NAN's or INF's.
25932     Y0          -   output buffer, possibly preallocated. In case  buffer
25933                     size is too small to store  result,  this  buffer  is
25934                     automatically resized.
25935     Y1          -   output buffer, possibly preallocated. In case  buffer
25936                     size is too small to store  result,  this  buffer  is
25937                     automatically resized.
25938 
25939 OUTPUT PARAMETERS
25940     Y0          -   array[N], S*x
25941     Y1          -   array[N], S^T*x
25942 
25943 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
25944 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
25945 this function.
25946 
25947   -- ALGLIB PROJECT --
25948      Copyright 14.10.2011 by Bochkanov Sergey
25949 *************************************************************************/
void sparsemv2(sparsematrix* s,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* y0,
     /* Real    */ ae_vector* y1,
     ae_state *_state)
{
    ae_int_t l;
    double tval;
    ae_int_t i;
    ae_int_t j;
    double vx;
    double vs;
    double v;
    double vv;
    double vd0;
    double vd1;
    ae_int_t vi;
    ae_int_t j0;
    ae_int_t j1;
    ae_int_t n;
    ae_int_t ri;
    ae_int_t ri1;
    ae_int_t d;
    ae_int_t u;
    ae_int_t lt;
    ae_int_t rt;
    ae_int_t lt1;
    ae_int_t rt1;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMV2: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(s->m==s->n, "SparseMV2: matrix is non-square", _state);
    l = x->cnt;
    ae_assert(l>=s->n, "SparseMV2: Length(X)<N", _state);
    n = s->n;

    /*
     * Output buffers are sized from Length(X) (which is >=N) and the first
     * N entries are cleared; both products are then accumulated in place.
     */
    rvectorsetlengthatleast(y0, l, _state);
    rvectorsetlengthatleast(y1, l, _state);
    for(i=0; i<=n-1; i++)
    {
        y0->ptr.p_double[i] = (double)(0);
        y1->ptr.p_double[i] = (double)(0);
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS format: a single pass over the rows produces both products.
         * Row I contributes a gather (dot product) to Y0[I]=S*x, while
         * X[I]*S[I,*] is scattered into Y1 to build S^T*x.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseMV2: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        for(i=0; i<=s->m-1; i++)
        {
            tval = (double)(0);
            vx = x->ptr.p_double[i];
            j0 = s->ridx.ptr.p_int[i];      /* first element of row I      */
            j1 = s->ridx.ptr.p_int[i+1]-1;  /* last element of row I       */
            for(j=j0; j<=j1; j++)
            {
                vi = s->idx.ptr.p_int[j];   /* column index of element J   */
                vs = s->vals.ptr.p_double[j];
                tval = tval+x->ptr.p_double[vi]*vs;                 /* gather for Y0[I]  */
                y1->ptr.p_double[vi] = y1->ptr.p_double[vi]+vx*vs;  /* scatter into Y1   */
            }
            y0->ptr.p_double[i] = tval;
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format. As used below, for row/column I:
         * * Vals[RI..RI+D-1]   - subdiagonal part of row I,    S[I,I-D..I-1]
         * * Vals[RI+D]         - diagonal element              S[I,I]
         * * Vals[RI1-U..RI1-1] - superdiagonal part of col I,  S[I-U..I-1,I]
         */
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];

            /*
             * The diagonal element contributes identically to both products.
             */
            vd0 = s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i];
            vd1 = vd0;
            if( d>0 )
            {

                /*
                 * Subdiagonal part of row I: scattered into Y1 (it forms a
                 * column of S^T) and folded into Y0[I] via a dot product.
                 */
                lt = ri;
                rt = ri+d-1;
                lt1 = i-d;
                rt1 = i-1;
                v = x->ptr.p_double[i];
                ae_v_addd(&y1->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
                vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
                vd0 = vd0+vv;
            }
            if( u>0 )
            {

                /*
                 * Superdiagonal part of column I: scattered into Y0 (it forms
                 * a row of S) and folded into Y1[I] via a dot product.
                 */
                lt = ri1-u;
                rt = ri1-1;
                lt1 = i-u;
                rt1 = i-1;
                v = x->ptr.p_double[i];
                ae_v_addd(&y0->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
                vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
                vd1 = vd1+vv;
            }
            y0->ptr.p_double[i] = vd0;
            y1->ptr.p_double[i] = vd1;
        }
        return;
    }
}
26058 
26059 
26060 /*************************************************************************
26061 This function calculates matrix-vector product  S*x, when S is  symmetric
26062 matrix. Matrix S  must be stored in CRS or SKS format  (exception will be
26063 thrown otherwise).
26064 
26065 INPUT PARAMETERS
26066     S           -   sparse M*M matrix in CRS or SKS format.
26067     IsUpper     -   whether upper or lower triangle of S is given:
26068                     * if upper triangle is given,  only   S[i,j] for j>=i
26069                       are used, and lower triangle is ignored (it can  be
26070                       empty - these elements are not referenced at all).
26071                     * if lower triangle is given,  only   S[i,j] for j<=i
26072                       are used, and upper triangle is ignored.
26073     X           -   array[N], input vector. For  performance  reasons  we
26074                     make only quick checks - we check that array size  is
26075                     at least N, but we do not check for NAN's or INF's.
26076     Y           -   output buffer, possibly preallocated. In case  buffer
26077                     size is too small to store  result,  this  buffer  is
26078                     automatically resized.
26079 
26080 OUTPUT PARAMETERS
26081     Y           -   array[M], S*x
26082 
26083 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26084 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26085 this function.
26086 
26087   -- ALGLIB PROJECT --
26088      Copyright 14.10.2011 by Bochkanov Sergey
26089 *************************************************************************/
void sparsesmv(sparsematrix* s,
     ae_bool isupper,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* y,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t i;
    ae_int_t j;
    ae_int_t col;
    ae_int_t lo;
    ae_int_t hi;
    double elem;
    double dotv;
    double acc;
    double xi;
    double diagv;
    ae_int_t r0;
    ae_int_t r1;
    ae_int_t bw;
    ae_int_t uw;
    ae_int_t dst0;
    ae_int_t dst1;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseSMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(x->cnt>=s->n, "SparseSMV: length(X)<N", _state);
    ae_assert(s->m==s->n, "SparseSMV: non-square matrix", _state);
    n = s->n;
    rvectorsetlengthatleast(y, n, _state);

    /*
     * Clear the destination; the product is accumulated into it.
     */
    for(i=0; i<=n-1; i++)
    {
        y->ptr.p_double[i] = (double)(0);
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS format. For every row I we process the diagonal element (if
         * stored) and then the strictly upper or strictly lower part of the
         * row, depending on IsUpper. Each off-diagonal element S[I,J]
         * contributes twice (symmetry): to Y[I] and to Y[J].
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseSMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        for(i=0; i<=n-1; i++)
        {

            /*
             * Diagonal element of row I is present when DIdx[I]!=UIdx[I].
             */
            if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
            {
                y->ptr.p_double[i] = y->ptr.p_double[i]+s->vals.ptr.p_double[s->didx.ptr.p_int[i]]*x->ptr.p_double[s->idx.ptr.p_int[s->didx.ptr.p_int[i]]];
            }

            /*
             * Select half-open element range [LO,HI) for the requested triangle.
             */
            if( isupper )
            {
                lo = s->uidx.ptr.p_int[i];
                hi = s->ridx.ptr.p_int[i+1];
            }
            else
            {
                lo = s->ridx.ptr.p_int[i];
                hi = s->didx.ptr.p_int[i];
            }
            acc = (double)(0);
            xi = x->ptr.p_double[i];
            for(j=lo; j<=hi-1; j++)
            {
                col = s->idx.ptr.p_int[j];
                elem = s->vals.ptr.p_double[j];
                acc = acc+x->ptr.p_double[col]*elem;                     /* S[I,col]*X[col] -> Y[I]  */
                y->ptr.p_double[col] = y->ptr.p_double[col]+xi*elem;     /* S[col,I]*X[I] -> Y[col]  */
            }
            y->ptr.p_double[i] = y->ptr.p_double[i]+acc;
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format: band portions of each row/column are handled with
         * vector kernels; the diagonal element is always stored.
         */
        for(i=0; i<=n-1; i++)
        {
            r0 = s->ridx.ptr.p_int[i];
            r1 = s->ridx.ptr.p_int[i+1];
            bw = s->didx.ptr.p_int[i];     /* count of stored subdiagonal elements of row I    */
            uw = s->uidx.ptr.p_int[i];     /* count of stored superdiagonal elements of col I  */
            diagv = s->vals.ptr.p_double[r0+bw]*x->ptr.p_double[i];
            if( !isupper&&bw>0 )
            {
                lo = r0;
                hi = r0+bw-1;
                dst0 = i-bw;
                dst1 = i-1;
                xi = x->ptr.p_double[i];
                ae_v_addd(&y->ptr.p_double[dst0], 1, &s->vals.ptr.p_double[lo], 1, ae_v_len(dst0,dst1), xi);
                dotv = ae_v_dotproduct(&s->vals.ptr.p_double[lo], 1, &x->ptr.p_double[dst0], 1, ae_v_len(lo,hi));
                diagv = diagv+dotv;
            }
            if( isupper&&uw>0 )
            {
                lo = r1-uw;
                hi = r1-1;
                dst0 = i-uw;
                dst1 = i-1;
                xi = x->ptr.p_double[i];
                ae_v_addd(&y->ptr.p_double[dst0], 1, &s->vals.ptr.p_double[lo], 1, ae_v_len(dst0,dst1), xi);
                dotv = ae_v_dotproduct(&s->vals.ptr.p_double[lo], 1, &x->ptr.p_double[dst0], 1, ae_v_len(lo,hi));
                diagv = diagv+dotv;
            }
            y->ptr.p_double[i] = diagv;
        }
        return;
    }
}
26210 
26211 
26212 /*************************************************************************
26213 This function calculates vector-matrix-vector product x'*S*x, where  S is
26214 symmetric matrix. Matrix S must be stored in CRS or SKS format (exception
26215 will be thrown otherwise).
26216 
26217 INPUT PARAMETERS
26218     S           -   sparse M*M matrix in CRS or SKS format.
26219     IsUpper     -   whether upper or lower triangle of S is given:
26220                     * if upper triangle is given,  only   S[i,j] for j>=i
26221                       are used, and lower triangle is ignored (it can  be
26222                       empty - these elements are not referenced at all).
26223                     * if lower triangle is given,  only   S[i,j] for j<=i
26224                       are used, and upper triangle is ignored.
26225     X           -   array[N], input vector. For  performance  reasons  we
26226                     make only quick checks - we check that array size  is
26227                     at least N, but we do not check for NAN's or INF's.
26228 
26229 RESULT
26230     x'*S*x
26231 
26232 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26233 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26234 this function.
26235 
26236   -- ALGLIB PROJECT --
26237      Copyright 27.01.2014 by Bochkanov Sergey
26238 *************************************************************************/
double sparsevsmv(sparsematrix* s,
     ae_bool isupper,
     /* Real    */ ae_vector* x,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t id;
    ae_int_t lt;
    ae_int_t rt;
    double v;
    double v0;
    double v1;
    ae_int_t ri;
    ae_int_t ri1;
    ae_int_t d;
    ae_int_t u;
    ae_int_t lt1;
    double result;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseVSMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(x->cnt>=s->n, "SparseVSMV: length(X)<N", _state);
    ae_assert(s->m==s->n, "SparseVSMV: non-square matrix", _state);
    n = s->n;
    result = 0.0;
    if( s->matrixtype==1 )
    {

        /*
         * CRS format. Diagonal elements contribute X[I]*S[I,I]*X[I]; each
         * stored off-diagonal element of the requested triangle contributes
         * twice (2*X[I]*S[I,J]*X[J]) due to symmetry.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseVSMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        for(i=0; i<=n-1; i++)
        {

            /*
             * Diagonal element of row I is present when DIdx[I]!=UIdx[I].
             */
            if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
            {
                v = x->ptr.p_double[s->idx.ptr.p_int[s->didx.ptr.p_int[i]]];
                result = result+v*s->vals.ptr.p_double[s->didx.ptr.p_int[i]]*v;
            }

            /*
             * Select element range [LT,RT) for the requested triangle of row I.
             */
            if( isupper )
            {
                lt = s->uidx.ptr.p_int[i];
                rt = s->ridx.ptr.p_int[i+1];
            }
            else
            {
                lt = s->ridx.ptr.p_int[i];
                rt = s->didx.ptr.p_int[i];
            }
            v0 = x->ptr.p_double[i];
            for(j=lt; j<=rt-1; j++)
            {
                id = s->idx.ptr.p_int[j];
                v1 = x->ptr.p_double[id];
                v = s->vals.ptr.p_double[j];
                result = result+2*v0*v1*v;
            }
        }
        return result;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format: Vals[RI..RI+D-1] is the subdiagonal part of row I,
         * Vals[RI+D] is the diagonal element, Vals[RI1-U..RI1-1] is the
         * superdiagonal part of column I.
         */
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];
            v = x->ptr.p_double[i];
            result = result+v*s->vals.ptr.p_double[ri+d]*v;
            if( d>0&&!isupper )
            {

                /*
                 * Fold subdiagonal band of row I into the quadratic form.
                 * (Dead store "rt = ri+d-1" removed - RT is unused here.)
                 */
                lt = ri;
                lt1 = i-d;
                k = d-1;
                v0 = x->ptr.p_double[i];
                v = 0.0;
                for(j=0; j<=k; j++)
                {
                    v = v+x->ptr.p_double[lt1+j]*s->vals.ptr.p_double[lt+j];
                }
                result = result+2*v0*v;
            }
            if( u>0&&isupper )
            {

                /*
                 * Fold superdiagonal band of column I into the quadratic form.
                 * (Dead store "rt = ri1-1" removed - RT is unused here.)
                 */
                lt = ri1-u;
                lt1 = i-u;
                k = u-1;
                v0 = x->ptr.p_double[i];
                v = 0.0;
                for(j=0; j<=k; j++)
                {
                    v = v+x->ptr.p_double[lt1+j]*s->vals.ptr.p_double[lt+j];
                }
                result = result+2*v0*v;
            }
        }
        return result;
    }
    return result;
}
26349 
26350 
26351 /*************************************************************************
26352 This function calculates matrix-matrix product  S*A.  Matrix  S  must  be
26353 stored in CRS or SKS format (exception will be thrown otherwise).
26354 
26355 INPUT PARAMETERS
26356     S           -   sparse M*N matrix in CRS or SKS format.
26357     A           -   array[N][K], input dense matrix. For  performance reasons
26358                     we make only quick checks - we check that array size
26359                     is at least N, but we do not check for NAN's or INF's.
26360     K           -   number of columns of matrix (A).
26361     B           -   output buffer, possibly preallocated. In case  buffer
26362                     size is too small to store  result,  this  buffer  is
26363                     automatically resized.
26364 
26365 OUTPUT PARAMETERS
26366     B           -   array[M][K], S*A
26367 
26368 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26369 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26370 this function.
26371 
26372   -- ALGLIB PROJECT --
26373      Copyright 14.10.2011 by Bochkanov Sergey
26374 *************************************************************************/
void sparsemm(sparsematrix* s,
     /* Real    */ ae_matrix* a,
     ae_int_t k,
     /* Real    */ ae_matrix* b,
     ae_state *_state)
{
    double tval;
    double v;
    ae_int_t id;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k0;
    ae_int_t k1;
    ae_int_t lt;
    ae_int_t rt;
    ae_int_t m;
    ae_int_t n;
    ae_int_t ri;
    ae_int_t ri1;
    ae_int_t lt1;
    ae_int_t rt1;
    ae_int_t d;
    ae_int_t u;
    double vd;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMM: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(a->rows>=s->n, "SparseMM: Rows(A)<N", _state);
    ae_assert(k>0, "SparseMM: K<=0", _state);
    m = s->m;
    n = s->n;
    k1 = k-1;

    /*
     * Size output and clear it; the product is accumulated into B.
     */
    rmatrixsetlengthatleast(b, m, k, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=k-1; j++)
        {
            b->ptr.pp_double[i][j] = (double)(0);
        }
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS format. For narrow A (K below the vectorization threshold) a
         * scalar triple loop is used; otherwise whole rows of A are combined
         * with vector kernels.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[m], "SparseMM: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        if( k<sparse_linalgswitch )
        {
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=k-1; j++)
                {
                    tval = (double)(0);
                    lt = s->ridx.ptr.p_int[i];
                    rt = s->ridx.ptr.p_int[i+1];
                    for(k0=lt; k0<=rt-1; k0++)
                    {
                        tval = tval+s->vals.ptr.p_double[k0]*a->ptr.pp_double[s->idx.ptr.p_int[k0]][j];
                    }
                    b->ptr.pp_double[i][j] = tval;
                }
            }
        }
        else
        {
            for(i=0; i<=m-1; i++)
            {
                lt = s->ridx.ptr.p_int[i];
                rt = s->ridx.ptr.p_int[i+1];
                for(j=lt; j<=rt-1; j++)
                {
                    id = s->idx.ptr.p_int[j];
                    v = s->vals.ptr.p_double[j];
                    ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[id][0], 1, ae_v_len(0,k-1), v);
                }
            }
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format: Vals[RI..RI+D-1] is the subdiagonal part of row I,
         * Vals[RI+D] is the diagonal element, Vals[RI1-U..RI1-1] is the
         * superdiagonal part of column I.
         */
        ae_assert(m==n, "SparseMM: non-square SKS matrices are not supported", _state);
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];
            if( d>0 )
            {

                /*
                 * Subdiagonal band of row I: B[I] += S[I,J]*A[J] for J in
                 * [I-D,I-1]. (Dead store "rt = ri+d-1" removed - unused.)
                 */
                lt = ri;
                lt1 = i-d;
                rt1 = i-1;
                for(j=lt1; j<=rt1; j++)
                {
                    v = s->vals.ptr.p_double[lt+(j-lt1)];
                    if( k<sparse_linalgswitch )
                    {

                        /*
                         * Use loop
                         */
                        for(k0=0; k0<=k1; k0++)
                        {
                            b->ptr.pp_double[i][k0] = b->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
                        }
                    }
                    else
                    {

                        /*
                         * Use vector operation
                         */
                        ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
                    }
                }
            }
            if( u>0 )
            {

                /*
                 * Superdiagonal band of column I: B[J] += S[J,I]*A[I] for J
                 * in [I-U,I-1]. (Dead store "rt = ri1-1" removed - unused.)
                 */
                lt = ri1-u;
                lt1 = i-u;
                rt1 = i-1;
                for(j=lt1; j<=rt1; j++)
                {
                    v = s->vals.ptr.p_double[lt+(j-lt1)];
                    if( k<sparse_linalgswitch )
                    {

                        /*
                         * Use loop
                         */
                        for(k0=0; k0<=k1; k0++)
                        {
                            b->ptr.pp_double[j][k0] = b->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
                        }
                    }
                    else
                    {

                        /*
                         * Use vector operation
                         */
                        ae_v_addd(&b->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
                    }
                }
            }

            /*
             * Diagonal element: B[I] += S[I,I]*A[I].
             */
            vd = s->vals.ptr.p_double[ri+d];
            ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), vd);
        }
        return;
    }
}
26534 
26535 
26536 /*************************************************************************
26537 This function calculates matrix-matrix product  S^T*A. Matrix S  must  be
26538 stored in CRS or SKS format (exception will be thrown otherwise).
26539 
26540 INPUT PARAMETERS
26541     S           -   sparse M*N matrix in CRS or SKS format.
26542     A           -   array[M][K], input dense matrix. For performance reasons
26543                     we make only quick checks - we check that array size  is
26544                     at least M, but we do not check for NAN's or INF's.
26545     K           -   number of columns of matrix (A).
26546     B           -   output buffer, possibly preallocated. In case  buffer
26547                     size is too small to store  result,  this  buffer  is
26548                     automatically resized.
26549 
26550 OUTPUT PARAMETERS
26551     B           -   array[N][K], S^T*A
26552 
26553 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26554 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26555 this function.
26556 
26557   -- ALGLIB PROJECT --
26558      Copyright 14.10.2011 by Bochkanov Sergey
26559 *************************************************************************/
void sparsemtm(sparsematrix* s,
     /* Real    */ ae_matrix* a,
     ae_int_t k,
     /* Real    */ ae_matrix* b,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k0;
    ae_int_t k1;
    ae_int_t lt;
    ae_int_t rt;
    ae_int_t ct;
    double v;
    ae_int_t m;
    ae_int_t n;
    ae_int_t ri;
    ae_int_t ri1;
    ae_int_t lt1;
    ae_int_t rt1;
    ae_int_t d;
    ae_int_t u;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMTM: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(a->rows>=s->m, "SparseMTM: Rows(A)<M", _state);
    ae_assert(k>0, "SparseMTM: K<=0", _state);
    m = s->m;
    n = s->n;
    k1 = k-1;

    /*
     * Size output (N rows, since B = S^T*A) and clear it.
     */
    rmatrixsetlengthatleast(b, n, k, _state);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=k-1; j++)
        {
            b->ptr.pp_double[i][j] = (double)(0);
        }
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS format. Element S[I,CT] scatters row A[I] into B[CT]; for
         * narrow A a scalar loop is used, otherwise a vector kernel.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[m], "SparseMTM: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        if( k<sparse_linalgswitch )
        {
            for(i=0; i<=m-1; i++)
            {
                lt = s->ridx.ptr.p_int[i];
                rt = s->ridx.ptr.p_int[i+1];
                for(k0=lt; k0<=rt-1; k0++)
                {
                    v = s->vals.ptr.p_double[k0];
                    ct = s->idx.ptr.p_int[k0];
                    for(j=0; j<=k-1; j++)
                    {
                        b->ptr.pp_double[ct][j] = b->ptr.pp_double[ct][j]+v*a->ptr.pp_double[i][j];
                    }
                }
            }
        }
        else
        {
            for(i=0; i<=m-1; i++)
            {
                lt = s->ridx.ptr.p_int[i];
                rt = s->ridx.ptr.p_int[i+1];
                for(j=lt; j<=rt-1; j++)
                {
                    v = s->vals.ptr.p_double[j];
                    ct = s->idx.ptr.p_int[j];
                    ae_v_addd(&b->ptr.pp_double[ct][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
                }
            }
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format: Vals[RI..RI+D-1] is the subdiagonal part of row I,
         * Vals[RI+D] is the diagonal element, Vals[RI1-U..RI1-1] is the
         * superdiagonal part of column I. The transposed product swaps the
         * roles of the two bands relative to SparseMM.
         */
        ae_assert(m==n, "SparseMTM: non-square SKS matrices are not supported", _state);
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];
            if( d>0 )
            {

                /*
                 * Subdiagonal band of row I scatters A[I] into B[I-D..I-1].
                 * (Dead store "rt = ri+d-1" removed - RT is unused here.)
                 */
                lt = ri;
                lt1 = i-d;
                rt1 = i-1;
                for(j=lt1; j<=rt1; j++)
                {
                    v = s->vals.ptr.p_double[lt+(j-lt1)];
                    if( k<sparse_linalgswitch )
                    {

                        /*
                         * Use loop
                         */
                        for(k0=0; k0<=k1; k0++)
                        {
                            b->ptr.pp_double[j][k0] = b->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
                        }
                    }
                    else
                    {

                        /*
                         * Use vector operation
                         */
                        ae_v_addd(&b->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
                    }
                }
            }
            if( u>0 )
            {

                /*
                 * Superdiagonal band of column I gathers A[I-U..I-1] into B[I].
                 * (Dead store "rt = ri1-1" removed - RT is unused here.)
                 */
                lt = ri1-u;
                lt1 = i-u;
                rt1 = i-1;
                for(j=lt1; j<=rt1; j++)
                {
                    v = s->vals.ptr.p_double[lt+(j-lt1)];
                    if( k<sparse_linalgswitch )
                    {

                        /*
                         * Use loop
                         */
                        for(k0=0; k0<=k1; k0++)
                        {
                            b->ptr.pp_double[i][k0] = b->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
                        }
                    }
                    else
                    {

                        /*
                         * Use vector operation
                         */
                        ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
                    }
                }
            }

            /*
             * Diagonal element: B[I] += S[I,I]*A[I].
             */
            v = s->vals.ptr.p_double[ri+d];
            ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
        }
        return;
    }
}
26717 
26718 
26719 /*************************************************************************
26720 This function simultaneously calculates two matrix-matrix products:
26721     S*A and S^T*A.
26722 S  must  be  square (non-rectangular) matrix stored in CRS or  SKS  format
26723 (exception will be thrown otherwise).
26724 
26725 INPUT PARAMETERS
26726     S           -   sparse N*N matrix in CRS or SKS format.
26727     A           -   array[N][K], input dense matrix. For performance reasons
26728                     we make only quick checks - we check that array size  is
26729                     at least N, but we do not check for NAN's or INF's.
26730     K           -   number of columns of matrix (A).
26731     B0          -   output buffer, possibly preallocated. In case  buffer
26732                     size is too small to store  result,  this  buffer  is
26733                     automatically resized.
26734     B1          -   output buffer, possibly preallocated. In case  buffer
26735                     size is too small to store  result,  this  buffer  is
26736                     automatically resized.
26737 
26738 OUTPUT PARAMETERS
26739     B0          -   array[N][K], S*A
26740     B1          -   array[N][K], S^T*A
26741 
26742 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26743 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26744 this function.
26745 
26746   -- ALGLIB PROJECT --
26747      Copyright 14.10.2011 by Bochkanov Sergey
26748 *************************************************************************/
sparsemm2(sparsematrix * s,ae_matrix * a,ae_int_t k,ae_matrix * b0,ae_matrix * b1,ae_state * _state)26749 void sparsemm2(sparsematrix* s,
26750      /* Real    */ ae_matrix* a,
26751      ae_int_t k,
26752      /* Real    */ ae_matrix* b0,
26753      /* Real    */ ae_matrix* b1,
26754      ae_state *_state)
26755 {
26756     ae_int_t i;
26757     ae_int_t j;
26758     ae_int_t k0;
26759     ae_int_t lt;
26760     ae_int_t rt;
26761     ae_int_t ct;
26762     double v;
26763     double tval;
26764     ae_int_t n;
26765     ae_int_t k1;
26766     ae_int_t ri;
26767     ae_int_t ri1;
26768     ae_int_t lt1;
26769     ae_int_t rt1;
26770     ae_int_t d;
26771     ae_int_t u;
26772 
26773 
26774     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMM2: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
26775     ae_assert(s->m==s->n, "SparseMM2: matrix is non-square", _state);
26776     ae_assert(a->rows>=s->n, "SparseMM2: Rows(A)<N", _state);
26777     ae_assert(k>0, "SparseMM2: K<=0", _state);
26778     n = s->n;
26779     k1 = k-1;
26780     rmatrixsetlengthatleast(b0, n, k, _state);
26781     rmatrixsetlengthatleast(b1, n, k, _state);
26782     for(i=0; i<=n-1; i++)
26783     {
26784         for(j=0; j<=k-1; j++)
26785         {
26786             b1->ptr.pp_double[i][j] = (double)(0);
26787             b0->ptr.pp_double[i][j] = (double)(0);
26788         }
26789     }
26790     if( s->matrixtype==1 )
26791     {
26792 
26793         /*
26794          * CRS format
26795          */
26796         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseMM2: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
26797         if( k<sparse_linalgswitch )
26798         {
26799             for(i=0; i<=n-1; i++)
26800             {
26801                 for(j=0; j<=k-1; j++)
26802                 {
26803                     tval = (double)(0);
26804                     lt = s->ridx.ptr.p_int[i];
26805                     rt = s->ridx.ptr.p_int[i+1];
26806                     v = a->ptr.pp_double[i][j];
26807                     for(k0=lt; k0<=rt-1; k0++)
26808                     {
26809                         ct = s->idx.ptr.p_int[k0];
26810                         b1->ptr.pp_double[ct][j] = b1->ptr.pp_double[ct][j]+s->vals.ptr.p_double[k0]*v;
26811                         tval = tval+s->vals.ptr.p_double[k0]*a->ptr.pp_double[ct][j];
26812                     }
26813                     b0->ptr.pp_double[i][j] = tval;
26814                 }
26815             }
26816         }
26817         else
26818         {
26819             for(i=0; i<=n-1; i++)
26820             {
26821                 lt = s->ridx.ptr.p_int[i];
26822                 rt = s->ridx.ptr.p_int[i+1];
26823                 for(j=lt; j<=rt-1; j++)
26824                 {
26825                     v = s->vals.ptr.p_double[j];
26826                     ct = s->idx.ptr.p_int[j];
26827                     ae_v_addd(&b0->ptr.pp_double[i][0], 1, &a->ptr.pp_double[ct][0], 1, ae_v_len(0,k-1), v);
26828                     ae_v_addd(&b1->ptr.pp_double[ct][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26829                 }
26830             }
26831         }
26832         return;
26833     }
26834     if( s->matrixtype==2 )
26835     {
26836 
26837         /*
26838          * SKS format
26839          */
26840         ae_assert(s->m==s->n, "SparseMM2: non-square SKS matrices are not supported", _state);
26841         for(i=0; i<=n-1; i++)
26842         {
26843             ri = s->ridx.ptr.p_int[i];
26844             ri1 = s->ridx.ptr.p_int[i+1];
26845             d = s->didx.ptr.p_int[i];
26846             u = s->uidx.ptr.p_int[i];
26847             if( d>0 )
26848             {
26849                 lt = ri;
26850                 rt = ri+d-1;
26851                 lt1 = i-d;
26852                 rt1 = i-1;
26853                 for(j=lt1; j<=rt1; j++)
26854                 {
26855                     v = s->vals.ptr.p_double[lt+(j-lt1)];
26856                     if( k<sparse_linalgswitch )
26857                     {
26858 
26859                         /*
26860                          * Use loop
26861                          */
26862                         for(k0=0; k0<=k1; k0++)
26863                         {
26864                             b0->ptr.pp_double[i][k0] = b0->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
26865                             b1->ptr.pp_double[j][k0] = b1->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
26866                         }
26867                     }
26868                     else
26869                     {
26870 
26871                         /*
26872                          * Use vector operation
26873                          */
26874                         ae_v_addd(&b0->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
26875                         ae_v_addd(&b1->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26876                     }
26877                 }
26878             }
26879             if( u>0 )
26880             {
26881                 lt = ri1-u;
26882                 rt = ri1-1;
26883                 lt1 = i-u;
26884                 rt1 = i-1;
26885                 for(j=lt1; j<=rt1; j++)
26886                 {
26887                     v = s->vals.ptr.p_double[lt+(j-lt1)];
26888                     if( k<sparse_linalgswitch )
26889                     {
26890 
26891                         /*
26892                          * Use loop
26893                          */
26894                         for(k0=0; k0<=k1; k0++)
26895                         {
26896                             b0->ptr.pp_double[j][k0] = b0->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
26897                             b1->ptr.pp_double[i][k0] = b1->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
26898                         }
26899                     }
26900                     else
26901                     {
26902 
26903                         /*
26904                          * Use vector operation
26905                          */
26906                         ae_v_addd(&b0->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26907                         ae_v_addd(&b1->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
26908                     }
26909                 }
26910             }
26911             v = s->vals.ptr.p_double[ri+d];
26912             ae_v_addd(&b0->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26913             ae_v_addd(&b1->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26914         }
26915         return;
26916     }
26917 }
26918 
26919 
26920 /*************************************************************************
26921 This function calculates matrix-matrix product  S*A, when S  is  symmetric
26922 matrix. Matrix S must be stored in CRS or SKS format  (exception  will  be
26923 thrown otherwise).
26924 
26925 INPUT PARAMETERS
26926     S           -   sparse M*M matrix in CRS or SKS format.
26927     IsUpper     -   whether upper or lower triangle of S is given:
26928                     * if upper triangle is given,  only   S[i,j] for j>=i
26929                       are used, and lower triangle is ignored (it can  be
26930                       empty - these elements are not referenced at all).
26931                     * if lower triangle is given,  only   S[i,j] for j<=i
26932                       are used, and upper triangle is ignored.
26933     A           -   array[N][K], input dense matrix. For performance reasons
26934                     we make only quick checks - we check that array size is
26935                     at least N, but we do not check for NAN's or INF's.
26936     K           -   number of columns of matrix (A).
26937     B           -   output buffer, possibly preallocated. In case  buffer
26938                     size is too small to store  result,  this  buffer  is
26939                     automatically resized.
26940 
26941 OUTPUT PARAMETERS
26942     B           -   array[M][K], S*A
26943 
26944 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26945 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26946 this function.
26947 
26948   -- ALGLIB PROJECT --
26949      Copyright 14.10.2011 by Bochkanov Sergey
26950 *************************************************************************/
sparsesmm(sparsematrix * s,ae_bool isupper,ae_matrix * a,ae_int_t k,ae_matrix * b,ae_state * _state)26951 void sparsesmm(sparsematrix* s,
26952      ae_bool isupper,
26953      /* Real    */ ae_matrix* a,
26954      ae_int_t k,
26955      /* Real    */ ae_matrix* b,
26956      ae_state *_state)
26957 {
26958     ae_int_t i;
26959     ae_int_t j;
26960     ae_int_t k0;
26961     ae_int_t id;
26962     ae_int_t k1;
26963     ae_int_t lt;
26964     ae_int_t rt;
26965     double v;
26966     double vb;
26967     double va;
26968     ae_int_t n;
26969     ae_int_t ri;
26970     ae_int_t ri1;
26971     ae_int_t lt1;
26972     ae_int_t rt1;
26973     ae_int_t d;
26974     ae_int_t u;
26975 
26976 
26977     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseSMM: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
26978     ae_assert(a->rows>=s->n, "SparseSMM: Rows(X)<N", _state);
26979     ae_assert(s->m==s->n, "SparseSMM: matrix is non-square", _state);
26980     n = s->n;
26981     k1 = k-1;
26982     rmatrixsetlengthatleast(b, n, k, _state);
26983     for(i=0; i<=n-1; i++)
26984     {
26985         for(j=0; j<=k-1; j++)
26986         {
26987             b->ptr.pp_double[i][j] = (double)(0);
26988         }
26989     }
26990     if( s->matrixtype==1 )
26991     {
26992 
26993         /*
26994          * CRS format
26995          */
26996         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseSMM: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
26997         if( k>sparse_linalgswitch )
26998         {
26999             for(i=0; i<=n-1; i++)
27000             {
27001                 for(j=0; j<=k-1; j++)
27002                 {
27003                     if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
27004                     {
27005                         id = s->didx.ptr.p_int[i];
27006                         b->ptr.pp_double[i][j] = b->ptr.pp_double[i][j]+s->vals.ptr.p_double[id]*a->ptr.pp_double[s->idx.ptr.p_int[id]][j];
27007                     }
27008                     if( isupper )
27009                     {
27010                         lt = s->uidx.ptr.p_int[i];
27011                         rt = s->ridx.ptr.p_int[i+1];
27012                         vb = (double)(0);
27013                         va = a->ptr.pp_double[i][j];
27014                         for(k0=lt; k0<=rt-1; k0++)
27015                         {
27016                             id = s->idx.ptr.p_int[k0];
27017                             v = s->vals.ptr.p_double[k0];
27018                             vb = vb+a->ptr.pp_double[id][j]*v;
27019                             b->ptr.pp_double[id][j] = b->ptr.pp_double[id][j]+va*v;
27020                         }
27021                         b->ptr.pp_double[i][j] = b->ptr.pp_double[i][j]+vb;
27022                     }
27023                     else
27024                     {
27025                         lt = s->ridx.ptr.p_int[i];
27026                         rt = s->didx.ptr.p_int[i];
27027                         vb = (double)(0);
27028                         va = a->ptr.pp_double[i][j];
27029                         for(k0=lt; k0<=rt-1; k0++)
27030                         {
27031                             id = s->idx.ptr.p_int[k0];
27032                             v = s->vals.ptr.p_double[k0];
27033                             vb = vb+a->ptr.pp_double[id][j]*v;
27034                             b->ptr.pp_double[id][j] = b->ptr.pp_double[id][j]+va*v;
27035                         }
27036                         b->ptr.pp_double[i][j] = b->ptr.pp_double[i][j]+vb;
27037                     }
27038                 }
27039             }
27040         }
27041         else
27042         {
27043             for(i=0; i<=n-1; i++)
27044             {
27045                 if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
27046                 {
27047                     id = s->didx.ptr.p_int[i];
27048                     v = s->vals.ptr.p_double[id];
27049                     ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[s->idx.ptr.p_int[id]][0], 1, ae_v_len(0,k-1), v);
27050                 }
27051                 if( isupper )
27052                 {
27053                     lt = s->uidx.ptr.p_int[i];
27054                     rt = s->ridx.ptr.p_int[i+1];
27055                     for(j=lt; j<=rt-1; j++)
27056                     {
27057                         id = s->idx.ptr.p_int[j];
27058                         v = s->vals.ptr.p_double[j];
27059                         ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[id][0], 1, ae_v_len(0,k-1), v);
27060                         ae_v_addd(&b->ptr.pp_double[id][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27061                     }
27062                 }
27063                 else
27064                 {
27065                     lt = s->ridx.ptr.p_int[i];
27066                     rt = s->didx.ptr.p_int[i];
27067                     for(j=lt; j<=rt-1; j++)
27068                     {
27069                         id = s->idx.ptr.p_int[j];
27070                         v = s->vals.ptr.p_double[j];
27071                         ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[id][0], 1, ae_v_len(0,k-1), v);
27072                         ae_v_addd(&b->ptr.pp_double[id][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27073                     }
27074                 }
27075             }
27076         }
27077         return;
27078     }
27079     if( s->matrixtype==2 )
27080     {
27081 
27082         /*
27083          * SKS format
27084          */
27085         ae_assert(s->m==s->n, "SparseMM2: non-square SKS matrices are not supported", _state);
27086         for(i=0; i<=n-1; i++)
27087         {
27088             ri = s->ridx.ptr.p_int[i];
27089             ri1 = s->ridx.ptr.p_int[i+1];
27090             d = s->didx.ptr.p_int[i];
27091             u = s->uidx.ptr.p_int[i];
27092             if( d>0&&!isupper )
27093             {
27094                 lt = ri;
27095                 rt = ri+d-1;
27096                 lt1 = i-d;
27097                 rt1 = i-1;
27098                 for(j=lt1; j<=rt1; j++)
27099                 {
27100                     v = s->vals.ptr.p_double[lt+(j-lt1)];
27101                     if( k<sparse_linalgswitch )
27102                     {
27103 
27104                         /*
27105                          * Use loop
27106                          */
27107                         for(k0=0; k0<=k1; k0++)
27108                         {
27109                             b->ptr.pp_double[i][k0] = b->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
27110                             b->ptr.pp_double[j][k0] = b->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
27111                         }
27112                     }
27113                     else
27114                     {
27115 
27116                         /*
27117                          * Use vector operation
27118                          */
27119                         ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
27120                         ae_v_addd(&b->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27121                     }
27122                 }
27123             }
27124             if( u>0&&isupper )
27125             {
27126                 lt = ri1-u;
27127                 rt = ri1-1;
27128                 lt1 = i-u;
27129                 rt1 = i-1;
27130                 for(j=lt1; j<=rt1; j++)
27131                 {
27132                     v = s->vals.ptr.p_double[lt+(j-lt1)];
27133                     if( k<sparse_linalgswitch )
27134                     {
27135 
27136                         /*
27137                          * Use loop
27138                          */
27139                         for(k0=0; k0<=k1; k0++)
27140                         {
27141                             b->ptr.pp_double[j][k0] = b->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
27142                             b->ptr.pp_double[i][k0] = b->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
27143                         }
27144                     }
27145                     else
27146                     {
27147 
27148                         /*
27149                          * Use vector operation
27150                          */
27151                         ae_v_addd(&b->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27152                         ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
27153                     }
27154                 }
27155             }
27156             v = s->vals.ptr.p_double[ri+d];
27157             ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27158         }
27159         return;
27160     }
27161 }
27162 
27163 
27164 /*************************************************************************
27165 This function calculates matrix-vector product op(S)*x, when x is  vector,
27166 S is symmetric triangular matrix, op(S) is transposition or no  operation.
27167 Matrix S must be stored in CRS or SKS format  (exception  will  be  thrown
27168 otherwise).
27169 
27170 INPUT PARAMETERS
27171     S           -   sparse square matrix in CRS or SKS format.
27172     IsUpper     -   whether upper or lower triangle of S is used:
27173                     * if upper triangle is given,  only   S[i,j] for  j>=i
27174                       are used, and lower triangle is  ignored (it can  be
27175                       empty - these elements are not referenced at all).
27176                     * if lower triangle is given,  only   S[i,j] for  j<=i
27177                       are used, and upper triangle is ignored.
27178     IsUnit      -   unit or non-unit diagonal:
27179                     * if True, diagonal elements of triangular matrix  are
27180                       considered equal to 1.0. Actual elements  stored  in
27181                       S are not referenced at all.
27182                     * if False, diagonal stored in S is used
27183     OpType      -   operation type:
27184                     * if 0, S*x is calculated
27185                     * if 1, (S^T)*x is calculated (transposition)
27186     X           -   array[N] which stores input  vector.  For  performance
27187                     reasons we make only quick  checks  -  we  check  that
27188                     array  size  is  at  least  N, but we do not check for
27189                     NAN's or INF's.
27190     Y           -   possibly  preallocated  input   buffer.  Automatically
27191                     resized if its size is too small.
27192 
27193 OUTPUT PARAMETERS
27194     Y           -   array[N], op(S)*x
27195 
27196 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
27197 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
27198 this function.
27199 
27200   -- ALGLIB PROJECT --
27201      Copyright 20.01.2014 by Bochkanov Sergey
27202 *************************************************************************/
sparsetrmv(sparsematrix * s,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_vector * y,ae_state * _state)27203 void sparsetrmv(sparsematrix* s,
27204      ae_bool isupper,
27205      ae_bool isunit,
27206      ae_int_t optype,
27207      /* Real    */ ae_vector* x,
27208      /* Real    */ ae_vector* y,
27209      ae_state *_state)
27210 {
27211     ae_int_t n;
27212     ae_int_t i;
27213     ae_int_t j;
27214     ae_int_t k;
27215     ae_int_t j0;
27216     ae_int_t j1;
27217     double v;
27218     ae_int_t ri;
27219     ae_int_t ri1;
27220     ae_int_t d;
27221     ae_int_t u;
27222     ae_int_t lt;
27223     ae_int_t rt;
27224     ae_int_t lt1;
27225     ae_int_t rt1;
27226 
27227 
27228     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseTRMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
27229     ae_assert(optype==0||optype==1, "SparseTRMV: incorrect operation type (must be 0 or 1)", _state);
27230     ae_assert(x->cnt>=s->n, "SparseTRMV: Length(X)<N", _state);
27231     ae_assert(s->m==s->n, "SparseTRMV: matrix is non-square", _state);
27232     n = s->n;
27233     rvectorsetlengthatleast(y, n, _state);
27234     if( isunit )
27235     {
27236 
27237         /*
27238          * Set initial value of y to x
27239          */
27240         for(i=0; i<=n-1; i++)
27241         {
27242             y->ptr.p_double[i] = x->ptr.p_double[i];
27243         }
27244     }
27245     else
27246     {
27247 
27248         /*
27249          * Set initial value of y to 0
27250          */
27251         for(i=0; i<=n-1; i++)
27252         {
27253             y->ptr.p_double[i] = (double)(0);
27254         }
27255     }
27256     if( s->matrixtype==1 )
27257     {
27258 
27259         /*
27260          * CRS format
27261          */
27262         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseTRMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
27263         for(i=0; i<=n-1; i++)
27264         {
27265 
27266             /*
27267              * Depending on IsUpper/IsUnit, select range of indexes to process
27268              */
27269             if( isupper )
27270             {
27271                 if( isunit||s->didx.ptr.p_int[i]==s->uidx.ptr.p_int[i] )
27272                 {
27273                     j0 = s->uidx.ptr.p_int[i];
27274                 }
27275                 else
27276                 {
27277                     j0 = s->didx.ptr.p_int[i];
27278                 }
27279                 j1 = s->ridx.ptr.p_int[i+1]-1;
27280             }
27281             else
27282             {
27283                 j0 = s->ridx.ptr.p_int[i];
27284                 if( isunit||s->didx.ptr.p_int[i]==s->uidx.ptr.p_int[i] )
27285                 {
27286                     j1 = s->didx.ptr.p_int[i]-1;
27287                 }
27288                 else
27289                 {
27290                     j1 = s->didx.ptr.p_int[i];
27291                 }
27292             }
27293 
27294             /*
27295              * Depending on OpType, process subset of I-th row of input matrix
27296              */
27297             if( optype==0 )
27298             {
27299                 v = 0.0;
27300                 for(j=j0; j<=j1; j++)
27301                 {
27302                     v = v+s->vals.ptr.p_double[j]*x->ptr.p_double[s->idx.ptr.p_int[j]];
27303                 }
27304                 y->ptr.p_double[i] = y->ptr.p_double[i]+v;
27305             }
27306             else
27307             {
27308                 v = x->ptr.p_double[i];
27309                 for(j=j0; j<=j1; j++)
27310                 {
27311                     k = s->idx.ptr.p_int[j];
27312                     y->ptr.p_double[k] = y->ptr.p_double[k]+v*s->vals.ptr.p_double[j];
27313                 }
27314             }
27315         }
27316         return;
27317     }
27318     if( s->matrixtype==2 )
27319     {
27320 
27321         /*
27322          * SKS format
27323          */
27324         ae_assert(s->m==s->n, "SparseTRMV: non-square SKS matrices are not supported", _state);
27325         for(i=0; i<=n-1; i++)
27326         {
27327             ri = s->ridx.ptr.p_int[i];
27328             ri1 = s->ridx.ptr.p_int[i+1];
27329             d = s->didx.ptr.p_int[i];
27330             u = s->uidx.ptr.p_int[i];
27331             if( !isunit )
27332             {
27333                 y->ptr.p_double[i] = y->ptr.p_double[i]+s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i];
27334             }
27335             if( d>0&&!isupper )
27336             {
27337                 lt = ri;
27338                 rt = ri+d-1;
27339                 lt1 = i-d;
27340                 rt1 = i-1;
27341                 if( optype==0 )
27342                 {
27343                     v = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
27344                     y->ptr.p_double[i] = y->ptr.p_double[i]+v;
27345                 }
27346                 else
27347                 {
27348                     v = x->ptr.p_double[i];
27349                     ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
27350                 }
27351             }
27352             if( u>0&&isupper )
27353             {
27354                 lt = ri1-u;
27355                 rt = ri1-1;
27356                 lt1 = i-u;
27357                 rt1 = i-1;
27358                 if( optype==0 )
27359                 {
27360                     v = x->ptr.p_double[i];
27361                     ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
27362                 }
27363                 else
27364                 {
27365                     v = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
27366                     y->ptr.p_double[i] = y->ptr.p_double[i]+v;
27367                 }
27368             }
27369         }
27370         return;
27371     }
27372 }
27373 
27374 
/*************************************************************************
This function solves linear system op(S)*y=x  where  x  is  vector,  S  is
symmetric  triangular  matrix,  op(S)  is  transposition  or no operation.
Matrix S must be stored in CRS or SKS format  (exception  will  be  thrown
otherwise).

INPUT PARAMETERS
    S           -   sparse square matrix in CRS or SKS format.
    IsUpper     -   whether upper or lower triangle of S is used:
                    * if upper triangle is given,  only   S[i,j] for  j>=i
                      are used, and lower triangle is  ignored (it can  be
                      empty - these elements are not referenced at all).
                    * if lower triangle is given,  only   S[i,j] for  j<=i
                      are used, and upper triangle is ignored.
    IsUnit      -   unit or non-unit diagonal:
                    * if True, diagonal elements of triangular matrix  are
                      considered equal to 1.0. Actual elements  stored  in
                      S are not referenced at all.
                    * if False, diagonal stored in S is used. It  is  your
                      responsibility  to  make  sure  that   diagonal   is
                      non-zero.
    OpType      -   operation type:
                    * if 0, S*x is calculated
                    * if 1, (S^T)*x is calculated (transposition)
    X           -   array[N] which stores input  vector.  For  performance
                    reasons we make only quick  checks  -  we  check  that
                    array  size  is  at  least  N, but we do not check for
                    NAN's or INF's.

OUTPUT PARAMETERS
    X           -   array[N], inv(op(S))*x

NOTE: this function throws exception when called for  non-CRS/SKS  matrix.
      You must convert your matrix  with  SparseConvertToCRS/SKS()  before
      using this function.

NOTE: no assertion or tests are done during algorithm  operation.   It  is
      your responsibility to provide invertible matrix to algorithm.

  -- ALGLIB PROJECT --
     Copyright 20.01.2014 by Bochkanov Sergey
*************************************************************************/
sparsetrsv(sparsematrix * s,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_state * _state)27417 void sparsetrsv(sparsematrix* s,
27418      ae_bool isupper,
27419      ae_bool isunit,
27420      ae_int_t optype,
27421      /* Real    */ ae_vector* x,
27422      ae_state *_state)
27423 {
27424     ae_int_t n;
27425     ae_int_t fst;
27426     ae_int_t lst;
27427     ae_int_t stp;
27428     ae_int_t i;
27429     ae_int_t j;
27430     ae_int_t k;
27431     double v;
27432     double vd;
27433     double v0;
27434     ae_int_t j0;
27435     ae_int_t j1;
27436     ae_int_t ri;
27437     ae_int_t ri1;
27438     ae_int_t d;
27439     ae_int_t u;
27440     ae_int_t lt;
27441     ae_int_t lt1;
27442 
27443 
27444     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseTRSV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
27445     ae_assert(optype==0||optype==1, "SparseTRSV: incorrect operation type (must be 0 or 1)", _state);
27446     ae_assert(x->cnt>=s->n, "SparseTRSV: Length(X)<N", _state);
27447     ae_assert(s->m==s->n, "SparseTRSV: matrix is non-square", _state);
27448     n = s->n;
27449     if( s->matrixtype==1 )
27450     {
27451 
27452         /*
27453          * CRS format.
27454          *
27455          * Several branches for different combinations of IsUpper and OpType
27456          */
27457         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseTRSV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
27458         if( optype==0 )
27459         {
27460 
27461             /*
27462              * No transposition.
27463              *
27464              * S*x=y with upper or lower triangular S.
27465              */
27466             v0 = (double)(0);
27467             if( isupper )
27468             {
27469                 fst = n-1;
27470                 lst = 0;
27471                 stp = -1;
27472             }
27473             else
27474             {
27475                 fst = 0;
27476                 lst = n-1;
27477                 stp = 1;
27478             }
27479             i = fst;
27480             while((stp>0&&i<=lst)||(stp<0&&i>=lst))
27481             {
27482 
27483                 /*
27484                  * Select range of indexes to process
27485                  */
27486                 if( isupper )
27487                 {
27488                     j0 = s->uidx.ptr.p_int[i];
27489                     j1 = s->ridx.ptr.p_int[i+1]-1;
27490                 }
27491                 else
27492                 {
27493                     j0 = s->ridx.ptr.p_int[i];
27494                     j1 = s->didx.ptr.p_int[i]-1;
27495                 }
27496 
27497                 /*
27498                  * Calculate X[I]
27499                  */
27500                 v = 0.0;
27501                 for(j=j0; j<=j1; j++)
27502                 {
27503                     v = v+s->vals.ptr.p_double[j]*x->ptr.p_double[s->idx.ptr.p_int[j]];
27504                 }
27505                 if( !isunit )
27506                 {
27507                     if( s->didx.ptr.p_int[i]==s->uidx.ptr.p_int[i] )
27508                     {
27509                         vd = (double)(0);
27510                     }
27511                     else
27512                     {
27513                         vd = s->vals.ptr.p_double[s->didx.ptr.p_int[i]];
27514                     }
27515                 }
27516                 else
27517                 {
27518                     vd = 1.0;
27519                 }
27520                 v = (x->ptr.p_double[i]-v)/vd;
27521                 x->ptr.p_double[i] = v;
27522                 v0 = 0.25*v0+v;
27523 
27524                 /*
27525                  * Next I
27526                  */
27527                 i = i+stp;
27528             }
27529             ae_assert(ae_isfinite(v0, _state), "SparseTRSV: overflow or division by exact zero", _state);
27530             return;
27531         }
27532         if( optype==1 )
27533         {
27534 
27535             /*
27536              * Transposition.
27537              *
27538              * (S^T)*x=y with upper or lower triangular S.
27539              */
27540             if( isupper )
27541             {
27542                 fst = 0;
27543                 lst = n-1;
27544                 stp = 1;
27545             }
27546             else
27547             {
27548                 fst = n-1;
27549                 lst = 0;
27550                 stp = -1;
27551             }
27552             i = fst;
27553             v0 = (double)(0);
27554             while((stp>0&&i<=lst)||(stp<0&&i>=lst))
27555             {
27556                 v = x->ptr.p_double[i];
27557                 if( v!=0.0 )
27558                 {
27559 
27560                     /*
27561                      * X[i] already stores A[i,i]*Y[i], the only thing left
27562                      * is to divide by diagonal element.
27563                      */
27564                     if( !isunit )
27565                     {
27566                         if( s->didx.ptr.p_int[i]==s->uidx.ptr.p_int[i] )
27567                         {
27568                             vd = (double)(0);
27569                         }
27570                         else
27571                         {
27572                             vd = s->vals.ptr.p_double[s->didx.ptr.p_int[i]];
27573                         }
27574                     }
27575                     else
27576                     {
27577                         vd = 1.0;
27578                     }
27579                     v = v/vd;
27580                     x->ptr.p_double[i] = v;
27581                     v0 = 0.25*v0+v;
27582 
27583                     /*
27584                      * For upper triangular case:
27585                      *     subtract X[i]*Ai from X[i+1:N-1]
27586                      *
27587                      * For lower triangular case:
27588                      *     subtract X[i]*Ai from X[0:i-1]
27589                      *
27590                      * (here Ai is I-th row of original, untransposed A).
27591                      */
27592                     if( isupper )
27593                     {
27594                         j0 = s->uidx.ptr.p_int[i];
27595                         j1 = s->ridx.ptr.p_int[i+1]-1;
27596                     }
27597                     else
27598                     {
27599                         j0 = s->ridx.ptr.p_int[i];
27600                         j1 = s->didx.ptr.p_int[i]-1;
27601                     }
27602                     for(j=j0; j<=j1; j++)
27603                     {
27604                         k = s->idx.ptr.p_int[j];
27605                         x->ptr.p_double[k] = x->ptr.p_double[k]-s->vals.ptr.p_double[j]*v;
27606                     }
27607                 }
27608 
27609                 /*
27610                  * Next I
27611                  */
27612                 i = i+stp;
27613             }
27614             ae_assert(ae_isfinite(v0, _state), "SparseTRSV: overflow or division by exact zero", _state);
27615             return;
27616         }
27617         ae_assert(ae_false, "SparseTRSV: internal error", _state);
27618     }
27619     if( s->matrixtype==2 )
27620     {
27621 
27622         /*
27623          * SKS format
27624          */
27625         ae_assert(s->m==s->n, "SparseTRSV: non-square SKS matrices are not supported", _state);
27626         if( (optype==0&&!isupper)||(optype==1&&isupper) )
27627         {
27628 
27629             /*
27630              * Lower triangular op(S) (matrix itself can be upper triangular).
27631              */
27632             v0 = (double)(0);
27633             for(i=0; i<=n-1; i++)
27634             {
27635 
27636                 /*
27637                  * Select range of indexes to process
27638                  */
27639                 ri = s->ridx.ptr.p_int[i];
27640                 ri1 = s->ridx.ptr.p_int[i+1];
27641                 d = s->didx.ptr.p_int[i];
27642                 u = s->uidx.ptr.p_int[i];
27643                 if( isupper )
27644                 {
27645                     lt = i-u;
27646                     lt1 = ri1-u;
27647                     k = u-1;
27648                 }
27649                 else
27650                 {
27651                     lt = i-d;
27652                     lt1 = ri;
27653                     k = d-1;
27654                 }
27655 
27656                 /*
27657                  * Calculate X[I]
27658                  */
27659                 v = 0.0;
27660                 for(j=0; j<=k; j++)
27661                 {
27662                     v = v+s->vals.ptr.p_double[lt1+j]*x->ptr.p_double[lt+j];
27663                 }
27664                 if( isunit )
27665                 {
27666                     vd = (double)(1);
27667                 }
27668                 else
27669                 {
27670                     vd = s->vals.ptr.p_double[ri+d];
27671                 }
27672                 v = (x->ptr.p_double[i]-v)/vd;
27673                 x->ptr.p_double[i] = v;
27674                 v0 = 0.25*v0+v;
27675             }
27676             ae_assert(ae_isfinite(v0, _state), "SparseTRSV: overflow or division by exact zero", _state);
27677             return;
27678         }
27679         if( (optype==1&&!isupper)||(optype==0&&isupper) )
27680         {
27681 
27682             /*
27683              * Upper triangular op(S) (matrix itself can be lower triangular).
27684              */
27685             v0 = (double)(0);
27686             for(i=n-1; i>=0; i--)
27687             {
27688                 ri = s->ridx.ptr.p_int[i];
27689                 ri1 = s->ridx.ptr.p_int[i+1];
27690                 d = s->didx.ptr.p_int[i];
27691                 u = s->uidx.ptr.p_int[i];
27692 
27693                 /*
27694                  * X[i] already stores A[i,i]*Y[i], the only thing left
27695                  * is to divide by diagonal element.
27696                  */
27697                 if( isunit )
27698                 {
27699                     vd = (double)(1);
27700                 }
27701                 else
27702                 {
27703                     vd = s->vals.ptr.p_double[ri+d];
27704                 }
27705                 v = x->ptr.p_double[i]/vd;
27706                 x->ptr.p_double[i] = v;
27707                 v0 = 0.25*v0+v;
27708 
27709                 /*
27710                  * Subtract product of X[i] and I-th column of "effective" A from
27711                  * unprocessed variables.
27712                  */
27713                 v = x->ptr.p_double[i];
27714                 if( isupper )
27715                 {
27716                     lt = i-u;
27717                     lt1 = ri1-u;
27718                     k = u-1;
27719                 }
27720                 else
27721                 {
27722                     lt = i-d;
27723                     lt1 = ri;
27724                     k = d-1;
27725                 }
27726                 for(j=0; j<=k; j++)
27727                 {
27728                     x->ptr.p_double[lt+j] = x->ptr.p_double[lt+j]-v*s->vals.ptr.p_double[lt1+j];
27729                 }
27730             }
27731             ae_assert(ae_isfinite(v0, _state), "SparseTRSV: overflow or division by exact zero", _state);
27732             return;
27733         }
27734         ae_assert(ae_false, "SparseTRSV: internal error", _state);
27735     }
27736     ae_assert(ae_false, "SparseTRSV: internal error", _state);
27737 }
27738 
27739 
27740 /*************************************************************************
27741 This function applies permutation given by permutation table P (as opposed
27742 to product form of permutation) to sparse symmetric  matrix  A,  given  by
27743 either upper or lower triangle: B := P*A*P'.
27744 
27745 This function allocates completely new instance of B. Use buffered version
27746 SparseSymmPermTblBuf() if you want to reuse already allocated structure.
27747 
27748 INPUT PARAMETERS
27749     A           -   sparse square matrix in CRS format.
27750     IsUpper     -   whether upper or lower triangle of A is used:
27751                     * if upper triangle is given,  only   A[i,j] for  j>=i
27752                       are used, and lower triangle is  ignored (it can  be
27753                       empty - these elements are not referenced at all).
27754                     * if lower triangle is given,  only   A[i,j] for  j<=i
27755                       are used, and upper triangle is ignored.
    P           -   array[N] which stores the permutation table;  P[I]=J
                    means that the I-th row/column of matrix A  is  moved
                    to the J-th position. For performance reasons  we  do
                    NOT check that P[] is a correct  permutation  (i.e.
                    that there are no repetitions); we only check that
                    all its elements are in the [0,N) range.
27762 
27763 OUTPUT PARAMETERS
27764     B           -   permuted matrix.  Permutation  is  applied  to A  from
27765                     the both sides, only upper or lower triangle (depending
27766                     on IsUpper) is stored.
27767 
27768 NOTE: this function throws exception when called for non-CRS  matrix.  You
27769       must convert your matrix with SparseConvertToCRS() before using this
27770       function.
27771 
27772   -- ALGLIB PROJECT --
27773      Copyright 05.10.2020 by Bochkanov Sergey.
27774 *************************************************************************/
sparsesymmpermtbl(sparsematrix * a,ae_bool isupper,ae_vector * p,sparsematrix * b,ae_state * _state)27775 void sparsesymmpermtbl(sparsematrix* a,
27776      ae_bool isupper,
27777      /* Integer */ ae_vector* p,
27778      sparsematrix* b,
27779      ae_state *_state)
27780 {
27781 
27782     _sparsematrix_clear(b);
27783 
27784     sparsesymmpermtblbuf(a, isupper, p, b, _state);
27785 }
27786 
27787 
27788 /*************************************************************************
27789 This function is a buffered version  of  SparseSymmPermTbl()  that  reuses
27790 previously allocated storage in B as much as possible.
27791 
27792 This function applies permutation given by permutation table P (as opposed
27793 to product form of permutation) to sparse symmetric  matrix  A,  given  by
27794 either upper or lower triangle: B := P*A*P'.
27795 
27796 INPUT PARAMETERS
27797     A           -   sparse square matrix in CRS format.
27798     IsUpper     -   whether upper or lower triangle of A is used:
27799                     * if upper triangle is given,  only   A[i,j] for  j>=i
27800                       are used, and lower triangle is  ignored (it can  be
27801                       empty - these elements are not referenced at all).
27802                     * if lower triangle is given,  only   A[i,j] for  j<=i
27803                       are used, and upper triangle is ignored.
    P           -   array[N] which stores the permutation table;  P[I]=J
                    means that the I-th row/column of matrix A  is  moved
                    to the J-th position. For performance reasons  we  do
                    NOT check that P[] is a correct  permutation  (i.e.
                    that there are no repetitions); we only check that
                    all its elements are in the [0,N) range.
27810     B           -   sparse matrix object that will hold output.
27811                     Previously allocated memory will be reused as much  as
27812                     possible.
27813 
27814 OUTPUT PARAMETERS
27815     B           -   permuted matrix.  Permutation  is  applied  to A  from
27816                     the both sides, only upper or lower triangle (depending
27817                     on IsUpper) is stored.
27818 
27819 NOTE: this function throws exception when called for non-CRS  matrix.  You
27820       must convert your matrix with SparseConvertToCRS() before using this
27821       function.
27822 
27823   -- ALGLIB PROJECT --
27824      Copyright 05.10.2020 by Bochkanov Sergey.
27825 *************************************************************************/
void sparsesymmpermtblbuf(sparsematrix* a,
     ae_bool isupper,
     /* Integer */ ae_vector* p,
     sparsematrix* b,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jj;
    ae_int_t j0;
    ae_int_t j1;
    ae_int_t k0;
    ae_int_t k1;
    ae_int_t kk;
    ae_int_t n;
    ae_int_t dst;
    ae_bool bflag;


    /*
     * Validate inputs: CRS storage, Length(P)>=N, square matrix, and
     * every permutation entry within [0,N).  Repetitions in P[] are NOT
     * detected (documented behavior, see function header).
     */
    ae_assert(a->matrixtype==1, "SparseSymmPermTblBuf: incorrect matrix type (convert your matrix to CRS)", _state);
    ae_assert(p->cnt>=a->n, "SparseSymmPermTblBuf: Length(P)<N", _state);
    ae_assert(a->m==a->n, "SparseSymmPermTblBuf: matrix is non-square", _state);
    bflag = ae_true;
    for(i=0; i<=a->n-1; i++)
    {
        bflag = (bflag&&p->ptr.p_int[i]>=0)&&p->ptr.p_int[i]<a->n;
    }
    ae_assert(bflag, "SparseSymmPermTblBuf: P[] contains values outside of [0,N) range", _state);
    n = a->n;

    /*
     * Prepare output
     */
    ae_assert(a->ninitialized==a->ridx.ptr.p_int[n], "SparseSymmPermTblBuf: integrity check failed", _state);
    b->matrixtype = 1;
    b->n = n;
    b->m = n;
    ivectorsetlengthatleast(&b->didx, n, _state);
    ivectorsetlengthatleast(&b->uidx, n, _state);

    /*
     * Determine row sizes (temporary stored in DIdx) and ranges.
     *
     * Each stored element A[i,j] maps to B[P[i],P[j]].  Because only one
     * triangle of B is kept, the element is counted in output row
     * min(P[i],P[j]) for the upper-triangle case and max(P[i],P[j]) for
     * the lower-triangle case - that is the row where the permuted
     * element lands inside the stored triangle.
     */
    isetv(n, 0, &b->didx, _state);
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            /* Upper triangle of row I: from the diagonal to the end of the row */
            j0 = a->didx.ptr.p_int[i];
            j1 = a->ridx.ptr.p_int[i+1]-1;
            k0 = p->ptr.p_int[i];
            for(jj=j0; jj<=j1; jj++)
            {
                k1 = p->ptr.p_int[a->idx.ptr.p_int[jj]];
                if( k1<k0 )
                {
                    b->didx.ptr.p_int[k1] = b->didx.ptr.p_int[k1]+1;
                }
                else
                {
                    b->didx.ptr.p_int[k0] = b->didx.ptr.p_int[k0]+1;
                }
            }
        }
        else
        {
            /* Lower triangle of row I: from the start of the row through the diagonal */
            j0 = a->ridx.ptr.p_int[i];
            j1 = a->uidx.ptr.p_int[i]-1;
            k0 = p->ptr.p_int[i];
            for(jj=j0; jj<=j1; jj++)
            {
                k1 = p->ptr.p_int[a->idx.ptr.p_int[jj]];
                if( k1>k0 )
                {
                    b->didx.ptr.p_int[k1] = b->didx.ptr.p_int[k1]+1;
                }
                else
                {
                    b->didx.ptr.p_int[k0] = b->didx.ptr.p_int[k0]+1;
                }
            }
        }
    }
    /*
     * Convert per-row counts into row ranges (RIdx) via a prefix sum,
     * then allocate storage for the permuted matrix.
     */
    ivectorsetlengthatleast(&b->ridx, n+1, _state);
    b->ridx.ptr.p_int[0] = 0;
    for(i=0; i<=n-1; i++)
    {
        b->ridx.ptr.p_int[i+1] = b->ridx.ptr.p_int[i]+b->didx.ptr.p_int[i];
    }
    b->ninitialized = b->ridx.ptr.p_int[n];
    ivectorsetlengthatleast(&b->idx, b->ninitialized, _state);
    rvectorsetlengthatleast(&b->vals, b->ninitialized, _state);

    /*
     * Process matrix.
     *
     * UIdx[k] is temporarily used as a write cursor: it points to the
     * next free slot inside output row k.  Elements are emitted in
     * arbitrary column order within each row; they are sorted in the
     * finalization pass below.
     */
    for(i=0; i<=n-1; i++)
    {
        b->uidx.ptr.p_int[i] = b->ridx.ptr.p_int[i];
    }
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j0 = a->didx.ptr.p_int[i];
            j1 = a->ridx.ptr.p_int[i+1]-1;
            for(jj=j0; jj<=j1; jj++)
            {
                j = a->idx.ptr.p_int[jj];
                k0 = p->ptr.p_int[i];
                k1 = p->ptr.p_int[j];
                /* store at (min,max) so the element stays in the upper triangle */
                if( k1<k0 )
                {
                    kk = k0;
                    k0 = k1;
                    k1 = kk;
                }
                dst = b->uidx.ptr.p_int[k0];
                b->idx.ptr.p_int[dst] = k1;
                b->vals.ptr.p_double[dst] = a->vals.ptr.p_double[jj];
                b->uidx.ptr.p_int[k0] = dst+1;
            }
        }
        else
        {
            j0 = a->ridx.ptr.p_int[i];
            j1 = a->uidx.ptr.p_int[i]-1;
            for(jj=j0; jj<=j1; jj++)
            {
                j = a->idx.ptr.p_int[jj];
                k0 = p->ptr.p_int[i];
                k1 = p->ptr.p_int[j];
                /* store at (max,min) so the element stays in the lower triangle */
                if( k1>k0 )
                {
                    kk = k0;
                    k0 = k1;
                    k1 = kk;
                }
                dst = b->uidx.ptr.p_int[k0];
                b->idx.ptr.p_int[dst] = k1;
                b->vals.ptr.p_double[dst] = a->vals.ptr.p_double[jj];
                b->uidx.ptr.p_int[k0] = dst+1;
            }
        }
    }

    /*
     * Finalize matrix: sort column indices within each row and rebuild
     * the real DIdx/UIdx structure of the output.
     */
    for(i=0; i<=n-1; i++)
    {
        tagsortmiddleir(&b->idx, &b->vals, b->ridx.ptr.p_int[i], b->ridx.ptr.p_int[i+1]-b->ridx.ptr.p_int[i], _state);
    }
    sparseinitduidx(b, _state);
}
27981 
27982 
27983 /*************************************************************************
27984 This procedure resizes Hash-Table matrix. It can be called when you  have
27985 deleted too many elements from the matrix, and you want to  free unneeded
27986 memory.
27987 
27988   -- ALGLIB PROJECT --
27989      Copyright 14.10.2011 by Bochkanov Sergey
27990 *************************************************************************/
sparseresizematrix(sparsematrix * s,ae_state * _state)27991 void sparseresizematrix(sparsematrix* s, ae_state *_state)
27992 {
27993     ae_frame _frame_block;
27994     ae_int_t k;
27995     ae_int_t k1;
27996     ae_int_t i;
27997     ae_vector tvals;
27998     ae_vector tidx;
27999 
28000     ae_frame_make(_state, &_frame_block);
28001     memset(&tvals, 0, sizeof(tvals));
28002     memset(&tidx, 0, sizeof(tidx));
28003     ae_vector_init(&tvals, 0, DT_REAL, _state, ae_true);
28004     ae_vector_init(&tidx, 0, DT_INT, _state, ae_true);
28005 
28006     ae_assert(s->matrixtype==0, "SparseResizeMatrix: incorrect matrix type", _state);
28007 
28008     /*
28009      * Initialization for length and number of non-null elementd
28010      */
28011     k = s->tablesize;
28012     k1 = 0;
28013 
28014     /*
28015      * Calculating number of non-null elements
28016      */
28017     for(i=0; i<=k-1; i++)
28018     {
28019         if( s->idx.ptr.p_int[2*i]>=0 )
28020         {
28021             k1 = k1+1;
28022         }
28023     }
28024 
28025     /*
28026      * Initialization value for free space
28027      */
28028     s->tablesize = ae_round(k1/sparse_desiredloadfactor*sparse_growfactor+sparse_additional, _state);
28029     s->nfree = s->tablesize-k1;
28030     ae_vector_set_length(&tvals, s->tablesize, _state);
28031     ae_vector_set_length(&tidx, 2*s->tablesize, _state);
28032     ae_swap_vectors(&s->vals, &tvals);
28033     ae_swap_vectors(&s->idx, &tidx);
28034     for(i=0; i<=s->tablesize-1; i++)
28035     {
28036         s->idx.ptr.p_int[2*i] = -1;
28037     }
28038     for(i=0; i<=k-1; i++)
28039     {
28040         if( tidx.ptr.p_int[2*i]>=0 )
28041         {
28042             sparseset(s, tidx.ptr.p_int[2*i], tidx.ptr.p_int[2*i+1], tvals.ptr.p_double[i], _state);
28043         }
28044     }
28045     ae_frame_leave(_state);
28046 }
28047 
28048 
28049 /*************************************************************************
28050 Procedure for initialization 'S.DIdx' and 'S.UIdx'
28051 
28052 
28053   -- ALGLIB PROJECT --
28054      Copyright 14.10.2011 by Bochkanov Sergey
28055 *************************************************************************/
sparseinitduidx(sparsematrix * s,ae_state * _state)28056 void sparseinitduidx(sparsematrix* s, ae_state *_state)
28057 {
28058     ae_int_t i;
28059     ae_int_t j;
28060     ae_int_t k;
28061     ae_int_t lt;
28062     ae_int_t rt;
28063 
28064 
28065     ae_assert(s->matrixtype==1, "SparseInitDUIdx: internal error, incorrect matrix type", _state);
28066     ivectorsetlengthatleast(&s->didx, s->m, _state);
28067     ivectorsetlengthatleast(&s->uidx, s->m, _state);
28068     for(i=0; i<=s->m-1; i++)
28069     {
28070         s->uidx.ptr.p_int[i] = -1;
28071         s->didx.ptr.p_int[i] = -1;
28072         lt = s->ridx.ptr.p_int[i];
28073         rt = s->ridx.ptr.p_int[i+1];
28074         for(j=lt; j<=rt-1; j++)
28075         {
28076             k = s->idx.ptr.p_int[j];
28077             if( k==i )
28078             {
28079                 s->didx.ptr.p_int[i] = j;
28080             }
28081             else
28082             {
28083                 if( k>i&&s->uidx.ptr.p_int[i]==-1 )
28084                 {
28085                     s->uidx.ptr.p_int[i] = j;
28086                     break;
28087                 }
28088             }
28089         }
28090         if( s->uidx.ptr.p_int[i]==-1 )
28091         {
28092             s->uidx.ptr.p_int[i] = s->ridx.ptr.p_int[i+1];
28093         }
28094         if( s->didx.ptr.p_int[i]==-1 )
28095         {
28096             s->didx.ptr.p_int[i] = s->uidx.ptr.p_int[i];
28097         }
28098     }
28099 }
28100 
28101 
28102 /*************************************************************************
This function returns the average length of a chain in the hash-table.
28104 
28105   -- ALGLIB PROJECT --
28106      Copyright 14.10.2011 by Bochkanov Sergey
28107 *************************************************************************/
sparsegetaveragelengthofchain(sparsematrix * s,ae_state * _state)28108 double sparsegetaveragelengthofchain(sparsematrix* s, ae_state *_state)
28109 {
28110     ae_int_t nchains;
28111     ae_int_t talc;
28112     ae_int_t l;
28113     ae_int_t i;
28114     ae_int_t ind0;
28115     ae_int_t ind1;
28116     ae_int_t hashcode;
28117     double result;
28118 
28119 
28120 
28121     /*
28122      * If matrix represent in CRS then return zero and exit
28123      */
28124     if( s->matrixtype!=0 )
28125     {
28126         result = (double)(0);
28127         return result;
28128     }
28129     nchains = 0;
28130     talc = 0;
28131     l = s->tablesize;
28132     for(i=0; i<=l-1; i++)
28133     {
28134         ind0 = 2*i;
28135         if( s->idx.ptr.p_int[ind0]!=-1 )
28136         {
28137             nchains = nchains+1;
28138             hashcode = sparse_hash(s->idx.ptr.p_int[ind0], s->idx.ptr.p_int[ind0+1], l, _state);
28139             for(;;)
28140             {
28141                 talc = talc+1;
28142                 ind1 = 2*hashcode;
28143                 if( s->idx.ptr.p_int[ind0]==s->idx.ptr.p_int[ind1]&&s->idx.ptr.p_int[ind0+1]==s->idx.ptr.p_int[ind1+1] )
28144                 {
28145                     break;
28146                 }
28147                 hashcode = (hashcode+1)%l;
28148             }
28149         }
28150     }
28151     if( nchains==0 )
28152     {
28153         result = (double)(0);
28154     }
28155     else
28156     {
28157         result = (double)talc/(double)nchains;
28158     }
28159     return result;
28160 }
28161 
28162 
28163 /*************************************************************************
28164 This  function  is  used  to enumerate all elements of the sparse matrix.
28165 Before  first  call  user  initializes  T0 and T1 counters by zero. These
28166 counters are used to remember current position in a  matrix;  after  each
28167 call they are updated by the function.
28168 
28169 Subsequent calls to this function return non-zero elements of the  sparse
28170 matrix, one by one. If you enumerate CRS matrix, matrix is traversed from
28171 left to right, from top to bottom. In case you enumerate matrix stored as
28172 Hash table, elements are returned in random order.
28173 
28174 EXAMPLE
28175     > T0=0
28176     > T1=0
28177     > while SparseEnumerate(S,T0,T1,I,J,V) do
28178     >     ....do something with I,J,V
28179 
28180 INPUT PARAMETERS
28181     S           -   sparse M*N matrix in Hash-Table or CRS representation.
28182     T0          -   internal counter
28183     T1          -   internal counter
28184 
28185 OUTPUT PARAMETERS
28186     T0          -   new value of the internal counter
28187     T1          -   new value of the internal counter
28188     I           -   row index of non-zero element, 0<=I<M.
28189     J           -   column index of non-zero element, 0<=J<N
28190     V           -   value of the T-th element
28191 
28192 RESULT
28193     True in case of success (next non-zero element was retrieved)
28194     False in case all non-zero elements were enumerated
28195 
28196 NOTE: you may call SparseRewriteExisting() during enumeration, but it  is
28197       THE  ONLY  matrix  modification  function  you  can  call!!!  Other
28198       matrix modification functions should not be called during enumeration!
28199 
28200   -- ALGLIB PROJECT --
28201      Copyright 14.03.2012 by Bochkanov Sergey
28202 *************************************************************************/
ae_bool sparseenumerate(sparsematrix* s,
     ae_int_t* t0,
     ae_int_t* t1,
     ae_int_t* i,
     ae_int_t* j,
     double* v,
     ae_state *_state)
{
    ae_int_t sz;
    ae_int_t i0;
    ae_bool result;

    *i = 0;
    *j = 0;
    *v = 0;

    result = ae_false;
    if( *t0<0||(s->matrixtype!=0&&*t1<0) )
    {

        /*
         * Incorrect T0/T1, terminate enumeration
         */
        result = ae_false;
        return result;
    }
    if( s->matrixtype==0 )
    {

        /*
         * Hash-table matrix.
         *
         * T0 is the slot index to resume from; T1 is unused for this
         * format.  Slots whose row index is -1 or -2 are skipped
         * (presumably empty vs. deleted markers - both denote unused
         * slots here).
         */
        sz = s->tablesize;
        for(i0=*t0; i0<=sz-1; i0++)
        {
            if( s->idx.ptr.p_int[2*i0]==-1||s->idx.ptr.p_int[2*i0]==-2 )
            {
                continue;
            }
            else
            {
                *i = s->idx.ptr.p_int[2*i0];
                *j = s->idx.ptr.p_int[2*i0+1];
                *v = s->vals.ptr.p_double[i0];
                *t0 = i0+1;
                result = ae_true;
                return result;
            }
        }
        /* table exhausted: reset counters so the next call restarts */
        *t0 = 0;
        *t1 = 0;
        result = ae_false;
        return result;
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS matrix.
         *
         * T0 is the flat offset into Idx[]/Vals[]; T1 is the current row
         * index, advanced until T0 falls inside row T1's range.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseEnumerate: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        if( *t0>=s->ninitialized )
        {
            *t0 = 0;
            *t1 = 0;
            result = ae_false;
            return result;
        }
        while(*t0>s->ridx.ptr.p_int[*t1+1]-1&&*t1<s->m)
        {
            *t1 = *t1+1;
        }
        *i = *t1;
        *j = s->idx.ptr.p_int[*t0];
        *v = s->vals.ptr.p_double[*t0];
        *t0 = *t0+1;
        result = ae_true;
        return result;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS matrix:
         * * T0 stores current offset in Vals[] array
         * * T1 stores index of the diagonal block
         */
        ae_assert(s->m==s->n, "SparseEnumerate: non-square SKS matrices are not supported", _state);
        if( *t0>=s->ridx.ptr.p_int[s->m] )
        {
            *t0 = 0;
            *t1 = 0;
            result = ae_false;
            return result;
        }
        while(*t0>s->ridx.ptr.p_int[*t1+1]-1&&*t1<s->m)
        {
            *t1 = *t1+1;
        }
        /* I0 is the offset of the element within block T1 */
        i0 = *t0-s->ridx.ptr.p_int[*t1];
        if( i0<s->didx.ptr.p_int[*t1]+1 )
        {

            /*
             * subdiagonal or diagonal element, row index is T1.
             */
            *i = *t1;
            *j = *t1-s->didx.ptr.p_int[*t1]+i0;
        }
        else
        {

            /*
             * superdiagonal element, column index is T1.
             */
            *i = *t1-(s->ridx.ptr.p_int[*t1+1]-(*t0));
            *j = *t1;
        }
        *v = s->vals.ptr.p_double[*t0];
        *t0 = *t0+1;
        result = ae_true;
        return result;
    }
    ae_assert(ae_false, "SparseEnumerate: unexpected matrix type", _state);
    return result;
}
28329 
28330 
28331 /*************************************************************************
28332 This function rewrites existing (non-zero) element. It  returns  True   if
28333 element  exists  or  False,  when  it  is  called for non-existing  (zero)
28334 element.
28335 
28336 This function works with any kind of the matrix.
28337 
28338 The purpose of this function is to provide convenient thread-safe  way  to
28339 modify  sparse  matrix.  Such  modification  (already  existing element is
28340 rewritten) is guaranteed to be thread-safe without any synchronization, as
28341 long as different threads modify different elements.
28342 
28343 INPUT PARAMETERS
28344     S           -   sparse M*N matrix in any kind of representation
28345                     (Hash, SKS, CRS).
28346     I           -   row index of non-zero element to modify, 0<=I<M
28347     J           -   column index of non-zero element to modify, 0<=J<N
28348     V           -   value to rewrite, must be finite number
28349 
28350 OUTPUT PARAMETERS
28351     S           -   modified matrix
28352 RESULT
28353     True in case when element exists
28354     False in case when element doesn't exist or it is zero
28355 
28356   -- ALGLIB PROJECT --
28357      Copyright 14.03.2012 by Bochkanov Sergey
28358 *************************************************************************/
sparserewriteexisting(sparsematrix * s,ae_int_t i,ae_int_t j,double v,ae_state * _state)28359 ae_bool sparserewriteexisting(sparsematrix* s,
28360      ae_int_t i,
28361      ae_int_t j,
28362      double v,
28363      ae_state *_state)
28364 {
28365     ae_int_t hashcode;
28366     ae_int_t k;
28367     ae_int_t k0;
28368     ae_int_t k1;
28369     ae_bool result;
28370 
28371 
28372     ae_assert(0<=i&&i<s->m, "SparseRewriteExisting: invalid argument I(either I<0 or I>=S.M)", _state);
28373     ae_assert(0<=j&&j<s->n, "SparseRewriteExisting: invalid argument J(either J<0 or J>=S.N)", _state);
28374     ae_assert(ae_isfinite(v, _state), "SparseRewriteExisting: invalid argument V(either V is infinite or V is NaN)", _state);
28375     result = ae_false;
28376 
28377     /*
28378      * Hash-table matrix
28379      */
28380     if( s->matrixtype==0 )
28381     {
28382         k = s->tablesize;
28383         hashcode = sparse_hash(i, j, k, _state);
28384         for(;;)
28385         {
28386             if( s->idx.ptr.p_int[2*hashcode]==-1 )
28387             {
28388                 return result;
28389             }
28390             if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
28391             {
28392                 s->vals.ptr.p_double[hashcode] = v;
28393                 result = ae_true;
28394                 return result;
28395             }
28396             hashcode = (hashcode+1)%k;
28397         }
28398     }
28399 
28400     /*
28401      * CRS matrix
28402      */
28403     if( s->matrixtype==1 )
28404     {
28405         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseRewriteExisting: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
28406         k0 = s->ridx.ptr.p_int[i];
28407         k1 = s->ridx.ptr.p_int[i+1]-1;
28408         while(k0<=k1)
28409         {
28410             k = (k0+k1)/2;
28411             if( s->idx.ptr.p_int[k]==j )
28412             {
28413                 s->vals.ptr.p_double[k] = v;
28414                 result = ae_true;
28415                 return result;
28416             }
28417             if( s->idx.ptr.p_int[k]<j )
28418             {
28419                 k0 = k+1;
28420             }
28421             else
28422             {
28423                 k1 = k-1;
28424             }
28425         }
28426     }
28427 
28428     /*
28429      * SKS
28430      */
28431     if( s->matrixtype==2 )
28432     {
28433         ae_assert(s->m==s->n, "SparseRewriteExisting: non-square SKS matrix not supported", _state);
28434         if( i==j )
28435         {
28436 
28437             /*
28438              * Rewrite diagonal element
28439              */
28440             result = ae_true;
28441             s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+s->didx.ptr.p_int[i]] = v;
28442             return result;
28443         }
28444         if( j<i )
28445         {
28446 
28447             /*
28448              * Return subdiagonal element at I-th "skyline block"
28449              */
28450             k = s->didx.ptr.p_int[i];
28451             if( i-j<=k )
28452             {
28453                 s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+k+j-i] = v;
28454                 result = ae_true;
28455             }
28456         }
28457         else
28458         {
28459 
28460             /*
28461              * Return superdiagonal element at J-th "skyline block"
28462              */
28463             k = s->uidx.ptr.p_int[j];
28464             if( j-i<=k )
28465             {
28466                 s->vals.ptr.p_double[s->ridx.ptr.p_int[j+1]-(j-i)] = v;
28467                 result = ae_true;
28468             }
28469         }
28470         return result;
28471     }
28472     return result;
28473 }
28474 
28475 
28476 /*************************************************************************
28477 This function returns I-th row of the sparse matrix. Matrix must be stored
28478 in CRS or SKS format.
28479 
28480 INPUT PARAMETERS:
28481     S           -   sparse M*N matrix in CRS format
28482     I           -   row index, 0<=I<M
28483     IRow        -   output buffer, can be  preallocated.  In  case  buffer
28484                     size  is  too  small  to  store  I-th   row,   it   is
28485                     automatically reallocated.
28486 
28487 OUTPUT PARAMETERS:
28488     IRow        -   array[M], I-th row.
28489 
NOTE: this function has O(N) running time, where N is a  column  count. It
      allocates and fills N-element  array,  even  though  most   of   its
      elements are zero.
28493 
28494 NOTE: If you have O(non-zeros-per-row) time and memory  requirements,  use
28495       SparseGetCompressedRow() function. It  returns  data  in  compressed
28496       format.
28497 
28498 NOTE: when  incorrect  I  (outside  of  [0,M-1]) or  matrix (non  CRS/SKS)
28499       is passed, this function throws exception.
28500 
28501   -- ALGLIB PROJECT --
28502      Copyright 10.12.2014 by Bochkanov Sergey
28503 *************************************************************************/
sparsegetrow(sparsematrix * s,ae_int_t i,ae_vector * irow,ae_state * _state)28504 void sparsegetrow(sparsematrix* s,
28505      ae_int_t i,
28506      /* Real    */ ae_vector* irow,
28507      ae_state *_state)
28508 {
28509     ae_int_t i0;
28510     ae_int_t j0;
28511     ae_int_t j1;
28512     ae_int_t j;
28513     ae_int_t upperprofile;
28514 
28515 
28516     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseGetRow: S must be CRS/SKS-based matrix", _state);
28517     ae_assert(i>=0&&i<s->m, "SparseGetRow: I<0 or I>=M", _state);
28518 
28519     /*
28520      * Prepare output buffer
28521      */
28522     rvectorsetlengthatleast(irow, s->n, _state);
28523     for(i0=0; i0<=s->n-1; i0++)
28524     {
28525         irow->ptr.p_double[i0] = (double)(0);
28526     }
28527 
28528     /*
28529      * Output
28530      */
28531     if( s->matrixtype==1 )
28532     {
28533         for(i0=s->ridx.ptr.p_int[i]; i0<=s->ridx.ptr.p_int[i+1]-1; i0++)
28534         {
28535             irow->ptr.p_double[s->idx.ptr.p_int[i0]] = s->vals.ptr.p_double[i0];
28536         }
28537         return;
28538     }
28539     if( s->matrixtype==2 )
28540     {
28541 
28542         /*
28543          * Copy subdiagonal and diagonal parts
28544          */
28545         ae_assert(s->n==s->m, "SparseGetRow: non-square SKS matrices are not supported", _state);
28546         j0 = i-s->didx.ptr.p_int[i];
28547         i0 = -j0+s->ridx.ptr.p_int[i];
28548         for(j=j0; j<=i; j++)
28549         {
28550             irow->ptr.p_double[j] = s->vals.ptr.p_double[j+i0];
28551         }
28552 
28553         /*
28554          * Copy superdiagonal part
28555          */
28556         upperprofile = s->uidx.ptr.p_int[s->n];
28557         j0 = i+1;
28558         j1 = ae_minint(s->n-1, i+upperprofile, _state);
28559         for(j=j0; j<=j1; j++)
28560         {
28561             if( j-i<=s->uidx.ptr.p_int[j] )
28562             {
28563                 irow->ptr.p_double[j] = s->vals.ptr.p_double[s->ridx.ptr.p_int[j+1]-(j-i)];
28564             }
28565         }
28566         return;
28567     }
28568 }
28569 
28570 
28571 /*************************************************************************
28572 This function returns I-th row of the sparse matrix IN COMPRESSED FORMAT -
28573 only non-zero elements are returned (with their indexes). Matrix  must  be
28574 stored in CRS or SKS format.
28575 
28576 INPUT PARAMETERS:
28577     S           -   sparse M*N matrix in CRS format
28578     I           -   row index, 0<=I<M
28579     ColIdx      -   output buffer for column indexes, can be preallocated.
28580                     In case buffer size is too small to store I-th row, it
28581                     is automatically reallocated.
28582     Vals        -   output buffer for values, can be preallocated. In case
28583                     buffer size is too small to  store  I-th  row,  it  is
28584                     automatically reallocated.
28585 
28586 OUTPUT PARAMETERS:
28587     ColIdx      -   column   indexes   of  non-zero  elements,  sorted  by
28588                     ascending. Symbolically non-zero elements are  counted
28589                     (i.e. if you allocated place for element, but  it  has
28590                     zero numerical value - it is counted).
28591     Vals        -   values. Vals[K] stores value of  matrix  element  with
28592                     indexes (I,ColIdx[K]). Symbolically non-zero  elements
28593                     are counted (i.e. if you allocated place for  element,
28594                     but it has zero numerical value - it is counted).
28595     NZCnt       -   number of symbolically non-zero elements per row.
28596 
28597 NOTE: when  incorrect  I  (outside  of  [0,M-1]) or  matrix (non  CRS/SKS)
28598       is passed, this function throws exception.
28599 
28600 NOTE: this function may allocate additional, unnecessary place for  ColIdx
28601       and Vals arrays. It is dictated by  performance  reasons  -  on  SKS
28602       matrices it is faster  to  allocate  space  at  the  beginning  with
28603       some "extra"-space, than performing two passes over matrix  -  first
28604       time to calculate exact space required for data, second  time  -  to
28605       store data itself.
28606 
28607   -- ALGLIB PROJECT --
28608      Copyright 10.12.2014 by Bochkanov Sergey
28609 *************************************************************************/
sparsegetcompressedrow(sparsematrix * s,ae_int_t i,ae_vector * colidx,ae_vector * vals,ae_int_t * nzcnt,ae_state * _state)28610 void sparsegetcompressedrow(sparsematrix* s,
28611      ae_int_t i,
28612      /* Integer */ ae_vector* colidx,
28613      /* Real    */ ae_vector* vals,
28614      ae_int_t* nzcnt,
28615      ae_state *_state)
28616 {
28617     ae_int_t k;
28618     ae_int_t k0;
28619     ae_int_t j;
28620     ae_int_t j0;
28621     ae_int_t j1;
28622     ae_int_t i0;
28623     ae_int_t upperprofile;
28624 
28625     *nzcnt = 0;
28626 
28627     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseGetRow: S must be CRS/SKS-based matrix", _state);
28628     ae_assert(i>=0&&i<s->m, "SparseGetRow: I<0 or I>=M", _state);
28629 
28630     /*
28631      * Initialize NZCnt
28632      */
28633     *nzcnt = 0;
28634 
28635     /*
28636      * CRS matrix - just copy data
28637      */
28638     if( s->matrixtype==1 )
28639     {
28640         *nzcnt = s->ridx.ptr.p_int[i+1]-s->ridx.ptr.p_int[i];
28641         ivectorsetlengthatleast(colidx, *nzcnt, _state);
28642         rvectorsetlengthatleast(vals, *nzcnt, _state);
28643         k0 = s->ridx.ptr.p_int[i];
28644         for(k=0; k<=*nzcnt-1; k++)
28645         {
28646             colidx->ptr.p_int[k] = s->idx.ptr.p_int[k0+k];
28647             vals->ptr.p_double[k] = s->vals.ptr.p_double[k0+k];
28648         }
28649         return;
28650     }
28651 
28652     /*
28653      * SKS matrix - a bit more complex sequence
28654      */
28655     if( s->matrixtype==2 )
28656     {
28657         ae_assert(s->n==s->m, "SparseGetCompressedRow: non-square SKS matrices are not supported", _state);
28658 
28659         /*
28660          * Allocate enough place for storage
28661          */
28662         upperprofile = s->uidx.ptr.p_int[s->n];
28663         ivectorsetlengthatleast(colidx, s->didx.ptr.p_int[i]+1+upperprofile, _state);
28664         rvectorsetlengthatleast(vals, s->didx.ptr.p_int[i]+1+upperprofile, _state);
28665 
28666         /*
28667          * Copy subdiagonal and diagonal parts
28668          */
28669         j0 = i-s->didx.ptr.p_int[i];
28670         i0 = -j0+s->ridx.ptr.p_int[i];
28671         for(j=j0; j<=i; j++)
28672         {
28673             colidx->ptr.p_int[*nzcnt] = j;
28674             vals->ptr.p_double[*nzcnt] = s->vals.ptr.p_double[j+i0];
28675             *nzcnt = *nzcnt+1;
28676         }
28677 
28678         /*
28679          * Copy superdiagonal part
28680          */
28681         j0 = i+1;
28682         j1 = ae_minint(s->n-1, i+upperprofile, _state);
28683         for(j=j0; j<=j1; j++)
28684         {
28685             if( j-i<=s->uidx.ptr.p_int[j] )
28686             {
28687                 colidx->ptr.p_int[*nzcnt] = j;
28688                 vals->ptr.p_double[*nzcnt] = s->vals.ptr.p_double[s->ridx.ptr.p_int[j+1]-(j-i)];
28689                 *nzcnt = *nzcnt+1;
28690             }
28691         }
28692         return;
28693     }
28694 }
28695 
28696 
28697 /*************************************************************************
28698 This function performs efficient in-place  transpose  of  SKS  matrix.  No
28699 additional memory is allocated during transposition.
28700 
28701 This function supports only skyline storage format (SKS).
28702 
28703 INPUT PARAMETERS
28704     S       -   sparse matrix in SKS format.
28705 
28706 OUTPUT PARAMETERS
28707     S           -   sparse matrix, transposed.
28708 
28709   -- ALGLIB PROJECT --
28710      Copyright 16.01.2014 by Bochkanov Sergey
28711 *************************************************************************/
void sparsetransposesks(sparsematrix* s, ae_state *_state)
{
    ae_int_t n;
    ae_int_t d;
    ae_int_t u;
    ae_int_t i;
    ae_int_t k;
    ae_int_t t0;
    ae_int_t t1;
    double v;


    ae_assert(s->matrixtype==2, "SparseTransposeSKS: only SKS matrices are supported", _state);
    ae_assert(s->m==s->n, "SparseTransposeSKS: non-square SKS matrices are not supported", _state);
    n = s->n;
    for(i=1; i<=n-1; i++)
    {

        /*
         * D/U are the lower/upper skyline heights of the I-th row.
         * Transposition exchanges their roles, so DIdx[I] and UIdx[I]
         * are swapped; the stored values are rearranged below.
         */
        d = s->didx.ptr.p_int[i];
        u = s->uidx.ptr.p_int[i];
        k = s->uidx.ptr.p_int[i];
        s->uidx.ptr.p_int[i] = s->didx.ptr.p_int[i];
        s->didx.ptr.p_int[i] = k;
        if( d==u )
        {

            /*
             * Upper skyline height equal to lower skyline height,
             * simple exchange is needed for transposition
             */
            t0 = s->ridx.ptr.p_int[i];
            for(k=0; k<=d-1; k++)
            {
                v = s->vals.ptr.p_double[t0+k];
                s->vals.ptr.p_double[t0+k] = s->vals.ptr.p_double[t0+d+1+k];
                s->vals.ptr.p_double[t0+d+1+k] = v;
            }
        }
        if( d>u )
        {

            /*
             * Upper skyline height is less than lower skyline height.
             *
             * Transposition becomes a bit tricky: we have to rearrange
             * "L0 L1 D U" to "U D L0 L1", where |L0|=|U|=u, |L1|=d-u.
             *
             * In order to do this we perform a sequence of swaps and
             * in-place reversals:
             * * swap(L0,U)         =>  "U   L1  D   L0"
             * * reverse("L1 D L0") =>  "U   L0~ D   L1~" (where X~ is a reverse of X)
             * * reverse("L0~ D")   =>  "U   D   L0  L1~"
             * * reverse("L1")      =>  "U   D   L0  L1"
             */

            /* Step 1: swap(L0,U) - exchange first u elements with the trailing u */
            t0 = s->ridx.ptr.p_int[i];
            t1 = s->ridx.ptr.p_int[i]+d+1;
            for(k=0; k<=u-1; k++)
            {
                v = s->vals.ptr.p_double[t0+k];
                s->vals.ptr.p_double[t0+k] = s->vals.ptr.p_double[t1+k];
                s->vals.ptr.p_double[t1+k] = v;
            }

            /* Step 2: reverse("L1 D L0") - everything after the leading U */
            t0 = s->ridx.ptr.p_int[i]+u;
            t1 = s->ridx.ptr.p_int[i+1]-1;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }

            /* Step 3: reverse("L0~ D") - u+1 elements starting at offset u */
            t0 = s->ridx.ptr.p_int[i]+u;
            t1 = s->ridx.ptr.p_int[i]+u+u;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }

            /* Step 4: reverse("L1~") - trailing d-u elements */
            t0 = s->ridx.ptr.p_int[i+1]-(d-u);
            t1 = s->ridx.ptr.p_int[i+1]-1;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }
        }
        if( d<u )
        {

            /*
             * Upper skyline height is greater than lower skyline height.
             *
             * Transposition becomes a bit tricky: we have to rearrange
             * "L D U0 U1" to "U0 U1 D L", where |U1|=|L|=d, |U0|=u-d.
             *
             * In order to do this we perform a sequence of swaps and
             * in-place reversals:
             * * swap(L,U1)         =>  "U1  D   U0  L"
             * * reverse("U1 D U0") =>  "U0~ D   U1~ L" (where X~ is a reverse of X)
             * * reverse("U0~")     =>  "U0  D   U1~ L"
             * * reverse("D U1~")   =>  "U0  U1  D   L"
             */

            /* Step 1: swap(L,U1) - exchange first d elements with the trailing d */
            t0 = s->ridx.ptr.p_int[i];
            t1 = s->ridx.ptr.p_int[i+1]-d;
            for(k=0; k<=d-1; k++)
            {
                v = s->vals.ptr.p_double[t0+k];
                s->vals.ptr.p_double[t0+k] = s->vals.ptr.p_double[t1+k];
                s->vals.ptr.p_double[t1+k] = v;
            }

            /* Step 2: reverse("U1 D U0") - leading u+1 elements */
            t0 = s->ridx.ptr.p_int[i];
            t1 = s->ridx.ptr.p_int[i]+u;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }

            /* Step 3: reverse("U0~") - leading u-d elements */
            t0 = s->ridx.ptr.p_int[i];
            t1 = s->ridx.ptr.p_int[i]+u-d-1;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }

            /* Step 4: reverse("D U1~") - d+1 elements starting at offset u-d */
            t0 = s->ridx.ptr.p_int[i]+u-d;
            t1 = s->ridx.ptr.p_int[i+1]-d-1;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }
        }
    }

    /*
     * Swap the trailing DIdx[N]/UIdx[N] entries too: UIdx[N] is used
     * elsewhere in this file as the matrix-wide upper profile width
     * (see its use as "upperprofile" in SparseGetRow), and the
     * lower/upper roles are exchanged by transposition.
     */
    k = s->uidx.ptr.p_int[n];
    s->uidx.ptr.p_int[n] = s->didx.ptr.p_int[n];
    s->didx.ptr.p_int[n] = k;
}
28864 
28865 
28866 /*************************************************************************
28867 This function performs transpose of CRS matrix.
28868 
28869 INPUT PARAMETERS
28870     S       -   sparse matrix in CRS format.
28871 
28872 OUTPUT PARAMETERS
28873     S           -   sparse matrix, transposed.
28874 
28875 NOTE: internal  temporary  copy  is  allocated   for   the   purposes   of
28876       transposition. It is deallocated after transposition.
28877 
28878   -- ALGLIB PROJECT --
28879      Copyright 30.01.2018 by Bochkanov Sergey
28880 *************************************************************************/
sparsetransposecrs(sparsematrix * s,ae_state * _state)28881 void sparsetransposecrs(sparsematrix* s, ae_state *_state)
28882 {
28883     ae_frame _frame_block;
28884     ae_vector oldvals;
28885     ae_vector oldidx;
28886     ae_vector oldridx;
28887     ae_int_t oldn;
28888     ae_int_t oldm;
28889     ae_int_t newn;
28890     ae_int_t newm;
28891     ae_int_t i;
28892     ae_int_t j;
28893     ae_int_t k;
28894     ae_int_t nonne;
28895     ae_vector counts;
28896 
28897     ae_frame_make(_state, &_frame_block);
28898     memset(&oldvals, 0, sizeof(oldvals));
28899     memset(&oldidx, 0, sizeof(oldidx));
28900     memset(&oldridx, 0, sizeof(oldridx));
28901     memset(&counts, 0, sizeof(counts));
28902     ae_vector_init(&oldvals, 0, DT_REAL, _state, ae_true);
28903     ae_vector_init(&oldidx, 0, DT_INT, _state, ae_true);
28904     ae_vector_init(&oldridx, 0, DT_INT, _state, ae_true);
28905     ae_vector_init(&counts, 0, DT_INT, _state, ae_true);
28906 
28907     ae_assert(s->matrixtype==1, "SparseTransposeCRS: only CRS matrices are supported", _state);
28908     ae_swap_vectors(&s->vals, &oldvals);
28909     ae_swap_vectors(&s->idx, &oldidx);
28910     ae_swap_vectors(&s->ridx, &oldridx);
28911     oldn = s->n;
28912     oldm = s->m;
28913     newn = oldm;
28914     newm = oldn;
28915 
28916     /*
28917      * Update matrix size
28918      */
28919     s->n = newn;
28920     s->m = newm;
28921 
28922     /*
28923      * Fill RIdx by number of elements per row:
28924      * RIdx[I+1] stores number of elements in I-th row.
28925      *
28926      * Convert RIdx from row sizes to row offsets.
28927      * Set NInitialized
28928      */
28929     nonne = 0;
28930     ivectorsetlengthatleast(&s->ridx, newm+1, _state);
28931     for(i=0; i<=newm; i++)
28932     {
28933         s->ridx.ptr.p_int[i] = 0;
28934     }
28935     for(i=0; i<=oldm-1; i++)
28936     {
28937         for(j=oldridx.ptr.p_int[i]; j<=oldridx.ptr.p_int[i+1]-1; j++)
28938         {
28939             k = oldidx.ptr.p_int[j]+1;
28940             s->ridx.ptr.p_int[k] = s->ridx.ptr.p_int[k]+1;
28941             nonne = nonne+1;
28942         }
28943     }
28944     for(i=0; i<=newm-1; i++)
28945     {
28946         s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i+1]+s->ridx.ptr.p_int[i];
28947     }
28948     s->ninitialized = s->ridx.ptr.p_int[newm];
28949 
28950     /*
28951      * Allocate memory and move elements to Vals/Idx.
28952      */
28953     ae_vector_set_length(&counts, newm, _state);
28954     for(i=0; i<=newm-1; i++)
28955     {
28956         counts.ptr.p_int[i] = 0;
28957     }
28958     rvectorsetlengthatleast(&s->vals, nonne, _state);
28959     ivectorsetlengthatleast(&s->idx, nonne, _state);
28960     for(i=0; i<=oldm-1; i++)
28961     {
28962         for(j=oldridx.ptr.p_int[i]; j<=oldridx.ptr.p_int[i+1]-1; j++)
28963         {
28964             k = oldidx.ptr.p_int[j];
28965             k = s->ridx.ptr.p_int[k]+counts.ptr.p_int[k];
28966             s->idx.ptr.p_int[k] = i;
28967             s->vals.ptr.p_double[k] = oldvals.ptr.p_double[j];
28968             k = oldidx.ptr.p_int[j];
28969             counts.ptr.p_int[k] = counts.ptr.p_int[k]+1;
28970         }
28971     }
28972 
28973     /*
28974      * Initialization 'S.UIdx' and 'S.DIdx'
28975      */
28976     sparseinitduidx(s, _state);
28977     ae_frame_leave(_state);
28978 }
28979 
28980 
28981 /*************************************************************************
28982 This function performs copying with transposition of CRS matrix.
28983 
28984 INPUT PARAMETERS
28985     S0      -   sparse matrix in CRS format.
28986 
28987 OUTPUT PARAMETERS
28988     S1      -   sparse matrix, transposed
28989 
28990   -- ALGLIB PROJECT --
28991      Copyright 23.07.2018 by Bochkanov Sergey
28992 *************************************************************************/
sparsecopytransposecrs(sparsematrix * s0,sparsematrix * s1,ae_state * _state)28993 void sparsecopytransposecrs(sparsematrix* s0,
28994      sparsematrix* s1,
28995      ae_state *_state)
28996 {
28997 
28998     _sparsematrix_clear(s1);
28999 
29000     sparsecopytransposecrsbuf(s0, s1, _state);
29001 }
29002 
29003 
29004 /*************************************************************************
29005 This function performs copying with transposition of CRS matrix  (buffered
29006 version which reuses memory already allocated by  the  target as  much  as
29007 possible).
29008 
29009 INPUT PARAMETERS
29010     S0      -   sparse matrix in CRS format.
29011 
29012 OUTPUT PARAMETERS
29013     S1      -   sparse matrix, transposed; previously allocated memory  is
29014                 reused if possible.
29015 
29016   -- ALGLIB PROJECT --
29017      Copyright 23.07.2018 by Bochkanov Sergey
29018 *************************************************************************/
sparsecopytransposecrsbuf(sparsematrix * s0,sparsematrix * s1,ae_state * _state)29019 void sparsecopytransposecrsbuf(sparsematrix* s0,
29020      sparsematrix* s1,
29021      ae_state *_state)
29022 {
29023     ae_int_t oldn;
29024     ae_int_t oldm;
29025     ae_int_t newn;
29026     ae_int_t newm;
29027     ae_int_t i;
29028     ae_int_t j;
29029     ae_int_t k;
29030     ae_int_t kk;
29031     ae_int_t j0;
29032     ae_int_t j1;
29033 
29034 
29035     ae_assert(s0->matrixtype==1, "SparseCopyTransposeCRSBuf: only CRS matrices are supported", _state);
29036     oldn = s0->n;
29037     oldm = s0->m;
29038     newn = oldm;
29039     newm = oldn;
29040 
29041     /*
29042      * Update matrix size
29043      */
29044     s1->matrixtype = 1;
29045     s1->n = newn;
29046     s1->m = newm;
29047 
29048     /*
29049      * Fill RIdx by number of elements per row:
29050      * RIdx[I+1] stores number of elements in I-th row.
29051      *
29052      * Convert RIdx from row sizes to row offsets.
29053      * Set NInitialized
29054      */
29055     isetallocv(newm+1, 0, &s1->ridx, _state);
29056     for(i=0; i<=oldm-1; i++)
29057     {
29058         j0 = s0->ridx.ptr.p_int[i];
29059         j1 = s0->ridx.ptr.p_int[i+1]-1;
29060         for(j=j0; j<=j1; j++)
29061         {
29062             k = s0->idx.ptr.p_int[j]+1;
29063             s1->ridx.ptr.p_int[k] = s1->ridx.ptr.p_int[k]+1;
29064         }
29065     }
29066     for(i=0; i<=newm-1; i++)
29067     {
29068         s1->ridx.ptr.p_int[i+1] = s1->ridx.ptr.p_int[i+1]+s1->ridx.ptr.p_int[i];
29069     }
29070     s1->ninitialized = s1->ridx.ptr.p_int[newm];
29071 
29072     /*
29073      * Allocate memory and move elements to Vals/Idx.
29074      */
29075     ivectorsetlengthatleast(&s1->didx, newm, _state);
29076     for(i=0; i<=newm-1; i++)
29077     {
29078         s1->didx.ptr.p_int[i] = s1->ridx.ptr.p_int[i];
29079     }
29080     rvectorsetlengthatleast(&s1->vals, s1->ninitialized, _state);
29081     ivectorsetlengthatleast(&s1->idx, s1->ninitialized, _state);
29082     for(i=0; i<=oldm-1; i++)
29083     {
29084         j0 = s0->ridx.ptr.p_int[i];
29085         j1 = s0->ridx.ptr.p_int[i+1]-1;
29086         for(j=j0; j<=j1; j++)
29087         {
29088             kk = s0->idx.ptr.p_int[j];
29089             k = s1->didx.ptr.p_int[kk];
29090             s1->idx.ptr.p_int[k] = i;
29091             s1->vals.ptr.p_double[k] = s0->vals.ptr.p_double[j];
29092             s1->didx.ptr.p_int[kk] = k+1;
29093         }
29094     }
29095 
29096     /*
29097      * Initialization 'S.UIdx' and 'S.DIdx'
29098      */
29099     sparseinitduidx(s1, _state);
29100 }
29101 
29102 
29103 /*************************************************************************
29104 This  function  performs  in-place  conversion  to  desired sparse storage
29105 format.
29106 
29107 INPUT PARAMETERS
29108     S0      -   sparse matrix in any format.
29109     Fmt     -   desired storage format  of  the  output,  as  returned  by
29110                 SparseGetMatrixType() function:
29111                 * 0 for hash-based storage
29112                 * 1 for CRS
29113                 * 2 for SKS
29114 
29115 OUTPUT PARAMETERS
29116     S0          -   sparse matrix in requested format.
29117 
29118 NOTE: in-place conversion wastes a lot of memory which is  used  to  store
29119       temporaries.  If  you  perform  a  lot  of  repeated conversions, we
29120       recommend to use out-of-place buffered  conversion  functions,  like
29121       SparseCopyToBuf(), which can reuse already allocated memory.
29122 
29123   -- ALGLIB PROJECT --
29124      Copyright 16.01.2014 by Bochkanov Sergey
29125 *************************************************************************/
sparseconvertto(sparsematrix * s0,ae_int_t fmt,ae_state * _state)29126 void sparseconvertto(sparsematrix* s0, ae_int_t fmt, ae_state *_state)
29127 {
29128 
29129 
29130     ae_assert((fmt==0||fmt==1)||fmt==2, "SparseConvertTo: invalid fmt parameter", _state);
29131     if( fmt==0 )
29132     {
29133         sparseconverttohash(s0, _state);
29134         return;
29135     }
29136     if( fmt==1 )
29137     {
29138         sparseconverttocrs(s0, _state);
29139         return;
29140     }
29141     if( fmt==2 )
29142     {
29143         sparseconverttosks(s0, _state);
29144         return;
29145     }
29146     ae_assert(ae_false, "SparseConvertTo: invalid matrix type", _state);
29147 }
29148 
29149 
29150 /*************************************************************************
29151 This  function  performs out-of-place conversion to desired sparse storage
29152 format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
29153 S1 is reused to maximum extent possible.
29154 
29155 INPUT PARAMETERS
29156     S0      -   sparse matrix in any format.
29157     Fmt     -   desired storage format  of  the  output,  as  returned  by
29158                 SparseGetMatrixType() function:
29159                 * 0 for hash-based storage
29160                 * 1 for CRS
29161                 * 2 for SKS
29162 
29163 OUTPUT PARAMETERS
29164     S1          -   sparse matrix in requested format.
29165 
29166   -- ALGLIB PROJECT --
29167      Copyright 16.01.2014 by Bochkanov Sergey
29168 *************************************************************************/
sparsecopytobuf(sparsematrix * s0,ae_int_t fmt,sparsematrix * s1,ae_state * _state)29169 void sparsecopytobuf(sparsematrix* s0,
29170      ae_int_t fmt,
29171      sparsematrix* s1,
29172      ae_state *_state)
29173 {
29174 
29175 
29176     ae_assert((fmt==0||fmt==1)||fmt==2, "SparseCopyToBuf: invalid fmt parameter", _state);
29177     if( fmt==0 )
29178     {
29179         sparsecopytohashbuf(s0, s1, _state);
29180         return;
29181     }
29182     if( fmt==1 )
29183     {
29184         sparsecopytocrsbuf(s0, s1, _state);
29185         return;
29186     }
29187     if( fmt==2 )
29188     {
29189         sparsecopytosksbuf(s0, s1, _state);
29190         return;
29191     }
29192     ae_assert(ae_false, "SparseCopyToBuf: invalid matrix type", _state);
29193 }
29194 
29195 
29196 /*************************************************************************
29197 This function performs in-place conversion to Hash table storage.
29198 
29199 INPUT PARAMETERS
29200     S           -   sparse matrix in CRS format.
29201 
29202 OUTPUT PARAMETERS
29203     S           -   sparse matrix in Hash table format.
29204 
29205 NOTE: this  function  has   no  effect  when  called with matrix which  is
29206       already in Hash table mode.
29207 
NOTE: in-place conversion involves allocation of temporary arrays. If  you
      perform a lot of repeated in-place  conversions,  it  may  lead   to
      memory fragmentation. Consider using out-of-place SparseCopyToHashBuf()
      function in this case.
29212 
29213   -- ALGLIB PROJECT --
29214      Copyright 20.07.2012 by Bochkanov Sergey
29215 *************************************************************************/
void sparseconverttohash(sparsematrix* s, ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector tidx;   /* holds old S.Idx  (CRS column indices) after swap   */
    ae_vector tridx;  /* holds old S.RIdx (row offsets) after swap          */
    ae_vector tdidx;  /* holds old S.DIdx (SKS subdiagonal band widths)     */
    ae_vector tuidx;  /* holds old S.UIdx (SKS superdiagonal band widths)   */
    ae_vector tvals;  /* holds old S.Vals (element values) after swap       */
    ae_int_t n;
    ae_int_t m;
    ae_int_t offs0;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;

    ae_frame_make(_state, &_frame_block);
    memset(&tidx, 0, sizeof(tidx));
    memset(&tridx, 0, sizeof(tridx));
    memset(&tdidx, 0, sizeof(tdidx));
    memset(&tuidx, 0, sizeof(tuidx));
    memset(&tvals, 0, sizeof(tvals));
    ae_vector_init(&tidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tridx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tdidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tuidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tvals, 0, DT_REAL, _state, ae_true);

    ae_assert((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2, "SparseConvertToHash: invalid matrix type", _state);
    if( s->matrixtype==0 )
    {

        /*
         * Already in Hash mode
         */
        ae_frame_leave(_state);
        return;
    }
    if( s->matrixtype==1 )
    {

        /*
         * From CRS to Hash.
         *
         * The CRS arrays are swapped into local temporaries, the matrix is
         * re-created in Hash mode with capacity RIdx[M] (the number of stored
         * elements), and every element is re-inserted via SparseSet().
         */
        s->matrixtype = 0;
        m = s->m;
        n = s->n;
        ae_swap_vectors(&s->idx, &tidx);
        ae_swap_vectors(&s->ridx, &tridx);
        ae_swap_vectors(&s->vals, &tvals);
        sparsecreatebuf(m, n, tridx.ptr.p_int[m], s, _state);
        for(i=0; i<=m-1; i++)
        {
            /* row I occupies positions [RIdx[I], RIdx[I+1]) of Idx/Vals */
            for(j=tridx.ptr.p_int[i]; j<=tridx.ptr.p_int[i+1]-1; j++)
            {
                sparseset(s, i, tidx.ptr.p_int[j], tvals.ptr.p_double[j], _state);
            }
        }
        ae_frame_leave(_state);
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * From SKS to Hash: same swap-and-reinsert approach, but the I-th
         * block of Vals stores DIdx[I] subdiagonal elements, the diagonal
         * element, and UIdx[I] superdiagonal elements of column I.
         */
        s->matrixtype = 0;
        m = s->m;
        n = s->n;
        ae_swap_vectors(&s->ridx, &tridx);
        ae_swap_vectors(&s->didx, &tdidx);
        ae_swap_vectors(&s->uidx, &tuidx);
        ae_swap_vectors(&s->vals, &tvals);
        sparsecreatebuf(m, n, tridx.ptr.p_int[m], s, _state);
        for(i=0; i<=m-1; i++)
        {

            /*
             * copy subdiagonal and diagonal parts of I-th block:
             * row I, columns I-DIdx[I] .. I
             */
            offs0 = tridx.ptr.p_int[i];
            k = tdidx.ptr.p_int[i]+1;
            for(j=0; j<=k-1; j++)
            {
                sparseset(s, i, i-tdidx.ptr.p_int[i]+j, tvals.ptr.p_double[offs0+j], _state);
            }

            /*
             * Copy superdiagonal part of I-th block:
             * column I, rows I-UIdx[I] .. I-1
             */
            offs0 = tridx.ptr.p_int[i]+tdidx.ptr.p_int[i]+1;
            k = tuidx.ptr.p_int[i];
            for(j=0; j<=k-1; j++)
            {
                sparseset(s, i-k+j, i, tvals.ptr.p_double[offs0+j], _state);
            }
        }
        ae_frame_leave(_state);
        return;
    }
    /* unreachable given the assertion above; defensive check */
    ae_assert(ae_false, "SparseConvertToHash: invalid matrix type", _state);
    ae_frame_leave(_state);
}
29319 
29320 
29321 /*************************************************************************
29322 This  function  performs  out-of-place  conversion  to  Hash table storage
29323 format. S0 is copied to S1 and converted on-the-fly.
29324 
29325 INPUT PARAMETERS
29326     S0          -   sparse matrix in any format.
29327 
29328 OUTPUT PARAMETERS
29329     S1          -   sparse matrix in Hash table format.
29330 
29331 NOTE: if S0 is stored as Hash-table, it is just copied without conversion.
29332 
29333 NOTE: this function de-allocates memory  occupied  by  S1 before  starting
29334       conversion. If you perform a  lot  of  repeated  conversions, it may
29335       lead to memory fragmentation. In this case we recommend you  to  use
29336       SparseCopyToHashBuf() function which re-uses memory in S1 as much as
29337       possible.
29338 
29339   -- ALGLIB PROJECT --
29340      Copyright 20.07.2012 by Bochkanov Sergey
29341 *************************************************************************/
sparsecopytohash(sparsematrix * s0,sparsematrix * s1,ae_state * _state)29342 void sparsecopytohash(sparsematrix* s0,
29343      sparsematrix* s1,
29344      ae_state *_state)
29345 {
29346 
29347     _sparsematrix_clear(s1);
29348 
29349     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToHash: invalid matrix type", _state);
29350     sparsecopytohashbuf(s0, s1, _state);
29351 }
29352 
29353 
29354 /*************************************************************************
29355 This  function  performs  out-of-place  conversion  to  Hash table storage
29356 format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
29357 S1 is reused to maximum extent possible.
29358 
29359 INPUT PARAMETERS
29360     S0          -   sparse matrix in any format.
29361 
29362 OUTPUT PARAMETERS
29363     S1          -   sparse matrix in Hash table format.
29364 
29365 NOTE: if S0 is stored as Hash-table, it is just copied without conversion.
29366 
29367   -- ALGLIB PROJECT --
29368      Copyright 20.07.2012 by Bochkanov Sergey
29369 *************************************************************************/
sparsecopytohashbuf(sparsematrix * s0,sparsematrix * s1,ae_state * _state)29370 void sparsecopytohashbuf(sparsematrix* s0,
29371      sparsematrix* s1,
29372      ae_state *_state)
29373 {
29374     double val;
29375     ae_int_t t0;
29376     ae_int_t t1;
29377     ae_int_t i;
29378     ae_int_t j;
29379 
29380 
29381     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToHashBuf: invalid matrix type", _state);
29382     if( s0->matrixtype==0 )
29383     {
29384 
29385         /*
29386          * Already hash, just copy
29387          */
29388         sparsecopybuf(s0, s1, _state);
29389         return;
29390     }
29391     if( s0->matrixtype==1 )
29392     {
29393 
29394         /*
29395          * CRS storage
29396          */
29397         t0 = 0;
29398         t1 = 0;
29399         sparsecreatebuf(s0->m, s0->n, s0->ridx.ptr.p_int[s0->m], s1, _state);
29400         while(sparseenumerate(s0, &t0, &t1, &i, &j, &val, _state))
29401         {
29402             sparseset(s1, i, j, val, _state);
29403         }
29404         return;
29405     }
29406     if( s0->matrixtype==2 )
29407     {
29408 
29409         /*
29410          * SKS storage
29411          */
29412         t0 = 0;
29413         t1 = 0;
29414         sparsecreatebuf(s0->m, s0->n, s0->ridx.ptr.p_int[s0->m], s1, _state);
29415         while(sparseenumerate(s0, &t0, &t1, &i, &j, &val, _state))
29416         {
29417             sparseset(s1, i, j, val, _state);
29418         }
29419         return;
29420     }
29421     ae_assert(ae_false, "SparseCopyToHashBuf: invalid matrix type", _state);
29422 }
29423 
29424 
29425 /*************************************************************************
29426 This function converts matrix to CRS format.
29427 
29428 Some  algorithms  (linear  algebra ones, for example) require matrices in
29429 CRS format. This function allows to perform in-place conversion.
29430 
29431 INPUT PARAMETERS
29432     S           -   sparse M*N matrix in any format
29433 
29434 OUTPUT PARAMETERS
29435     S           -   matrix in CRS format
29436 
29437 NOTE: this   function  has  no  effect  when  called with matrix which is
29438       already in CRS mode.
29439 
29440 NOTE: this function allocates temporary memory to store a   copy  of  the
29441       matrix. If you perform a lot of repeated conversions, we  recommend
29442       you  to  use  SparseCopyToCRSBuf()  function,   which   can   reuse
29443       previously allocated memory.
29444 
29445   -- ALGLIB PROJECT --
29446      Copyright 14.10.2011 by Bochkanov Sergey
29447 *************************************************************************/
void sparseconverttocrs(sparsematrix* s, ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t m;
    ae_int_t i;
    ae_int_t j;
    ae_vector tvals;  /* local copy of element values                        */
    ae_vector tidx;   /* local copy of Hash index table / SKS column indices */
    ae_vector temp;   /* per-row insertion counters during the scatter pass  */
    ae_vector tridx;  /* local copy of SKS row offsets                       */
    ae_int_t nonne;   /* total number of stored (non-empty) elements         */
    ae_int_t k;
    ae_int_t offs0;
    ae_int_t offs1;

    ae_frame_make(_state, &_frame_block);
    memset(&tvals, 0, sizeof(tvals));
    memset(&tidx, 0, sizeof(tidx));
    memset(&temp, 0, sizeof(temp));
    memset(&tridx, 0, sizeof(tridx));
    ae_vector_init(&tvals, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&temp, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tridx, 0, DT_INT, _state, ae_true);

    m = s->m;
    if( s->matrixtype==0 )
    {

        /*
         * From Hash-table to CRS.
         * First, create local copy of the hash table.
         */
        s->matrixtype = 1;
        k = s->tablesize;
        ae_swap_vectors(&s->vals, &tvals);
        ae_swap_vectors(&s->idx, &tidx);

        /*
         * Fill RIdx by number of elements per row:
         * RIdx[I+1] stores number of elements in I-th row.
         *
         * Convert RIdx from row sizes to row offsets.
         * Set NInitialized
         */
        nonne = 0;
        ivectorsetlengthatleast(&s->ridx, s->m+1, _state);
        for(i=0; i<=s->m; i++)
        {
            s->ridx.ptr.p_int[i] = 0;
        }
        for(i=0; i<=k-1; i++)
        {
            /* hash slot I is occupied iff its row index (Idx[2*I]) is >=0;
               Idx[2*I+1] is the column index */
            if( tidx.ptr.p_int[2*i]>=0 )
            {
                s->ridx.ptr.p_int[tidx.ptr.p_int[2*i]+1] = s->ridx.ptr.p_int[tidx.ptr.p_int[2*i]+1]+1;
                nonne = nonne+1;
            }
        }
        /* prefix sum: row sizes -> row offsets */
        for(i=0; i<=s->m-1; i++)
        {
            s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i+1]+s->ridx.ptr.p_int[i];
        }
        s->ninitialized = s->ridx.ptr.p_int[s->m];

        /*
         * Allocate memory and move elements to Vals/Idx.
         * Initially, elements are sorted by rows, but unsorted within row.
         * After initial insertion we sort elements within row.
         */
        ae_vector_set_length(&temp, s->m, _state);
        for(i=0; i<=s->m-1; i++)
        {
            temp.ptr.p_int[i] = 0;
        }
        rvectorsetlengthatleast(&s->vals, nonne, _state);
        ivectorsetlengthatleast(&s->idx, nonne, _state);
        for(i=0; i<=k-1; i++)
        {
            if( tidx.ptr.p_int[2*i]>=0 )
            {
                /* scatter into position RIdx[row]+Temp[row], then advance Temp[row] */
                s->vals.ptr.p_double[s->ridx.ptr.p_int[tidx.ptr.p_int[2*i]]+temp.ptr.p_int[tidx.ptr.p_int[2*i]]] = tvals.ptr.p_double[i];
                s->idx.ptr.p_int[s->ridx.ptr.p_int[tidx.ptr.p_int[2*i]]+temp.ptr.p_int[tidx.ptr.p_int[2*i]]] = tidx.ptr.p_int[2*i+1];
                temp.ptr.p_int[tidx.ptr.p_int[2*i]] = temp.ptr.p_int[tidx.ptr.p_int[2*i]]+1;
            }
        }
        /* sort each row by column index, keeping Vals in sync */
        for(i=0; i<=s->m-1; i++)
        {
            tagsortmiddleir(&s->idx, &s->vals, s->ridx.ptr.p_int[i], s->ridx.ptr.p_int[i+1]-s->ridx.ptr.p_int[i], _state);
        }

        /*
         * Initialization 'S.UIdx' and 'S.DIdx'
         */
        sparseinitduidx(s, _state);
        ae_frame_leave(_state);
        return;
    }
    if( s->matrixtype==1 )
    {

        /*
         * Already CRS
         */
        ae_frame_leave(_state);
        return;
    }
    if( s->matrixtype==2 )
    {
        ae_assert(s->m==s->n, "SparseConvertToCRS: non-square SKS matrices are not supported", _state);

        /*
         * From SKS to CRS.
         *
         * First, create local copy of the SKS matrix (Vals,
         * Idx, RIdx are stored; DIdx/UIdx for some time are
         * left in the SparseMatrix structure).
         */
        s->matrixtype = 1;
        ae_swap_vectors(&s->vals, &tvals);
        ae_swap_vectors(&s->idx, &tidx);
        ae_swap_vectors(&s->ridx, &tridx);

        /*
         * Fill RIdx by number of elements per row:
         * RIdx[I+1] stores number of elements in I-th row.
         *
         * Convert RIdx from row sizes to row offsets.
         * Set NInitialized
         */
        ivectorsetlengthatleast(&s->ridx, m+1, _state);
        s->ridx.ptr.p_int[0] = 0;
        for(i=1; i<=m; i++)
        {
            /* every row has at least its diagonal element */
            s->ridx.ptr.p_int[i] = 1;
        }
        nonne = 0;
        for(i=0; i<=m-1; i++)
        {
            /* row I additionally holds DIdx[I] subdiagonal elements; the
               UIdx[I] superdiagonal elements of column I belong to rows
               I-UIdx[I] .. I-1 and are credited to those rows */
            s->ridx.ptr.p_int[i+1] = s->didx.ptr.p_int[i]+s->ridx.ptr.p_int[i+1];
            for(j=i-s->uidx.ptr.p_int[i]; j<=i-1; j++)
            {
                s->ridx.ptr.p_int[j+1] = s->ridx.ptr.p_int[j+1]+1;
            }
            nonne = nonne+s->didx.ptr.p_int[i]+1+s->uidx.ptr.p_int[i];
        }
        /* prefix sum: row sizes -> row offsets */
        for(i=0; i<=s->m-1; i++)
        {
            s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i+1]+s->ridx.ptr.p_int[i];
        }
        s->ninitialized = s->ridx.ptr.p_int[s->m];

        /*
         * Allocate memory and move elements to Vals/Idx.
         * Initially, elements are sorted by rows, and are sorted within row too.
         * No additional post-sorting is required.
         */
        ae_vector_set_length(&temp, m, _state);
        for(i=0; i<=m-1; i++)
        {
            temp.ptr.p_int[i] = 0;
        }
        rvectorsetlengthatleast(&s->vals, nonne, _state);
        ivectorsetlengthatleast(&s->idx, nonne, _state);
        for(i=0; i<=m-1; i++)
        {

            /*
             * copy subdiagonal and diagonal parts of I-th block
             * (row I, columns I-DIdx[I] .. I, contiguous in both formats)
             */
            offs0 = tridx.ptr.p_int[i];
            offs1 = s->ridx.ptr.p_int[i]+temp.ptr.p_int[i];
            k = s->didx.ptr.p_int[i]+1;
            for(j=0; j<=k-1; j++)
            {
                s->vals.ptr.p_double[offs1+j] = tvals.ptr.p_double[offs0+j];
                s->idx.ptr.p_int[offs1+j] = i-s->didx.ptr.p_int[i]+j;
            }
            temp.ptr.p_int[i] = temp.ptr.p_int[i]+s->didx.ptr.p_int[i]+1;

            /*
             * Copy superdiagonal part of I-th block
             * (column I, rows I-UIdx[I] .. I-1 — scattered to their rows)
             */
            offs0 = tridx.ptr.p_int[i]+s->didx.ptr.p_int[i]+1;
            k = s->uidx.ptr.p_int[i];
            for(j=0; j<=k-1; j++)
            {
                offs1 = s->ridx.ptr.p_int[i-k+j]+temp.ptr.p_int[i-k+j];
                s->vals.ptr.p_double[offs1] = tvals.ptr.p_double[offs0+j];
                s->idx.ptr.p_int[offs1] = i;
                temp.ptr.p_int[i-k+j] = temp.ptr.p_int[i-k+j]+1;
            }
        }

        /*
         * Initialization 'S.UIdx' and 'S.DIdx'
         */
        sparseinitduidx(s, _state);
        ae_frame_leave(_state);
        return;
    }
    /* unreachable for valid matrix types; defensive check */
    ae_assert(ae_false, "SparseConvertToCRS: invalid matrix type", _state);
    ae_frame_leave(_state);
}
29652 
29653 
29654 /*************************************************************************
29655 This  function  performs  out-of-place  conversion  to  CRS format.  S0 is
29656 copied to S1 and converted on-the-fly.
29657 
29658 INPUT PARAMETERS
29659     S0          -   sparse matrix in any format.
29660 
29661 OUTPUT PARAMETERS
29662     S1          -   sparse matrix in CRS format.
29663 
29664 NOTE: if S0 is stored as CRS, it is just copied without conversion.
29665 
29666 NOTE: this function de-allocates memory occupied by S1 before starting CRS
29667       conversion. If you perform a lot of repeated CRS conversions, it may
29668       lead to memory fragmentation. In this case we recommend you  to  use
29669       SparseCopyToCRSBuf() function which re-uses memory in S1 as much  as
29670       possible.
29671 
29672   -- ALGLIB PROJECT --
29673      Copyright 20.07.2012 by Bochkanov Sergey
29674 *************************************************************************/
sparsecopytocrs(sparsematrix * s0,sparsematrix * s1,ae_state * _state)29675 void sparsecopytocrs(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
29676 {
29677 
29678     _sparsematrix_clear(s1);
29679 
29680     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToCRS: invalid matrix type", _state);
29681     sparsecopytocrsbuf(s0, s1, _state);
29682 }
29683 
29684 
29685 /*************************************************************************
29686 This  function  performs  out-of-place  conversion  to  CRS format.  S0 is
29687 copied to S1 and converted on-the-fly. Memory allocated in S1 is reused to
29688 maximum extent possible.
29689 
29690 INPUT PARAMETERS
29691     S0          -   sparse matrix in any format.
29692     S1          -   matrix which may contain some pre-allocated memory, or
29693                     can be just uninitialized structure.
29694 
29695 OUTPUT PARAMETERS
29696     S1          -   sparse matrix in CRS format.
29697 
29698 NOTE: if S0 is stored as CRS, it is just copied without conversion.
29699 
29700   -- ALGLIB PROJECT --
29701      Copyright 20.07.2012 by Bochkanov Sergey
29702 *************************************************************************/
void sparsecopytocrsbuf(sparsematrix* s0,
     sparsematrix* s1,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector temp;   /* per-row insertion counters during the scatter pass */
    ae_int_t nonne;   /* total number of stored (non-empty) elements        */
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t offs0;
    ae_int_t offs1;
    ae_int_t m;

    ae_frame_make(_state, &_frame_block);
    memset(&temp, 0, sizeof(temp));
    ae_vector_init(&temp, 0, DT_INT, _state, ae_true);

    ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToCRSBuf: invalid matrix type", _state);
    m = s0->m;
    if( s0->matrixtype==0 )
    {

        /*
         * Convert from hash-table to CRS
         * Done like ConvertToCRS function
         */
        s1->matrixtype = 1;
        s1->m = s0->m;
        s1->n = s0->n;
        s1->nfree = s0->nfree;
        nonne = 0;
        k = s0->tablesize;
        ivectorsetlengthatleast(&s1->ridx, s1->m+1, _state);
        for(i=0; i<=s1->m; i++)
        {
            s1->ridx.ptr.p_int[i] = 0;
        }
        ae_vector_set_length(&temp, s1->m, _state);
        for(i=0; i<=s1->m-1; i++)
        {
            temp.ptr.p_int[i] = 0;
        }

        /*
         * Number of elements per row
         * (hash slot I is occupied iff its row index Idx[2*I] is >=0;
         *  Idx[2*I+1] is the column index)
         */
        for(i=0; i<=k-1; i++)
        {
            if( s0->idx.ptr.p_int[2*i]>=0 )
            {
                s1->ridx.ptr.p_int[s0->idx.ptr.p_int[2*i]+1] = s1->ridx.ptr.p_int[s0->idx.ptr.p_int[2*i]+1]+1;
                nonne = nonne+1;
            }
        }

        /*
         * Fill RIdx (offsets of rows) via prefix sum of row sizes
         */
        for(i=0; i<=s1->m-1; i++)
        {
            s1->ridx.ptr.p_int[i+1] = s1->ridx.ptr.p_int[i+1]+s1->ridx.ptr.p_int[i];
        }

        /*
         * Allocate memory, then scatter each occupied hash slot into
         * position RIdx[row]+Temp[row], advancing Temp[row] afterwards
         */
        rvectorsetlengthatleast(&s1->vals, nonne, _state);
        ivectorsetlengthatleast(&s1->idx, nonne, _state);
        for(i=0; i<=k-1; i++)
        {
            if( s0->idx.ptr.p_int[2*i]>=0 )
            {
                s1->vals.ptr.p_double[s1->ridx.ptr.p_int[s0->idx.ptr.p_int[2*i]]+temp.ptr.p_int[s0->idx.ptr.p_int[2*i]]] = s0->vals.ptr.p_double[i];
                s1->idx.ptr.p_int[s1->ridx.ptr.p_int[s0->idx.ptr.p_int[2*i]]+temp.ptr.p_int[s0->idx.ptr.p_int[2*i]]] = s0->idx.ptr.p_int[2*i+1];
                temp.ptr.p_int[s0->idx.ptr.p_int[2*i]] = temp.ptr.p_int[s0->idx.ptr.p_int[2*i]]+1;
            }
        }

        /*
         * Set NInitialized
         */
        s1->ninitialized = s1->ridx.ptr.p_int[s1->m];

        /*
         * Sorting of elements (each row sorted by column index,
         * with Vals kept in sync)
         */
        for(i=0; i<=s1->m-1; i++)
        {
            tagsortmiddleir(&s1->idx, &s1->vals, s1->ridx.ptr.p_int[i], s1->ridx.ptr.p_int[i+1]-s1->ridx.ptr.p_int[i], _state);
        }

        /*
         * Initialization 'S.UIdx' and 'S.DIdx'
         */
        sparseinitduidx(s1, _state);
        ae_frame_leave(_state);
        return;
    }
    if( s0->matrixtype==1 )
    {

        /*
         * Already CRS, just copy
         */
        sparsecopybuf(s0, s1, _state);
        ae_frame_leave(_state);
        return;
    }
    if( s0->matrixtype==2 )
    {
        ae_assert(s0->m==s0->n, "SparseCopyToCRS: non-square SKS matrices are not supported", _state);

        /*
         * From SKS to CRS.
         */
        s1->m = s0->m;
        s1->n = s0->n;
        s1->matrixtype = 1;

        /*
         * Fill RIdx by number of elements per row:
         * RIdx[I+1] stores number of elements in I-th row.
         *
         * Convert RIdx from row sizes to row offsets.
         * Set NInitialized
         */
        ivectorsetlengthatleast(&s1->ridx, m+1, _state);
        s1->ridx.ptr.p_int[0] = 0;
        for(i=1; i<=m; i++)
        {
            /* every row has at least its diagonal element */
            s1->ridx.ptr.p_int[i] = 1;
        }
        nonne = 0;
        for(i=0; i<=m-1; i++)
        {
            /* row I also holds DIdx[I] subdiagonal elements; the UIdx[I]
               superdiagonal elements of column I are credited to rows
               I-UIdx[I] .. I-1 */
            s1->ridx.ptr.p_int[i+1] = s0->didx.ptr.p_int[i]+s1->ridx.ptr.p_int[i+1];
            for(j=i-s0->uidx.ptr.p_int[i]; j<=i-1; j++)
            {
                s1->ridx.ptr.p_int[j+1] = s1->ridx.ptr.p_int[j+1]+1;
            }
            nonne = nonne+s0->didx.ptr.p_int[i]+1+s0->uidx.ptr.p_int[i];
        }
        /* prefix sum: row sizes -> row offsets */
        for(i=0; i<=m-1; i++)
        {
            s1->ridx.ptr.p_int[i+1] = s1->ridx.ptr.p_int[i+1]+s1->ridx.ptr.p_int[i];
        }
        s1->ninitialized = s1->ridx.ptr.p_int[m];

        /*
         * Allocate memory and move elements to Vals/Idx.
         * Initially, elements are sorted by rows, and are sorted within row too.
         * No additional post-sorting is required.
         */
        ae_vector_set_length(&temp, m, _state);
        for(i=0; i<=m-1; i++)
        {
            temp.ptr.p_int[i] = 0;
        }
        rvectorsetlengthatleast(&s1->vals, nonne, _state);
        ivectorsetlengthatleast(&s1->idx, nonne, _state);
        for(i=0; i<=m-1; i++)
        {

            /*
             * copy subdiagonal and diagonal parts of I-th block
             * (row I, columns I-DIdx[I] .. I, contiguous in both formats)
             */
            offs0 = s0->ridx.ptr.p_int[i];
            offs1 = s1->ridx.ptr.p_int[i]+temp.ptr.p_int[i];
            k = s0->didx.ptr.p_int[i]+1;
            for(j=0; j<=k-1; j++)
            {
                s1->vals.ptr.p_double[offs1+j] = s0->vals.ptr.p_double[offs0+j];
                s1->idx.ptr.p_int[offs1+j] = i-s0->didx.ptr.p_int[i]+j;
            }
            temp.ptr.p_int[i] = temp.ptr.p_int[i]+s0->didx.ptr.p_int[i]+1;

            /*
             * Copy superdiagonal part of I-th block
             * (column I, rows I-UIdx[I] .. I-1 — scattered to their rows)
             */
            offs0 = s0->ridx.ptr.p_int[i]+s0->didx.ptr.p_int[i]+1;
            k = s0->uidx.ptr.p_int[i];
            for(j=0; j<=k-1; j++)
            {
                offs1 = s1->ridx.ptr.p_int[i-k+j]+temp.ptr.p_int[i-k+j];
                s1->vals.ptr.p_double[offs1] = s0->vals.ptr.p_double[offs0+j];
                s1->idx.ptr.p_int[offs1] = i;
                temp.ptr.p_int[i-k+j] = temp.ptr.p_int[i-k+j]+1;
            }
        }

        /*
         * Initialization 'S.UIdx' and 'S.DIdx'
         */
        sparseinitduidx(s1, _state);
        ae_frame_leave(_state);
        return;
    }
    /* unreachable for valid matrix types; defensive check */
    ae_assert(ae_false, "SparseCopyToCRSBuf: unexpected matrix type", _state);
    ae_frame_leave(_state);
}
29904 
29905 
29906 /*************************************************************************
29907 This function performs in-place conversion to SKS format.
29908 
29909 INPUT PARAMETERS
29910     S           -   sparse matrix in any format.
29911 
29912 OUTPUT PARAMETERS
29913     S           -   sparse matrix in SKS format.
29914 
29915 NOTE: this  function  has   no  effect  when  called with matrix which  is
29916       already in SKS mode.
29917 
29918 NOTE: in-place conversion involves allocation of temporary arrays. If  you
29919       perform a lot of repeated in- place  conversions,  it  may  lead  to
29920       memory fragmentation. Consider using out-of-place SparseCopyToSKSBuf()
29921       function in this case.
29922 
29923   -- ALGLIB PROJECT --
29924      Copyright 15.01.2014 by Bochkanov Sergey
29925 *************************************************************************/
void sparseconverttosks(sparsematrix* s, ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector tridx;  /* SKS row offsets being built                         */
    ae_vector tdidx;  /* subdiagonal band width per row; [N] = max over rows */
    ae_vector tuidx;  /* superdiagonal band width per column; [N] = max      */
    ae_vector tvals;  /* SKS value storage being built                       */
    ae_int_t n;
    ae_int_t t0;
    ae_int_t t1;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;

    ae_frame_make(_state, &_frame_block);
    memset(&tridx, 0, sizeof(tridx));
    memset(&tdidx, 0, sizeof(tdidx));
    memset(&tuidx, 0, sizeof(tuidx));
    memset(&tvals, 0, sizeof(tvals));
    ae_vector_init(&tridx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tdidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tuidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tvals, 0, DT_REAL, _state, ae_true);

    ae_assert((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2, "SparseConvertToSKS: invalid matrix type", _state);
    ae_assert(s->m==s->n, "SparseConvertToSKS: rectangular matrices are not supported", _state);
    n = s->n;
    if( s->matrixtype==2 )
    {

        /*
         * Already in SKS mode
         */
        ae_frame_leave(_state);
        return;
    }

    /*
     * Generate internal copy of SKS matrix.
     *
     * Pass 1: enumerate all elements to determine, for every row I, the
     * subdiagonal band width DIdx[I] (distance to the leftmost element in
     * the row) and, for every column J, the superdiagonal band width
     * UIdx[J] (distance to the topmost element in the column).
     */
    ivectorsetlengthatleast(&tdidx, n+1, _state);
    ivectorsetlengthatleast(&tuidx, n+1, _state);
    for(i=0; i<=n; i++)
    {
        tdidx.ptr.p_int[i] = 0;
        tuidx.ptr.p_int[i] = 0;
    }
    t0 = 0;
    t1 = 0;
    while(sparseenumerate(s, &t0, &t1, &i, &j, &v, _state))
    {
        if( j<i )
        {
            tdidx.ptr.p_int[i] = ae_maxint(tdidx.ptr.p_int[i], i-j, _state);
        }
        else
        {
            tuidx.ptr.p_int[j] = ae_maxint(tuidx.ptr.p_int[j], j-i, _state);
        }
    }
    /* RIdx[I] = offset of I-th block; each block stores DIdx[I] subdiagonal
       elements, the diagonal element, and UIdx[I] superdiagonal elements */
    ivectorsetlengthatleast(&tridx, n+1, _state);
    tridx.ptr.p_int[0] = 0;
    for(i=1; i<=n; i++)
    {
        tridx.ptr.p_int[i] = tridx.ptr.p_int[i-1]+tdidx.ptr.p_int[i-1]+1+tuidx.ptr.p_int[i-1];
    }
    /* zero-fill storage: elements inside the band but absent from the
       source matrix must read as 0 */
    rvectorsetlengthatleast(&tvals, tridx.ptr.p_int[n], _state);
    k = tridx.ptr.p_int[n];
    for(i=0; i<=k-1; i++)
    {
        tvals.ptr.p_double[i] = 0.0;
    }
    /*
     * Pass 2: enumerate again and place each element at its SKS position:
     * subdiagonal/diagonal elements go into row blocks, superdiagonal
     * elements into the tail of their column's block.
     */
    t0 = 0;
    t1 = 0;
    while(sparseenumerate(s, &t0, &t1, &i, &j, &v, _state))
    {
        if( j<=i )
        {
            tvals.ptr.p_double[tridx.ptr.p_int[i]+tdidx.ptr.p_int[i]-(i-j)] = v;
        }
        else
        {
            tvals.ptr.p_double[tridx.ptr.p_int[j+1]-(j-i)] = v;
        }
    }
    /* DIdx[N]/UIdx[N] hold the maximum band widths over all rows/columns */
    for(i=0; i<=n-1; i++)
    {
        tdidx.ptr.p_int[n] = ae_maxint(tdidx.ptr.p_int[n], tdidx.ptr.p_int[i], _state);
        tuidx.ptr.p_int[n] = ae_maxint(tuidx.ptr.p_int[n], tuidx.ptr.p_int[i], _state);
    }
    /* commit: switch the matrix to SKS mode and swap in the new arrays */
    s->matrixtype = 2;
    s->ninitialized = 0;
    s->nfree = 0;
    s->m = n;
    s->n = n;
    ae_swap_vectors(&s->didx, &tdidx);
    ae_swap_vectors(&s->uidx, &tuidx);
    ae_swap_vectors(&s->ridx, &tridx);
    ae_swap_vectors(&s->vals, &tvals);
    ae_frame_leave(_state);
}
30028 
30029 
30030 /*************************************************************************
30031 This  function  performs  out-of-place  conversion  to SKS storage format.
30032 S0 is copied to S1 and converted on-the-fly.
30033 
30034 INPUT PARAMETERS
30035     S0          -   sparse matrix in any format.
30036 
30037 OUTPUT PARAMETERS
30038     S1          -   sparse matrix in SKS format.
30039 
30040 NOTE: if S0 is stored as SKS, it is just copied without conversion.
30041 
30042 NOTE: this function de-allocates memory  occupied  by  S1 before  starting
30043       conversion. If you perform a  lot  of  repeated  conversions, it may
30044       lead to memory fragmentation. In this case we recommend you  to  use
30045       SparseCopyToSKSBuf() function which re-uses memory in S1 as much  as
30046       possible.
30047 
30048   -- ALGLIB PROJECT --
30049      Copyright 20.07.2012 by Bochkanov Sergey
30050 *************************************************************************/
sparsecopytosks(sparsematrix * s0,sparsematrix * s1,ae_state * _state)30051 void sparsecopytosks(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
30052 {
30053 
30054     _sparsematrix_clear(s1);
30055 
30056     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToSKS: invalid matrix type", _state);
30057     sparsecopytosksbuf(s0, s1, _state);
30058 }
30059 
30060 
30061 /*************************************************************************
30062 This  function  performs  out-of-place  conversion  to SKS format.  S0  is
30063 copied to S1 and converted on-the-fly. Memory  allocated  in S1 is  reused
30064 to maximum extent possible.
30065 
30066 INPUT PARAMETERS
30067     S0          -   sparse matrix in any format.
30068 
30069 OUTPUT PARAMETERS
30070     S1          -   sparse matrix in SKS format.
30071 
30072 NOTE: if S0 is stored as SKS, it is just copied without conversion.
30073 
30074   -- ALGLIB PROJECT --
30075      Copyright 20.07.2012 by Bochkanov Sergey
30076 *************************************************************************/
sparsecopytosksbuf(sparsematrix * s0,sparsematrix * s1,ae_state * _state)30077 void sparsecopytosksbuf(sparsematrix* s0,
30078      sparsematrix* s1,
30079      ae_state *_state)
30080 {
30081     double v;
30082     ae_int_t n;
30083     ae_int_t t0;
30084     ae_int_t t1;
30085     ae_int_t i;
30086     ae_int_t j;
30087     ae_int_t k;
30088 
30089 
30090     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToSKSBuf: invalid matrix type", _state);
30091     ae_assert(s0->m==s0->n, "SparseCopyToSKSBuf: rectangular matrices are not supported", _state);
30092     n = s0->n;
30093     if( s0->matrixtype==2 )
30094     {
30095 
30096         /*
30097          * Already SKS, just copy
30098          */
30099         sparsecopybuf(s0, s1, _state);
30100         return;
30101     }
30102 
30103     /*
30104      * Generate copy of matrix in the SKS format
30105      */
30106     ivectorsetlengthatleast(&s1->didx, n+1, _state);
30107     ivectorsetlengthatleast(&s1->uidx, n+1, _state);
30108     for(i=0; i<=n; i++)
30109     {
30110         s1->didx.ptr.p_int[i] = 0;
30111         s1->uidx.ptr.p_int[i] = 0;
30112     }
30113     t0 = 0;
30114     t1 = 0;
30115     while(sparseenumerate(s0, &t0, &t1, &i, &j, &v, _state))
30116     {
30117         if( j<i )
30118         {
30119             s1->didx.ptr.p_int[i] = ae_maxint(s1->didx.ptr.p_int[i], i-j, _state);
30120         }
30121         else
30122         {
30123             s1->uidx.ptr.p_int[j] = ae_maxint(s1->uidx.ptr.p_int[j], j-i, _state);
30124         }
30125     }
30126     ivectorsetlengthatleast(&s1->ridx, n+1, _state);
30127     s1->ridx.ptr.p_int[0] = 0;
30128     for(i=1; i<=n; i++)
30129     {
30130         s1->ridx.ptr.p_int[i] = s1->ridx.ptr.p_int[i-1]+s1->didx.ptr.p_int[i-1]+1+s1->uidx.ptr.p_int[i-1];
30131     }
30132     rvectorsetlengthatleast(&s1->vals, s1->ridx.ptr.p_int[n], _state);
30133     k = s1->ridx.ptr.p_int[n];
30134     for(i=0; i<=k-1; i++)
30135     {
30136         s1->vals.ptr.p_double[i] = 0.0;
30137     }
30138     t0 = 0;
30139     t1 = 0;
30140     while(sparseenumerate(s0, &t0, &t1, &i, &j, &v, _state))
30141     {
30142         if( j<=i )
30143         {
30144             s1->vals.ptr.p_double[s1->ridx.ptr.p_int[i]+s1->didx.ptr.p_int[i]-(i-j)] = v;
30145         }
30146         else
30147         {
30148             s1->vals.ptr.p_double[s1->ridx.ptr.p_int[j+1]-(j-i)] = v;
30149         }
30150     }
30151     for(i=0; i<=n-1; i++)
30152     {
30153         s1->didx.ptr.p_int[n] = ae_maxint(s1->didx.ptr.p_int[n], s1->didx.ptr.p_int[i], _state);
30154         s1->uidx.ptr.p_int[n] = ae_maxint(s1->uidx.ptr.p_int[n], s1->uidx.ptr.p_int[i], _state);
30155     }
30156     s1->matrixtype = 2;
30157     s1->ninitialized = 0;
30158     s1->nfree = 0;
30159     s1->m = n;
30160     s1->n = n;
30161 }
30162 
30163 
30164 /*************************************************************************
30165 This non-accessible to user function performs  in-place  creation  of  CRS
30166 matrix. It is expected that:
30167 * S.M and S.N are initialized
30168 * S.RIdx, S.Idx and S.Vals are loaded with values in CRS  format  used  by
30169   ALGLIB, with elements of S.Idx/S.Vals  possibly  being  unsorted  within
30170   each row (this constructor function may post-sort matrix,  assuming that
30171   it is sorted by rows).
30172 
30173 Only 5 fields should be set by caller. Other fields will be  rewritten  by
30174 this constructor function.
30175 
30176 This function performs integrity check on user-specified values, with  the
30177 only exception being Vals[] array:
30178 * it does not require values to be non-zero
30179 * it does not check for elements of Vals[] being finite IEEE-754 values
30180 
30181 INPUT PARAMETERS
30182     S   -   sparse matrix with corresponding fields set by caller
30183 
30184 OUTPUT PARAMETERS
30185     S   -   sparse matrix in CRS format.
30186 
30187   -- ALGLIB PROJECT --
30188      Copyright 20.08.2016 by Bochkanov Sergey
30189 *************************************************************************/
sparsecreatecrsinplace(sparsematrix * s,ae_state * _state)30190 void sparsecreatecrsinplace(sparsematrix* s, ae_state *_state)
30191 {
30192     ae_int_t m;
30193     ae_int_t n;
30194     ae_int_t i;
30195     ae_int_t j;
30196     ae_int_t j0;
30197     ae_int_t j1;
30198 
30199 
30200     m = s->m;
30201     n = s->n;
30202 
30203     /*
30204      * Quick exit for M=0 or N=0
30205      */
30206     ae_assert(s->m>=0, "SparseCreateCRSInplace: integrity check failed", _state);
30207     ae_assert(s->n>=0, "SparseCreateCRSInplace: integrity check failed", _state);
30208     if( m==0||n==0 )
30209     {
30210         s->matrixtype = 1;
30211         s->ninitialized = 0;
30212         ivectorsetlengthatleast(&s->ridx, s->m+1, _state);
30213         ivectorsetlengthatleast(&s->didx, s->m, _state);
30214         ivectorsetlengthatleast(&s->uidx, s->m, _state);
30215         for(i=0; i<=s->m-1; i++)
30216         {
30217             s->ridx.ptr.p_int[i] = 0;
30218             s->uidx.ptr.p_int[i] = 0;
30219             s->didx.ptr.p_int[i] = 0;
30220         }
30221         s->ridx.ptr.p_int[s->m] = 0;
30222         return;
30223     }
30224 
30225     /*
30226      * Perform integrity check
30227      */
30228     ae_assert(s->m>0, "SparseCreateCRSInplace: integrity check failed", _state);
30229     ae_assert(s->n>0, "SparseCreateCRSInplace: integrity check failed", _state);
30230     ae_assert(s->ridx.cnt>=m+1, "SparseCreateCRSInplace: integrity check failed", _state);
30231     for(i=0; i<=m-1; i++)
30232     {
30233         ae_assert(s->ridx.ptr.p_int[i]>=0&&s->ridx.ptr.p_int[i]<=s->ridx.ptr.p_int[i+1], "SparseCreateCRSInplace: integrity check failed", _state);
30234     }
30235     ae_assert(s->ridx.ptr.p_int[m]<=s->idx.cnt, "SparseCreateCRSInplace: integrity check failed", _state);
30236     ae_assert(s->ridx.ptr.p_int[m]<=s->vals.cnt, "SparseCreateCRSInplace: integrity check failed", _state);
30237     for(i=0; i<=m-1; i++)
30238     {
30239         j0 = s->ridx.ptr.p_int[i];
30240         j1 = s->ridx.ptr.p_int[i+1]-1;
30241         for(j=j0; j<=j1; j++)
30242         {
30243             ae_assert(s->idx.ptr.p_int[j]>=0&&s->idx.ptr.p_int[j]<n, "SparseCreateCRSInplace: integrity check failed", _state);
30244         }
30245     }
30246 
30247     /*
30248      * Initialize
30249      */
30250     s->matrixtype = 1;
30251     s->ninitialized = s->ridx.ptr.p_int[m];
30252     for(i=0; i<=m-1; i++)
30253     {
30254         tagsortmiddleir(&s->idx, &s->vals, s->ridx.ptr.p_int[i], s->ridx.ptr.p_int[i+1]-s->ridx.ptr.p_int[i], _state);
30255     }
30256     sparseinitduidx(s, _state);
30257 }
30258 
30259 
30260 /*************************************************************************
30261 This function returns type of the matrix storage format.
30262 
30263 INPUT PARAMETERS:
30264     S           -   sparse matrix.
30265 
30266 RESULT:
30267     sparse storage format used by matrix:
30268         0   -   Hash-table
30269         1   -   CRS (compressed row storage)
30270         2   -   SKS (skyline)
30271 
30272 NOTE: future  versions  of  ALGLIB  may  include additional sparse storage
30273       formats.
30274 
30275 
30276   -- ALGLIB PROJECT --
30277      Copyright 20.07.2012 by Bochkanov Sergey
30278 *************************************************************************/
sparsegetmatrixtype(sparsematrix * s,ae_state * _state)30279 ae_int_t sparsegetmatrixtype(sparsematrix* s, ae_state *_state)
30280 {
30281     ae_int_t result;
30282 
30283 
30284     ae_assert((((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2)||s->matrixtype==-10081)||s->matrixtype==-10082, "SparseGetMatrixType: invalid matrix type", _state);
30285     result = s->matrixtype;
30286     return result;
30287 }
30288 
30289 
30290 /*************************************************************************
30291 This function checks matrix storage format and returns True when matrix is
30292 stored using Hash table representation.
30293 
30294 INPUT PARAMETERS:
30295     S   -   sparse matrix.
30296 
30297 RESULT:
30298     True if matrix type is Hash table
30299     False if matrix type is not Hash table
30300 
30301   -- ALGLIB PROJECT --
30302      Copyright 20.07.2012 by Bochkanov Sergey
30303 *************************************************************************/
sparseishash(sparsematrix * s,ae_state * _state)30304 ae_bool sparseishash(sparsematrix* s, ae_state *_state)
30305 {
30306     ae_bool result;
30307 
30308 
30309     ae_assert((((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2)||s->matrixtype==-10081)||s->matrixtype==-10082, "SparseIsHash: invalid matrix type", _state);
30310     result = s->matrixtype==0;
30311     return result;
30312 }
30313 
30314 
30315 /*************************************************************************
30316 This function checks matrix storage format and returns True when matrix is
30317 stored using CRS representation.
30318 
30319 INPUT PARAMETERS:
30320     S   -   sparse matrix.
30321 
30322 RESULT:
30323     True if matrix type is CRS
30324     False if matrix type is not CRS
30325 
30326   -- ALGLIB PROJECT --
30327      Copyright 20.07.2012 by Bochkanov Sergey
30328 *************************************************************************/
sparseiscrs(sparsematrix * s,ae_state * _state)30329 ae_bool sparseiscrs(sparsematrix* s, ae_state *_state)
30330 {
30331     ae_bool result;
30332 
30333 
30334     ae_assert((((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2)||s->matrixtype==-10081)||s->matrixtype==-10082, "SparseIsCRS: invalid matrix type", _state);
30335     result = s->matrixtype==1;
30336     return result;
30337 }
30338 
30339 
30340 /*************************************************************************
30341 This function checks matrix storage format and returns True when matrix is
30342 stored using SKS representation.
30343 
30344 INPUT PARAMETERS:
30345     S   -   sparse matrix.
30346 
30347 RESULT:
30348     True if matrix type is SKS
30349     False if matrix type is not SKS
30350 
30351   -- ALGLIB PROJECT --
30352      Copyright 20.07.2012 by Bochkanov Sergey
30353 *************************************************************************/
sparseissks(sparsematrix * s,ae_state * _state)30354 ae_bool sparseissks(sparsematrix* s, ae_state *_state)
30355 {
30356     ae_bool result;
30357 
30358 
30359     ae_assert((((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2)||s->matrixtype==-10081)||s->matrixtype==-10082, "SparseIsSKS: invalid matrix type", _state);
30360     result = s->matrixtype==2;
30361     return result;
30362 }
30363 
30364 
30365 /*************************************************************************
30366 The function frees all memory occupied by  sparse  matrix.  Sparse  matrix
30367 structure becomes unusable after this call.
30368 
30369 OUTPUT PARAMETERS
30370     S   -   sparse matrix to delete
30371 
30372   -- ALGLIB PROJECT --
30373      Copyright 24.07.2012 by Bochkanov Sergey
30374 *************************************************************************/
sparsefree(sparsematrix * s,ae_state * _state)30375 void sparsefree(sparsematrix* s, ae_state *_state)
30376 {
30377 
30378     _sparsematrix_clear(s);
30379 
30380     s->matrixtype = -1;
30381     s->m = 0;
30382     s->n = 0;
30383     s->nfree = 0;
30384     s->ninitialized = 0;
30385     s->tablesize = 0;
30386 }
30387 
30388 
30389 /*************************************************************************
30390 The function returns number of rows of a sparse matrix.
30391 
30392 RESULT: number of rows of a sparse matrix.
30393 
30394   -- ALGLIB PROJECT --
30395      Copyright 23.08.2012 by Bochkanov Sergey
30396 *************************************************************************/
sparsegetnrows(sparsematrix * s,ae_state * _state)30397 ae_int_t sparsegetnrows(sparsematrix* s, ae_state *_state)
30398 {
30399     ae_int_t result;
30400 
30401 
30402     result = s->m;
30403     return result;
30404 }
30405 
30406 
30407 /*************************************************************************
30408 The function returns number of columns of a sparse matrix.
30409 
30410 RESULT: number of columns of a sparse matrix.
30411 
30412   -- ALGLIB PROJECT --
30413      Copyright 23.08.2012 by Bochkanov Sergey
30414 *************************************************************************/
sparsegetncols(sparsematrix * s,ae_state * _state)30415 ae_int_t sparsegetncols(sparsematrix* s, ae_state *_state)
30416 {
30417     ae_int_t result;
30418 
30419 
30420     result = s->n;
30421     return result;
30422 }
30423 
30424 
30425 /*************************************************************************
30426 The function returns number of strictly upper triangular non-zero elements
30427 in  the  matrix.  It  counts  SYMBOLICALLY non-zero elements, i.e. entries
30428 in the sparse matrix data structure. If some element  has  zero  numerical
30429 value, it is still counted.
30430 
30431 This function has different cost for different types of matrices:
30432 * for hash-based matrices it involves complete pass over entire hash-table
30433   with O(NNZ) cost, where NNZ is number of non-zero elements
30434 * for CRS and SKS matrix types cost of counting is O(N) (N - matrix size).
30435 
30436 RESULT: number of non-zero elements strictly above main diagonal
30437 
30438   -- ALGLIB PROJECT --
30439      Copyright 12.02.2014 by Bochkanov Sergey
30440 *************************************************************************/
sparsegetuppercount(sparsematrix * s,ae_state * _state)30441 ae_int_t sparsegetuppercount(sparsematrix* s, ae_state *_state)
30442 {
30443     ae_int_t sz;
30444     ae_int_t i0;
30445     ae_int_t i;
30446     ae_int_t result;
30447 
30448 
30449     result = -1;
30450     if( s->matrixtype==0 )
30451     {
30452 
30453         /*
30454          * Hash-table matrix
30455          */
30456         result = 0;
30457         sz = s->tablesize;
30458         for(i0=0; i0<=sz-1; i0++)
30459         {
30460             i = s->idx.ptr.p_int[2*i0];
30461             if( i>=0&&s->idx.ptr.p_int[2*i0+1]>i )
30462             {
30463                 result = result+1;
30464             }
30465         }
30466         return result;
30467     }
30468     if( s->matrixtype==1 )
30469     {
30470 
30471         /*
30472          * CRS matrix
30473          */
30474         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGetUpperCount: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
30475         result = 0;
30476         sz = s->m;
30477         for(i=0; i<=sz-1; i++)
30478         {
30479             result = result+(s->ridx.ptr.p_int[i+1]-s->uidx.ptr.p_int[i]);
30480         }
30481         return result;
30482     }
30483     if( s->matrixtype==2 )
30484     {
30485 
30486         /*
30487          * SKS matrix
30488          */
30489         ae_assert(s->m==s->n, "SparseGetUpperCount: non-square SKS matrices are not supported", _state);
30490         result = 0;
30491         sz = s->m;
30492         for(i=0; i<=sz-1; i++)
30493         {
30494             result = result+s->uidx.ptr.p_int[i];
30495         }
30496         return result;
30497     }
30498     ae_assert(ae_false, "SparseGetUpperCount: internal error", _state);
30499     return result;
30500 }
30501 
30502 
30503 /*************************************************************************
30504 The function returns number of strictly lower triangular non-zero elements
30505 in  the  matrix.  It  counts  SYMBOLICALLY non-zero elements, i.e. entries
30506 in the sparse matrix data structure. If some element  has  zero  numerical
30507 value, it is still counted.
30508 
30509 This function has different cost for different types of matrices:
30510 * for hash-based matrices it involves complete pass over entire hash-table
30511   with O(NNZ) cost, where NNZ is number of non-zero elements
30512 * for CRS and SKS matrix types cost of counting is O(N) (N - matrix size).
30513 
30514 RESULT: number of non-zero elements strictly below main diagonal
30515 
30516   -- ALGLIB PROJECT --
30517      Copyright 12.02.2014 by Bochkanov Sergey
30518 *************************************************************************/
sparsegetlowercount(sparsematrix * s,ae_state * _state)30519 ae_int_t sparsegetlowercount(sparsematrix* s, ae_state *_state)
30520 {
30521     ae_int_t sz;
30522     ae_int_t i0;
30523     ae_int_t i;
30524     ae_int_t result;
30525 
30526 
30527     result = -1;
30528     if( s->matrixtype==0 )
30529     {
30530 
30531         /*
30532          * Hash-table matrix
30533          */
30534         result = 0;
30535         sz = s->tablesize;
30536         for(i0=0; i0<=sz-1; i0++)
30537         {
30538             i = s->idx.ptr.p_int[2*i0];
30539             if( i>=0&&s->idx.ptr.p_int[2*i0+1]<i )
30540             {
30541                 result = result+1;
30542             }
30543         }
30544         return result;
30545     }
30546     if( s->matrixtype==1 )
30547     {
30548 
30549         /*
30550          * CRS matrix
30551          */
30552         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGetUpperCount: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
30553         result = 0;
30554         sz = s->m;
30555         for(i=0; i<=sz-1; i++)
30556         {
30557             result = result+(s->didx.ptr.p_int[i]-s->ridx.ptr.p_int[i]);
30558         }
30559         return result;
30560     }
30561     if( s->matrixtype==2 )
30562     {
30563 
30564         /*
30565          * SKS matrix
30566          */
30567         ae_assert(s->m==s->n, "SparseGetUpperCount: non-square SKS matrices are not supported", _state);
30568         result = 0;
30569         sz = s->m;
30570         for(i=0; i<=sz-1; i++)
30571         {
30572             result = result+s->didx.ptr.p_int[i];
30573         }
30574         return result;
30575     }
30576     ae_assert(ae_false, "SparseGetUpperCount: internal error", _state);
30577     return result;
30578 }
30579 
30580 
30581 /*************************************************************************
30582 Serializer: allocation.
30583 
30584 INTERNAL-ONLY FUNCTION, SUPPORTS ONLY CRS MATRICES
30585 
30586   -- ALGLIB --
30587      Copyright 20.07.2021 by Bochkanov Sergey
30588 *************************************************************************/
sparsealloc(ae_serializer * s,sparsematrix * a,ae_state * _state)30589 void sparsealloc(ae_serializer* s, sparsematrix* a, ae_state *_state)
30590 {
30591     ae_int_t i;
30592     ae_int_t nused;
30593 
30594 
30595     ae_assert((a->matrixtype==0||a->matrixtype==1)||a->matrixtype==2, "SparseAlloc: only CRS/SKS matrices are supported", _state);
30596 
30597     /*
30598      * Header
30599      */
30600     ae_serializer_alloc_entry(s);
30601     ae_serializer_alloc_entry(s);
30602     ae_serializer_alloc_entry(s);
30603 
30604     /*
30605      * Alloc other parameters
30606      */
30607     if( a->matrixtype==0 )
30608     {
30609 
30610         /*
30611          * Alloc Hash
30612          */
30613         nused = 0;
30614         for(i=0; i<=a->tablesize-1; i++)
30615         {
30616             if( a->idx.ptr.p_int[2*i+0]>=0 )
30617             {
30618                 nused = nused+1;
30619             }
30620         }
30621         ae_serializer_alloc_entry(s);
30622         ae_serializer_alloc_entry(s);
30623         ae_serializer_alloc_entry(s);
30624         for(i=0; i<=a->tablesize-1; i++)
30625         {
30626             if( a->idx.ptr.p_int[2*i+0]>=0 )
30627             {
30628                 ae_serializer_alloc_entry(s);
30629                 ae_serializer_alloc_entry(s);
30630                 ae_serializer_alloc_entry(s);
30631             }
30632         }
30633     }
30634     if( a->matrixtype==1 )
30635     {
30636 
30637         /*
30638          * Alloc CRS
30639          */
30640         ae_serializer_alloc_entry(s);
30641         ae_serializer_alloc_entry(s);
30642         ae_serializer_alloc_entry(s);
30643         allocintegerarray(s, &a->ridx, a->m+1, _state);
30644         allocintegerarray(s, &a->idx, a->ridx.ptr.p_int[a->m], _state);
30645         allocrealarray(s, &a->vals, a->ridx.ptr.p_int[a->m], _state);
30646     }
30647     if( a->matrixtype==2 )
30648     {
30649 
30650         /*
30651          * Alloc SKS
30652          */
30653         ae_assert(a->m==a->n, "SparseAlloc: rectangular SKS serialization is not supported", _state);
30654         ae_serializer_alloc_entry(s);
30655         ae_serializer_alloc_entry(s);
30656         allocintegerarray(s, &a->ridx, a->m+1, _state);
30657         allocintegerarray(s, &a->didx, a->n+1, _state);
30658         allocintegerarray(s, &a->uidx, a->n+1, _state);
30659         allocrealarray(s, &a->vals, a->ridx.ptr.p_int[a->m], _state);
30660     }
30661 
30662     /*
30663      * End of stream
30664      */
30665     ae_serializer_alloc_entry(s);
30666 }
30667 
30668 
30669 /*************************************************************************
30670 Serializer: serialization
30671 
30672 INTERNAL-ONLY FUNCTION, SUPPORTS ONLY CRS MATRICES
30673 
30674   -- ALGLIB --
30675      Copyright 20.07.2021 by Bochkanov Sergey
30676 *************************************************************************/
sparseserialize(ae_serializer * s,sparsematrix * a,ae_state * _state)30677 void sparseserialize(ae_serializer* s, sparsematrix* a, ae_state *_state)
30678 {
30679     ae_int_t i;
30680     ae_int_t nused;
30681 
30682 
30683     ae_assert((a->matrixtype==0||a->matrixtype==1)||a->matrixtype==2, "SparseSerialize: only CRS/SKS matrices are supported", _state);
30684 
30685     /*
30686      * Header
30687      */
30688     ae_serializer_serialize_int(s, getsparsematrixserializationcode(_state), _state);
30689     ae_serializer_serialize_int(s, a->matrixtype, _state);
30690     ae_serializer_serialize_int(s, 0, _state);
30691 
30692     /*
30693      * Serialize other parameters
30694      */
30695     if( a->matrixtype==0 )
30696     {
30697 
30698         /*
30699          * Serialize Hash
30700          */
30701         nused = 0;
30702         for(i=0; i<=a->tablesize-1; i++)
30703         {
30704             if( a->idx.ptr.p_int[2*i+0]>=0 )
30705             {
30706                 nused = nused+1;
30707             }
30708         }
30709         ae_serializer_serialize_int(s, a->m, _state);
30710         ae_serializer_serialize_int(s, a->n, _state);
30711         ae_serializer_serialize_int(s, nused, _state);
30712         for(i=0; i<=a->tablesize-1; i++)
30713         {
30714             if( a->idx.ptr.p_int[2*i+0]>=0 )
30715             {
30716                 ae_serializer_serialize_int(s, a->idx.ptr.p_int[2*i+0], _state);
30717                 ae_serializer_serialize_int(s, a->idx.ptr.p_int[2*i+1], _state);
30718                 ae_serializer_serialize_double(s, a->vals.ptr.p_double[i], _state);
30719             }
30720         }
30721     }
30722     if( a->matrixtype==1 )
30723     {
30724 
30725         /*
30726          * Serialize CRS
30727          */
30728         ae_serializer_serialize_int(s, a->m, _state);
30729         ae_serializer_serialize_int(s, a->n, _state);
30730         ae_serializer_serialize_int(s, a->ninitialized, _state);
30731         serializeintegerarray(s, &a->ridx, a->m+1, _state);
30732         serializeintegerarray(s, &a->idx, a->ridx.ptr.p_int[a->m], _state);
30733         serializerealarray(s, &a->vals, a->ridx.ptr.p_int[a->m], _state);
30734     }
30735     if( a->matrixtype==2 )
30736     {
30737 
30738         /*
30739          * Serialize SKS
30740          */
30741         ae_assert(a->m==a->n, "SparseSerialize: rectangular SKS serialization is not supported", _state);
30742         ae_serializer_serialize_int(s, a->m, _state);
30743         ae_serializer_serialize_int(s, a->n, _state);
30744         serializeintegerarray(s, &a->ridx, a->m+1, _state);
30745         serializeintegerarray(s, &a->didx, a->n+1, _state);
30746         serializeintegerarray(s, &a->uidx, a->n+1, _state);
30747         serializerealarray(s, &a->vals, a->ridx.ptr.p_int[a->m], _state);
30748     }
30749 
30750     /*
30751      * End of stream
30752      */
30753     ae_serializer_serialize_int(s, 117, _state);
30754 }
30755 
30756 
30757 /*************************************************************************
30758 Serializer: unserialization
30759 
30760   -- ALGLIB --
30761      Copyright 20.07.2021 by Bochkanov Sergey
30762 *************************************************************************/
sparseunserialize(ae_serializer * s,sparsematrix * a,ae_state * _state)30763 void sparseunserialize(ae_serializer* s,
30764      sparsematrix* a,
30765      ae_state *_state)
30766 {
30767     ae_int_t i;
30768     ae_int_t i0;
30769     ae_int_t i1;
30770     ae_int_t m;
30771     ae_int_t n;
30772     ae_int_t nused;
30773     ae_int_t k;
30774     double v;
30775 
30776     _sparsematrix_clear(a);
30777 
30778 
30779     /*
30780      * Check stream header: scode, matrix type, version type
30781      */
30782     ae_serializer_unserialize_int(s, &k, _state);
30783     ae_assert(k==getsparsematrixserializationcode(_state), "SparseUnserialize: stream header corrupted", _state);
30784     ae_serializer_unserialize_int(s, &a->matrixtype, _state);
30785     ae_assert((a->matrixtype==0||a->matrixtype==1)||a->matrixtype==2, "SparseUnserialize: unexpected matrix type", _state);
30786     ae_serializer_unserialize_int(s, &k, _state);
30787     ae_assert(k==0, "SparseUnserialize: stream header corrupted", _state);
30788 
30789     /*
30790      * Unserialize other parameters
30791      */
30792     if( a->matrixtype==0 )
30793     {
30794 
30795         /*
30796          * Unerialize Hash
30797          */
30798         ae_serializer_unserialize_int(s, &m, _state);
30799         ae_serializer_unserialize_int(s, &n, _state);
30800         ae_serializer_unserialize_int(s, &nused, _state);
30801         sparsecreate(m, n, nused, a, _state);
30802         for(i=0; i<=nused-1; i++)
30803         {
30804             ae_serializer_unserialize_int(s, &i0, _state);
30805             ae_serializer_unserialize_int(s, &i1, _state);
30806             ae_serializer_unserialize_double(s, &v, _state);
30807             sparseset(a, i0, i1, v, _state);
30808         }
30809     }
30810     if( a->matrixtype==1 )
30811     {
30812 
30813         /*
30814          * Unserialize CRS
30815          */
30816         ae_serializer_unserialize_int(s, &a->m, _state);
30817         ae_serializer_unserialize_int(s, &a->n, _state);
30818         ae_serializer_unserialize_int(s, &a->ninitialized, _state);
30819         unserializeintegerarray(s, &a->ridx, _state);
30820         unserializeintegerarray(s, &a->idx, _state);
30821         unserializerealarray(s, &a->vals, _state);
30822         sparseinitduidx(a, _state);
30823     }
30824     if( a->matrixtype==2 )
30825     {
30826 
30827         /*
30828          * Unserialize SKS
30829          */
30830         ae_serializer_unserialize_int(s, &a->m, _state);
30831         ae_serializer_unserialize_int(s, &a->n, _state);
30832         ae_assert(a->m==a->n, "SparseUnserialize: rectangular SKS unserialization is not supported", _state);
30833         unserializeintegerarray(s, &a->ridx, _state);
30834         unserializeintegerarray(s, &a->didx, _state);
30835         unserializeintegerarray(s, &a->uidx, _state);
30836         unserializerealarray(s, &a->vals, _state);
30837     }
30838 
30839     /*
30840      * End of stream
30841      */
30842     ae_serializer_unserialize_int(s, &k, _state);
30843     ae_assert(k==117, "SparseMatrixUnserialize: end-of-stream marker not found", _state);
30844 }
30845 
30846 
30847 /*************************************************************************
30848 This is hash function.
30849 
30850   -- ALGLIB PROJECT --
30851      Copyright 14.10.2011 by Bochkanov Sergey
30852 *************************************************************************/
sparse_hash(ae_int_t i,ae_int_t j,ae_int_t tabsize,ae_state * _state)30853 static ae_int_t sparse_hash(ae_int_t i,
30854      ae_int_t j,
30855      ae_int_t tabsize,
30856      ae_state *_state)
30857 {
30858     ae_frame _frame_block;
30859     hqrndstate r;
30860     ae_int_t result;
30861 
30862     ae_frame_make(_state, &_frame_block);
30863     memset(&r, 0, sizeof(r));
30864     _hqrndstate_init(&r, _state, ae_true);
30865 
30866     hqrndseed(i, j, &r, _state);
30867     result = hqrnduniformi(&r, tabsize, _state);
30868     ae_frame_leave(_state);
30869     return result;
30870 }
30871 
30872 
_sparsematrix_init(void * _p,ae_state * _state,ae_bool make_automatic)30873 void _sparsematrix_init(void* _p, ae_state *_state, ae_bool make_automatic)
30874 {
30875     sparsematrix *p = (sparsematrix*)_p;
30876     ae_touch_ptr((void*)p);
30877     ae_vector_init(&p->vals, 0, DT_REAL, _state, make_automatic);
30878     ae_vector_init(&p->idx, 0, DT_INT, _state, make_automatic);
30879     ae_vector_init(&p->ridx, 0, DT_INT, _state, make_automatic);
30880     ae_vector_init(&p->didx, 0, DT_INT, _state, make_automatic);
30881     ae_vector_init(&p->uidx, 0, DT_INT, _state, make_automatic);
30882 }
30883 
30884 
_sparsematrix_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)30885 void _sparsematrix_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
30886 {
30887     sparsematrix *dst = (sparsematrix*)_dst;
30888     sparsematrix *src = (sparsematrix*)_src;
30889     ae_vector_init_copy(&dst->vals, &src->vals, _state, make_automatic);
30890     ae_vector_init_copy(&dst->idx, &src->idx, _state, make_automatic);
30891     ae_vector_init_copy(&dst->ridx, &src->ridx, _state, make_automatic);
30892     ae_vector_init_copy(&dst->didx, &src->didx, _state, make_automatic);
30893     ae_vector_init_copy(&dst->uidx, &src->uidx, _state, make_automatic);
30894     dst->matrixtype = src->matrixtype;
30895     dst->m = src->m;
30896     dst->n = src->n;
30897     dst->nfree = src->nfree;
30898     dst->ninitialized = src->ninitialized;
30899     dst->tablesize = src->tablesize;
30900 }
30901 
30902 
_sparsematrix_clear(void * _p)30903 void _sparsematrix_clear(void* _p)
30904 {
30905     sparsematrix *p = (sparsematrix*)_p;
30906     ae_touch_ptr((void*)p);
30907     ae_vector_clear(&p->vals);
30908     ae_vector_clear(&p->idx);
30909     ae_vector_clear(&p->ridx);
30910     ae_vector_clear(&p->didx);
30911     ae_vector_clear(&p->uidx);
30912 }
30913 
30914 
_sparsematrix_destroy(void * _p)30915 void _sparsematrix_destroy(void* _p)
30916 {
30917     sparsematrix *p = (sparsematrix*)_p;
30918     ae_touch_ptr((void*)p);
30919     ae_vector_destroy(&p->vals);
30920     ae_vector_destroy(&p->idx);
30921     ae_vector_destroy(&p->ridx);
30922     ae_vector_destroy(&p->didx);
30923     ae_vector_destroy(&p->uidx);
30924 }
30925 
30926 
_sparsebuffers_init(void * _p,ae_state * _state,ae_bool make_automatic)30927 void _sparsebuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
30928 {
30929     sparsebuffers *p = (sparsebuffers*)_p;
30930     ae_touch_ptr((void*)p);
30931     ae_vector_init(&p->d, 0, DT_INT, _state, make_automatic);
30932     ae_vector_init(&p->u, 0, DT_INT, _state, make_automatic);
30933     _sparsematrix_init(&p->s, _state, make_automatic);
30934 }
30935 
30936 
_sparsebuffers_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)30937 void _sparsebuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
30938 {
30939     sparsebuffers *dst = (sparsebuffers*)_dst;
30940     sparsebuffers *src = (sparsebuffers*)_src;
30941     ae_vector_init_copy(&dst->d, &src->d, _state, make_automatic);
30942     ae_vector_init_copy(&dst->u, &src->u, _state, make_automatic);
30943     _sparsematrix_init_copy(&dst->s, &src->s, _state, make_automatic);
30944 }
30945 
30946 
_sparsebuffers_clear(void * _p)30947 void _sparsebuffers_clear(void* _p)
30948 {
30949     sparsebuffers *p = (sparsebuffers*)_p;
30950     ae_touch_ptr((void*)p);
30951     ae_vector_clear(&p->d);
30952     ae_vector_clear(&p->u);
30953     _sparsematrix_clear(&p->s);
30954 }
30955 
30956 
_sparsebuffers_destroy(void * _p)30957 void _sparsebuffers_destroy(void* _p)
30958 {
30959     sparsebuffers *p = (sparsebuffers*)_p;
30960     ae_touch_ptr((void*)p);
30961     ae_vector_destroy(&p->d);
30962     ae_vector_destroy(&p->u);
30963     _sparsematrix_destroy(&p->s);
30964 }
30965 
30966 
30967 #endif
30968 #if defined(AE_COMPILE_HSSCHUR) || !defined(AE_PARTIAL_BUILD)
30969 
30970 
/*************************************************************************
0-based wrapper around InternalSchurDecomposition().

It first tries an MKL-based implementation; if that declines, it copies
the inputs into 1-based temporaries (the ALGLIB core inherits 1-based
indexing from its LAPACK origin), runs the generic code, and copies the
results back into the caller's 0-based arrays.

Parameters mirror InternalSchurDecomposition():
    H       -   N*N matrix (0-based); overwritten by T when TNeeded!=0
    TNeeded -   whether the quasi-triangular factor T is wanted (0/1)
    ZNeeded -   0: no Schur vectors; 1: accumulate into caller's Z;
                2: Z is (re)allocated and formed from scratch
    WR, WI  -   re-allocated here; receive eigenvalue real/imag parts
    Info    -   completion code produced by the internal solver
*************************************************************************/
void rmatrixinternalschurdecomposition(/* Real    */ ae_matrix* h,
     ae_int_t n,
     ae_int_t tneeded,
     ae_int_t zneeded,
     /* Real    */ ae_vector* wr,
     /* Real    */ ae_vector* wi,
     /* Real    */ ae_matrix* z,
     ae_int_t* info,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_matrix h1;
    ae_matrix z1;
    ae_vector wr1;
    ae_vector wi1;

    /* ALGLIB boilerplate: register temporaries in the dynamic frame so
       they are released by ae_frame_leave() on every exit path */
    ae_frame_make(_state, &_frame_block);
    memset(&h1, 0, sizeof(h1));
    memset(&z1, 0, sizeof(z1));
    memset(&wr1, 0, sizeof(wr1));
    memset(&wi1, 0, sizeof(wi1));
    ae_vector_clear(wr);
    ae_vector_clear(wi);
    *info = 0;
    ae_matrix_init(&h1, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z1, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&wr1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&wi1, 0, DT_REAL, _state, ae_true);


    /*
     * Allocate space
     */
    ae_vector_set_length(wr, n, _state);
    ae_vector_set_length(wi, n, _state);
    if( zneeded==2 )
    {
        rmatrixsetlengthatleast(z, n, n, _state);
    }

    /*
     * MKL version (returns ae_true when it handled the problem)
     */
    if( rmatrixinternalschurdecompositionmkl(h, n, tneeded, zneeded, wr, wi, z, info, _state) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * ALGLIB version: shift everything to 1-based temporaries
     */
    ae_matrix_set_length(&h1, n+1, n+1, _state);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            h1.ptr.pp_double[1+i][1+j] = h->ptr.pp_double[i][j];
        }
    }
    if( zneeded==1 )
    {
        /* caller-supplied Z must be carried through the iteration */
        ae_matrix_set_length(&z1, n+1, n+1, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z1.ptr.pp_double[1+i][1+j] = z->ptr.pp_double[i][j];
            }
        }
    }
    internalschurdecomposition(&h1, n, tneeded, zneeded, &wr1, &wi1, &z1, info, _state);
    /* copy results back to 0-based output arrays */
    for(i=0; i<=n-1; i++)
    {
        wr->ptr.p_double[i] = wr1.ptr.p_double[i+1];
        wi->ptr.p_double[i] = wi1.ptr.p_double[i+1];
    }
    if( tneeded!=0 )
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                h->ptr.pp_double[i][j] = h1.ptr.pp_double[1+i][1+j];
            }
        }
    }
    if( zneeded!=0 )
    {
        rmatrixsetlengthatleast(z, n, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z->ptr.pp_double[i][j] = z1.ptr.pp_double[1+i][1+j];
            }
        }
    }
    ae_frame_leave(_state);
}
31073 
31074 
31075 /*************************************************************************
31076 Subroutine performing  the  Schur  decomposition  of  a  matrix  in  upper
31077 Hessenberg form using the QR algorithm with multiple shifts.
31078 
31079 The  source matrix  H  is  represented as  S'*H*S = T, where H - matrix in
31080 upper Hessenberg form,  S - orthogonal matrix (Schur vectors),   T - upper
31081 quasi-triangular matrix (with blocks of sizes  1x1  and  2x2  on  the main
31082 diagonal).
31083 
31084 Input parameters:
31085     H   -   matrix to be decomposed.
31086             Array whose indexes range within [1..N, 1..N].
31087     N   -   size of H, N>=0.
31088 
31089 
31090 Output parameters:
31091     H   -   contains the matrix T.
31092             Array whose indexes range within [1..N, 1..N].
31093             All elements below the blocks on the main diagonal are equal
31094             to 0.
31095     S   -   contains Schur vectors.
31096             Array whose indexes range within [1..N, 1..N].
31097 
31098 Note 1:
31099     The block structure of matrix T could be easily recognized: since  all
31100     the elements  below  the blocks are zeros, the elements a[i+1,i] which
31101     are equal to 0 show the block border.
31102 
31103 Note 2:
31104     the algorithm  performance  depends  on  the  value  of  the  internal
31105     parameter NS of InternalSchurDecomposition  subroutine  which  defines
31106     the number of shifts in the QR algorithm (analog of  the  block  width
31107     in block matrix algorithms in linear algebra). If you require  maximum
31108     performance  on  your  machine,  it  is  recommended  to  adjust  this
31109     parameter manually.
31110 
31111 Result:
31112     True, if the algorithm has converged and the parameters H and S contain
31113         the result.
31114     False, if the algorithm has not converged.
31115 
31116 Algorithm implemented on the basis of subroutine DHSEQR (LAPACK 3.0 library).
31117 *************************************************************************/
upperhessenbergschurdecomposition(ae_matrix * h,ae_int_t n,ae_matrix * s,ae_state * _state)31118 ae_bool upperhessenbergschurdecomposition(/* Real    */ ae_matrix* h,
31119      ae_int_t n,
31120      /* Real    */ ae_matrix* s,
31121      ae_state *_state)
31122 {
31123     ae_frame _frame_block;
31124     ae_vector wi;
31125     ae_vector wr;
31126     ae_int_t info;
31127     ae_bool result;
31128 
31129     ae_frame_make(_state, &_frame_block);
31130     memset(&wi, 0, sizeof(wi));
31131     memset(&wr, 0, sizeof(wr));
31132     ae_matrix_clear(s);
31133     ae_vector_init(&wi, 0, DT_REAL, _state, ae_true);
31134     ae_vector_init(&wr, 0, DT_REAL, _state, ae_true);
31135 
31136     internalschurdecomposition(h, n, 1, 2, &wr, &wi, s, &info, _state);
31137     result = info==0;
31138     ae_frame_leave(_state);
31139     return result;
31140 }
31141 
31142 
/*************************************************************************
Multiple-shift QR driver: Schur decomposition of an upper Hessenberg
matrix H stored with 1-based indexing ([1..N,1..N]).

    TNeeded -   1 if the quasi-triangular factor T must be returned in H
    ZNeeded -   0: Schur vectors not computed
                1: transformations accumulated into caller-supplied Z
                2: Z is initialized to identity here, then accumulated
    WR, WI  -   re-allocated here (1-based); receive eigenvalue
                real/imaginary parts
    Info    -   0 on success; on failure to converge it is set to the
                boundary index I of the unconverged submatrix

Small problems (NS<=2, NS>N, or MAXB>=N) are delegated entirely to the
double-shift code (hsschur_internalauxschur); otherwise multiple-shift
iterations split off trailing submatrices of order <= MAXB which are
then finished by the double-shift code.
*************************************************************************/
void internalschurdecomposition(/* Real    */ ae_matrix* h,
     ae_int_t n,
     ae_int_t tneeded,
     ae_int_t zneeded,
     /* Real    */ ae_vector* wr,
     /* Real    */ ae_vector* wi,
     /* Real    */ ae_matrix* z,
     ae_int_t* info,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_int_t i;
    ae_int_t i1;
    ae_int_t i2;
    ae_int_t ierr;
    ae_int_t ii;
    ae_int_t itemp;
    ae_int_t itn;
    ae_int_t its;
    ae_int_t j;
    ae_int_t k;
    ae_int_t l;
    ae_int_t maxb;
    ae_int_t nr;
    ae_int_t ns;
    ae_int_t nv;
    double absw;
    double smlnum;
    double tau;
    double temp;
    double tst1;
    double ulp;
    double unfl;
    ae_matrix s;
    ae_vector v;
    ae_vector vv;
    ae_vector workc1;
    ae_vector works1;
    ae_vector workv3;
    ae_vector tmpwr;
    ae_vector tmpwi;
    ae_bool initz;
    ae_bool wantt;
    ae_bool wantz;
    double cnst;
    ae_bool failflag;
    ae_int_t p1;
    ae_int_t p2;
    double vt;

    /* ALGLIB boilerplate: temporaries are registered in the dynamic frame
       so that every early return through ae_frame_leave() frees them */
    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&s, 0, sizeof(s));
    memset(&v, 0, sizeof(v));
    memset(&vv, 0, sizeof(vv));
    memset(&workc1, 0, sizeof(workc1));
    memset(&works1, 0, sizeof(works1));
    memset(&workv3, 0, sizeof(workv3));
    memset(&tmpwr, 0, sizeof(tmpwr));
    memset(&tmpwi, 0, sizeof(tmpwi));
    ae_vector_clear(wr);
    ae_vector_clear(wi);
    *info = 0;
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&vv, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&workc1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&works1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&workv3, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tmpwr, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tmpwi, 0, DT_REAL, _state, ae_true);


    /*
     * Set the order of the multi-shift QR algorithm to be used.
     * If you want to tune the algorithm, change these values
     */
    ns = 12;
    maxb = 50;

    /*
     * Now 2 < NS <= MAXB < NH.
     */
    maxb = ae_maxint(3, maxb, _state);
    ns = ae_minint(maxb, ns, _state);

    /*
     * Initialize (all scratch arrays are 1-based, hence the "+1" slack)
     */
    cnst = 1.5;
    ae_vector_set_length(&work, ae_maxint(n, 1, _state)+1, _state);
    ae_matrix_set_length(&s, ns+1, ns+1, _state);
    ae_vector_set_length(&v, ns+1+1, _state);
    ae_vector_set_length(&vv, ns+1+1, _state);
    ae_vector_set_length(wr, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(wi, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&workc1, 1+1, _state);
    ae_vector_set_length(&works1, 1+1, _state);
    ae_vector_set_length(&workv3, 3+1, _state);
    ae_vector_set_length(&tmpwr, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&tmpwi, ae_maxint(n, 1, _state)+1, _state);
    ae_assert(n>=0, "InternalSchurDecomposition: incorrect N!", _state);
    ae_assert(tneeded==0||tneeded==1, "InternalSchurDecomposition: incorrect TNeeded!", _state);
    ae_assert((zneeded==0||zneeded==1)||zneeded==2, "InternalSchurDecomposition: incorrect ZNeeded!", _state);
    wantt = tneeded==1;
    initz = zneeded==2;
    wantz = zneeded!=0;
    *info = 0;

    /*
     * Initialize Z to the identity matrix, if necessary
     */
    if( initz )
    {
        rmatrixsetlengthatleast(z, n+1, n+1, _state);
        for(i=1; i<=n; i++)
        {
            for(j=1; j<=n; j++)
            {
                if( i==j )
                {
                    z->ptr.pp_double[i][j] = (double)(1);
                }
                else
                {
                    z->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
    }

    /*
     * Quick return if possible
     */
    if( n==0 )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        wr->ptr.p_double[1] = h->ptr.pp_double[1][1];
        wi->ptr.p_double[1] = (double)(0);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Set rows and columns 1 to N to zero below the first
     * subdiagonal.
     */
    for(j=1; j<=n-2; j++)
    {
        for(i=j+2; i<=n; i++)
        {
            h->ptr.pp_double[i][j] = (double)(0);
        }
    }

    /*
     * Test if N is sufficiently small
     */
    if( (ns<=2||ns>n)||maxb>=n )
    {

        /*
         * Use the standard double-shift algorithm
         */
        hsschur_internalauxschur(wantt, wantz, n, 1, n, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);

        /*
         * fill entries under diagonal blocks of T with zeros
         * (real eigenvalue => 1x1 block, complex pair => 2x2 block)
         */
        if( wantt )
        {
            j = 1;
            while(j<=n)
            {
                if( ae_fp_eq(wi->ptr.p_double[j],(double)(0)) )
                {
                    for(i=j+1; i<=n; i++)
                    {
                        h->ptr.pp_double[i][j] = (double)(0);
                    }
                    j = j+1;
                }
                else
                {
                    for(i=j+2; i<=n; i++)
                    {
                        h->ptr.pp_double[i][j] = (double)(0);
                        h->ptr.pp_double[i][j+1] = (double)(0);
                    }
                    j = j+2;
                }
            }
        }
        ae_frame_leave(_state);
        return;
    }

    /* machine-dependent deflation threshold */
    unfl = ae_minrealnumber;
    ulp = 2*ae_machineepsilon;
    smlnum = unfl*(n/ulp);

    /*
     * I1 and I2 are the indices of the first row and last column of H
     * to which transformations must be applied. If eigenvalues only are
     * being computed, I1 and I2 are set inside the main loop.
     */
    i1 = 1;
    i2 = n;

    /*
     * ITN is the total number of multiple-shift QR iterations allowed.
     */
    itn = 30*n;

    /*
     * The main loop begins here. I is the loop index and decreases from
     * IHI to ILO in steps of at most MAXB. Each iteration of the loop
     * works with the active submatrix in rows and columns L to I.
     * Eigenvalues I+1 to IHI have already converged. Either L = ILO or
     * H(L,L-1) is negligible so that the matrix splits.
     */
    i = n;
    for(;;)
    {
        l = 1;
        if( i<1 )
        {

            /*
             * fill entries under diagonal blocks of T with zeros
             */
            if( wantt )
            {
                j = 1;
                while(j<=n)
                {
                    if( ae_fp_eq(wi->ptr.p_double[j],(double)(0)) )
                    {
                        for(i=j+1; i<=n; i++)
                        {
                            h->ptr.pp_double[i][j] = (double)(0);
                        }
                        j = j+1;
                    }
                    else
                    {
                        for(i=j+2; i<=n; i++)
                        {
                            h->ptr.pp_double[i][j] = (double)(0);
                            h->ptr.pp_double[i][j+1] = (double)(0);
                        }
                        j = j+2;
                    }
                }
            }

            /*
             * Exit
             */
            ae_frame_leave(_state);
            return;
        }

        /*
         * Perform multiple-shift QR iterations on rows and columns ILO to I
         * until a submatrix of order at most MAXB splits off at the bottom
         * because a subdiagonal element has become negligible.
         */
        failflag = ae_true;
        for(its=0; its<=itn; its++)
        {

            /*
             * Look for a single small subdiagonal element.
             */
            for(k=i; k>=l+1; k--)
            {
                tst1 = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state);
                if( ae_fp_eq(tst1,(double)(0)) )
                {
                    tst1 = upperhessenberg1norm(h, l, i, l, i, &work, _state);
                }
                if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ae_maxreal(ulp*tst1, smlnum, _state)) )
                {
                    break;
                }
            }
            l = k;
            if( l>1 )
            {

                /*
                 * H(L,L-1) is negligible.
                 */
                h->ptr.pp_double[l][l-1] = (double)(0);
            }

            /*
             * Exit from loop if a submatrix of order <= MAXB has split off.
             */
            if( l>=i-maxb+1 )
            {
                failflag = ae_false;
                break;
            }

            /*
             * Now the active submatrix is in rows and columns L to I. If
             * eigenvalues only are being computed, only the active submatrix
             * need be transformed.
             */
            if( its==20||its==30 )
            {

                /*
                 * Exceptional shifts (used to break out of convergence stagnation).
                 */
                for(ii=i-ns+1; ii<=i; ii++)
                {
                    wr->ptr.p_double[ii] = cnst*(ae_fabs(h->ptr.pp_double[ii][ii-1], _state)+ae_fabs(h->ptr.pp_double[ii][ii], _state));
                    wi->ptr.p_double[ii] = (double)(0);
                }
            }
            else
            {

                /*
                 * Use eigenvalues of trailing submatrix of order NS as shifts.
                 */
                copymatrix(h, i-ns+1, i, i-ns+1, i, &s, 1, ns, 1, ns, _state);
                hsschur_internalauxschur(ae_false, ae_false, ns, 1, ns, &s, &tmpwr, &tmpwi, 1, ns, z, &work, &workv3, &workc1, &works1, &ierr, _state);
                for(p1=1; p1<=ns; p1++)
                {
                    wr->ptr.p_double[i-ns+p1] = tmpwr.ptr.p_double[p1];
                    wi->ptr.p_double[i-ns+p1] = tmpwi.ptr.p_double[p1];
                }
                if( ierr>0 )
                {

                    /*
                     * If DLAHQR failed to compute all NS eigenvalues, use the
                     * unconverged diagonal elements as the remaining shifts.
                     */
                    for(ii=1; ii<=ierr; ii++)
                    {
                        wr->ptr.p_double[i-ns+ii] = s.ptr.pp_double[ii][ii];
                        wi->ptr.p_double[i-ns+ii] = (double)(0);
                    }
                }
            }

            /*
             * Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
             * where G is the Hessenberg submatrix H(L:I,L:I) and w is
             * the vector of shifts (stored in WR and WI). The result is
             * stored in the local array V.
             */
            v.ptr.p_double[1] = (double)(1);
            for(ii=2; ii<=ns+1; ii++)
            {
                v.ptr.p_double[ii] = (double)(0);
            }
            nv = 1;
            for(j=i-ns+1; j<=i; j++)
            {
                /* a complex pair is processed once, at its positive-Wi member */
                if( ae_fp_greater_eq(wi->ptr.p_double[j],(double)(0)) )
                {
                    if( ae_fp_eq(wi->ptr.p_double[j],(double)(0)) )
                    {

                        /*
                         * real shift
                         */
                        p1 = nv+1;
                        ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1));
                        matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &vv, 1, nv, 1.0, &v, 1, nv+1, -wr->ptr.p_double[j], _state);
                        nv = nv+1;
                    }
                    else
                    {
                        if( ae_fp_greater(wi->ptr.p_double[j],(double)(0)) )
                        {

                            /*
                             * complex conjugate pair of shifts
                             */
                            p1 = nv+1;
                            ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1));
                            matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &v, 1, nv, 1.0, &vv, 1, nv+1, -2*wr->ptr.p_double[j], _state);
                            itemp = vectoridxabsmax(&vv, 1, nv+1, _state);
                            temp = 1/ae_maxreal(ae_fabs(vv.ptr.p_double[itemp], _state), smlnum, _state);
                            p1 = nv+1;
                            ae_v_muld(&vv.ptr.p_double[1], 1, ae_v_len(1,p1), temp);
                            absw = pythag2(wr->ptr.p_double[j], wi->ptr.p_double[j], _state);
                            temp = temp*absw*absw;
                            matrixvectormultiply(h, l, l+nv+1, l, l+nv, ae_false, &vv, 1, nv+1, 1.0, &v, 1, nv+2, temp, _state);
                            nv = nv+2;
                        }
                    }

                    /*
                     * Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
                     * reset it to the unit vector.
                     */
                    itemp = vectoridxabsmax(&v, 1, nv, _state);
                    temp = ae_fabs(v.ptr.p_double[itemp], _state);
                    if( ae_fp_eq(temp,(double)(0)) )
                    {
                        v.ptr.p_double[1] = (double)(1);
                        for(ii=2; ii<=nv; ii++)
                        {
                            v.ptr.p_double[ii] = (double)(0);
                        }
                    }
                    else
                    {
                        temp = ae_maxreal(temp, smlnum, _state);
                        vt = 1/temp;
                        ae_v_muld(&v.ptr.p_double[1], 1, ae_v_len(1,nv), vt);
                    }
                }
            }

            /*
             * Multiple-shift QR step
             */
            for(k=l; k<=i-1; k++)
            {

                /*
                 * The first iteration of this loop determines a reflection G
                 * from the vector V and applies it from left and right to H,
                 * thus creating a nonzero bulge below the subdiagonal.
                 *
                 * Each subsequent iteration determines a reflection G to
                 * restore the Hessenberg form in the (K-1)th column, and thus
                 * chases the bulge one step toward the bottom of the active
                 * submatrix. NR is the order of G.
                 */
                nr = ae_minint(ns+1, i-k+1, _state);
                if( k>l )
                {
                    p1 = k-1;
                    p2 = k+nr-1;
                    ae_v_move(&v.ptr.p_double[1], 1, &h->ptr.pp_double[k][p1], h->stride, ae_v_len(1,nr));
                    touchint(&p2, _state);
                }
                generatereflection(&v, nr, &tau, _state);
                if( k>l )
                {
                    h->ptr.pp_double[k][k-1] = v.ptr.p_double[1];
                    for(ii=k+1; ii<=i; ii++)
                    {
                        h->ptr.pp_double[ii][k-1] = (double)(0);
                    }
                }
                v.ptr.p_double[1] = (double)(1);

                /*
                 * Apply G from the left to transform the rows of the matrix in
                 * columns K to I2.
                 */
                applyreflectionfromtheleft(h, tau, &v, k, k+nr-1, k, i2, &work, _state);

                /*
                 * Apply G from the right to transform the columns of the
                 * matrix in rows I1 to min(K+NR,I).
                 */
                applyreflectionfromtheright(h, tau, &v, i1, ae_minint(k+nr, i, _state), k, k+nr-1, &work, _state);
                if( wantz )
                {

                    /*
                     * Accumulate transformations in the matrix Z
                     */
                    applyreflectionfromtheright(z, tau, &v, 1, n, k, k+nr-1, &work, _state);
                }
            }
        }

        /*
         * Failure to converge in remaining number of iterations
         */
        if( failflag )
        {
            *info = i;
            ae_frame_leave(_state);
            return;
        }

        /*
         * A submatrix of order <= MAXB in rows and columns L to I has split
         * off. Use the double-shift QR algorithm to handle it.
         */
        hsschur_internalauxschur(wantt, wantz, n, l, i, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);
        if( *info>0 )
        {
            ae_frame_leave(_state);
            return;
        }

        /*
         * Decrement number of remaining iterations, and return to start of
         * the main loop with a new value of I.
         */
        itn = itn-its;
        i = l-1;

        /*
         * Block below is never executed; it is necessary just to avoid
         * "unreachable code" warning about automatically generated code.
         *
         * We just need a way to transfer control to the end of the function,
         * even a fake way which is never actually traversed.
         */
        if( alwaysfalse(_state) )
        {
            ae_assert(ae_false, "Assertion failed", _state);
            break;
        }
    }
    ae_frame_leave(_state);
}
31671 
31672 
31673 /*************************************************************************
31674 Translation of DLAHQR from LAPACK.
31675 *************************************************************************/
hsschur_internalauxschur(ae_bool wantt,ae_bool wantz,ae_int_t n,ae_int_t ilo,ae_int_t ihi,ae_matrix * h,ae_vector * wr,ae_vector * wi,ae_int_t iloz,ae_int_t ihiz,ae_matrix * z,ae_vector * work,ae_vector * workv3,ae_vector * workc1,ae_vector * works1,ae_int_t * info,ae_state * _state)31676 static void hsschur_internalauxschur(ae_bool wantt,
31677      ae_bool wantz,
31678      ae_int_t n,
31679      ae_int_t ilo,
31680      ae_int_t ihi,
31681      /* Real    */ ae_matrix* h,
31682      /* Real    */ ae_vector* wr,
31683      /* Real    */ ae_vector* wi,
31684      ae_int_t iloz,
31685      ae_int_t ihiz,
31686      /* Real    */ ae_matrix* z,
31687      /* Real    */ ae_vector* work,
31688      /* Real    */ ae_vector* workv3,
31689      /* Real    */ ae_vector* workc1,
31690      /* Real    */ ae_vector* works1,
31691      ae_int_t* info,
31692      ae_state *_state)
31693 {
31694     double safmin;
31695     double tst;
31696     double ab;
31697     double ba;
31698     double aa;
31699     double bb;
31700     double rt1r;
31701     double rt1i;
31702     double rt2r;
31703     double rt2i;
31704     double tr;
31705     double det;
31706     double rtdisc;
31707     double h21s;
31708     ae_int_t i;
31709     ae_int_t i1;
31710     ae_int_t i2;
31711     ae_int_t itmax;
31712     ae_int_t its;
31713     ae_int_t j;
31714     ae_int_t k;
31715     ae_int_t l;
31716     ae_int_t m;
31717     ae_int_t nh;
31718     ae_int_t nr;
31719     ae_int_t nz;
31720     double cs;
31721     double h11;
31722     double h12;
31723     double h21;
31724     double h22;
31725     double s;
31726     double smlnum;
31727     double sn;
31728     double sum;
31729     double t1;
31730     double t2;
31731     double t3;
31732     double v2;
31733     double v3;
31734     ae_bool failflag;
31735     double dat1;
31736     double dat2;
31737     ae_int_t p1;
31738     double him1im1;
31739     double him1i;
31740     double hiim1;
31741     double hii;
31742     double wrim1;
31743     double wri;
31744     double wiim1;
31745     double wii;
31746     double ulp;
31747 
31748     *info = 0;
31749 
31750     *info = 0;
31751     dat1 = 0.75;
31752     dat2 = -0.4375;
31753 
31754     /*
31755      * Quick return if possible
31756      */
31757     if( n==0 )
31758     {
31759         return;
31760     }
31761     if( ilo==ihi )
31762     {
31763         wr->ptr.p_double[ilo] = h->ptr.pp_double[ilo][ilo];
31764         wi->ptr.p_double[ilo] = (double)(0);
31765         return;
31766     }
31767 
31768     /*
31769      * ==== clear out the trash ====
31770      */
31771     for(j=ilo; j<=ihi-3; j++)
31772     {
31773         h->ptr.pp_double[j+2][j] = (double)(0);
31774         h->ptr.pp_double[j+3][j] = (double)(0);
31775     }
31776     if( ilo<=ihi-2 )
31777     {
31778         h->ptr.pp_double[ihi][ihi-2] = (double)(0);
31779     }
31780     nh = ihi-ilo+1;
31781     nz = ihiz-iloz+1;
31782 
31783     /*
31784      * Set machine-dependent constants for the stopping criterion.
31785      */
31786     safmin = ae_minrealnumber;
31787     ulp = ae_machineepsilon;
31788     smlnum = safmin*(nh/ulp);
31789 
31790     /*
31791      * I1 and I2 are the indices of the first row and last column of H
31792      * to which transformations must be applied. If eigenvalues only are
31793      * being computed, I1 and I2 are set inside the main loop.
31794      *
31795      * Setting them to large negative value helps to debug possible errors
31796      * due to uninitialized variables; also it helps to avoid compiler
31797      * warnings.
31798      */
31799     i1 = -99999;
31800     i2 = -99999;
31801     if( wantt )
31802     {
31803         i1 = 1;
31804         i2 = n;
31805     }
31806 
31807     /*
31808      * ITMAX is the total number of QR iterations allowed.
31809      */
31810     itmax = 30*ae_maxint(10, nh, _state);
31811 
31812     /*
31813      * The main loop begins here. I is the loop index and decreases from
31814      * IHI to ILO in steps of 1 or 2. Each iteration of the loop works
31815      * with the active submatrix in rows and columns L to I.
31816      * Eigenvalues I+1 to IHI have already converged. Either L = ILO or
31817      * H(L,L-1) is negligible so that the matrix splits.
31818      */
31819     i = ihi;
31820     for(;;)
31821     {
31822         l = ilo;
31823         if( i<ilo )
31824         {
31825             return;
31826         }
31827 
31828         /*
31829          * Perform QR iterations on rows and columns ILO to I until a
31830          * submatrix of order 1 or 2 splits off at the bottom because a
31831          * subdiagonal element has become negligible.
31832          */
31833         failflag = ae_true;
31834         for(its=0; its<=itmax; its++)
31835         {
31836 
31837             /*
31838              * Look for a single small subdiagonal element.
31839              */
31840             for(k=i; k>=l+1; k--)
31841             {
31842                 if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),smlnum) )
31843                 {
31844                     break;
31845                 }
31846                 tst = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state);
31847                 if( ae_fp_eq(tst,(double)(0)) )
31848                 {
31849                     if( k-2>=ilo )
31850                     {
31851                         tst = tst+ae_fabs(h->ptr.pp_double[k-1][k-2], _state);
31852                     }
31853                     if( k+1<=ihi )
31854                     {
31855                         tst = tst+ae_fabs(h->ptr.pp_double[k+1][k], _state);
31856                     }
31857                 }
31858 
31859                 /*
31860                  * ==== The following is a conservative small subdiagonal
31861                  * .    deflation  criterion due to Ahues & Tisseur (LAWN 122,
31862                  * .    1997). It has better mathematical foundation and
31863                  * .    improves accuracy in some cases.  ====
31864                  */
31865                 if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ulp*tst) )
31866                 {
31867                     ab = ae_maxreal(ae_fabs(h->ptr.pp_double[k][k-1], _state), ae_fabs(h->ptr.pp_double[k-1][k], _state), _state);
31868                     ba = ae_minreal(ae_fabs(h->ptr.pp_double[k][k-1], _state), ae_fabs(h->ptr.pp_double[k-1][k], _state), _state);
31869                     aa = ae_maxreal(ae_fabs(h->ptr.pp_double[k][k], _state), ae_fabs(h->ptr.pp_double[k-1][k-1]-h->ptr.pp_double[k][k], _state), _state);
31870                     bb = ae_minreal(ae_fabs(h->ptr.pp_double[k][k], _state), ae_fabs(h->ptr.pp_double[k-1][k-1]-h->ptr.pp_double[k][k], _state), _state);
31871                     s = aa+ab;
31872                     if( ae_fp_less_eq(ba*(ab/s),ae_maxreal(smlnum, ulp*(bb*(aa/s)), _state)) )
31873                     {
31874                         break;
31875                     }
31876                 }
31877             }
31878             l = k;
31879             if( l>ilo )
31880             {
31881 
31882                 /*
31883                  * H(L,L-1) is negligible
31884                  */
31885                 h->ptr.pp_double[l][l-1] = (double)(0);
31886             }
31887 
31888             /*
31889              * Exit from loop if a submatrix of order 1 or 2 has split off.
31890              */
31891             if( l>=i-1 )
31892             {
31893                 failflag = ae_false;
31894                 break;
31895             }
31896 
31897             /*
31898              * Now the active submatrix is in rows and columns L to I. If
31899              * eigenvalues only are being computed, only the active submatrix
31900              * need be transformed.
31901              */
31902             if( !wantt )
31903             {
31904                 i1 = l;
31905                 i2 = i;
31906             }
31907 
31908             /*
31909              * Shifts
31910              */
31911             if( its==10 )
31912             {
31913 
31914                 /*
31915                  * Exceptional shift.
31916                  */
31917                 s = ae_fabs(h->ptr.pp_double[l+1][l], _state)+ae_fabs(h->ptr.pp_double[l+2][l+1], _state);
31918                 h11 = dat1*s+h->ptr.pp_double[l][l];
31919                 h12 = dat2*s;
31920                 h21 = s;
31921                 h22 = h11;
31922             }
31923             else
31924             {
31925                 if( its==20 )
31926                 {
31927 
31928                     /*
31929                      * Exceptional shift.
31930                      */
31931                     s = ae_fabs(h->ptr.pp_double[i][i-1], _state)+ae_fabs(h->ptr.pp_double[i-1][i-2], _state);
31932                     h11 = dat1*s+h->ptr.pp_double[i][i];
31933                     h12 = dat2*s;
31934                     h21 = s;
31935                     h22 = h11;
31936                 }
31937                 else
31938                 {
31939 
31940                     /*
31941                      * Prepare to use Francis' double shift
31942                      * (i.e. 2nd degree generalized Rayleigh quotient)
31943                      */
31944                     h11 = h->ptr.pp_double[i-1][i-1];
31945                     h21 = h->ptr.pp_double[i][i-1];
31946                     h12 = h->ptr.pp_double[i-1][i];
31947                     h22 = h->ptr.pp_double[i][i];
31948                 }
31949             }
31950             s = ae_fabs(h11, _state)+ae_fabs(h12, _state)+ae_fabs(h21, _state)+ae_fabs(h22, _state);
31951             if( ae_fp_eq(s,(double)(0)) )
31952             {
31953                 rt1r = (double)(0);
31954                 rt1i = (double)(0);
31955                 rt2r = (double)(0);
31956                 rt2i = (double)(0);
31957             }
31958             else
31959             {
31960                 h11 = h11/s;
31961                 h21 = h21/s;
31962                 h12 = h12/s;
31963                 h22 = h22/s;
31964                 tr = (h11+h22)/2;
31965                 det = (h11-tr)*(h22-tr)-h12*h21;
31966                 rtdisc = ae_sqrt(ae_fabs(det, _state), _state);
31967                 if( ae_fp_greater_eq(det,(double)(0)) )
31968                 {
31969 
31970                     /*
31971                      * ==== complex conjugate shifts ====
31972                      */
31973                     rt1r = tr*s;
31974                     rt2r = rt1r;
31975                     rt1i = rtdisc*s;
31976                     rt2i = -rt1i;
31977                 }
31978                 else
31979                 {
31980 
31981                     /*
31982                      * ==== real shifts (use only one of them)  ====
31983                      */
31984                     rt1r = tr+rtdisc;
31985                     rt2r = tr-rtdisc;
31986                     if( ae_fp_less_eq(ae_fabs(rt1r-h22, _state),ae_fabs(rt2r-h22, _state)) )
31987                     {
31988                         rt1r = rt1r*s;
31989                         rt2r = rt1r;
31990                     }
31991                     else
31992                     {
31993                         rt2r = rt2r*s;
31994                         rt1r = rt2r;
31995                     }
31996                     rt1i = (double)(0);
31997                     rt2i = (double)(0);
31998                 }
31999             }
32000 
32001             /*
32002              * Look for two consecutive small subdiagonal elements.
32003              */
32004             for(m=i-2; m>=l; m--)
32005             {
32006 
32007                 /*
32008                  * Determine the effect of starting the double-shift QR
32009                  * iteration at row M, and see if this would make H(M,M-1)
32010                  * negligible.  (The following uses scaling to avoid
32011                  * overflows and most underflows.)
32012                  */
32013                 h21s = h->ptr.pp_double[m+1][m];
32014                 s = ae_fabs(h->ptr.pp_double[m][m]-rt2r, _state)+ae_fabs(rt2i, _state)+ae_fabs(h21s, _state);
32015                 h21s = h->ptr.pp_double[m+1][m]/s;
32016                 workv3->ptr.p_double[1] = h21s*h->ptr.pp_double[m][m+1]+(h->ptr.pp_double[m][m]-rt1r)*((h->ptr.pp_double[m][m]-rt2r)/s)-rt1i*(rt2i/s);
32017                 workv3->ptr.p_double[2] = h21s*(h->ptr.pp_double[m][m]+h->ptr.pp_double[m+1][m+1]-rt1r-rt2r);
32018                 workv3->ptr.p_double[3] = h21s*h->ptr.pp_double[m+2][m+1];
32019                 s = ae_fabs(workv3->ptr.p_double[1], _state)+ae_fabs(workv3->ptr.p_double[2], _state)+ae_fabs(workv3->ptr.p_double[3], _state);
32020                 workv3->ptr.p_double[1] = workv3->ptr.p_double[1]/s;
32021                 workv3->ptr.p_double[2] = workv3->ptr.p_double[2]/s;
32022                 workv3->ptr.p_double[3] = workv3->ptr.p_double[3]/s;
32023                 if( m==l )
32024                 {
32025                     break;
32026                 }
32027                 if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[m][m-1], _state)*(ae_fabs(workv3->ptr.p_double[2], _state)+ae_fabs(workv3->ptr.p_double[3], _state)),ulp*ae_fabs(workv3->ptr.p_double[1], _state)*(ae_fabs(h->ptr.pp_double[m-1][m-1], _state)+ae_fabs(h->ptr.pp_double[m][m], _state)+ae_fabs(h->ptr.pp_double[m+1][m+1], _state))) )
32028                 {
32029                     break;
32030                 }
32031             }
32032 
32033             /*
32034              * Double-shift QR step
32035              */
32036             for(k=m; k<=i-1; k++)
32037             {
32038 
32039                 /*
32040                  * The first iteration of this loop determines a reflection G
32041                  * from the vector V and applies it from left and right to H,
32042                  * thus creating a nonzero bulge below the subdiagonal.
32043                  *
32044                  * Each subsequent iteration determines a reflection G to
32045                  * restore the Hessenberg form in the (K-1)th column, and thus
32046                  * chases the bulge one step toward the bottom of the active
32047                  * submatrix. NR is the order of G.
32048                  */
32049                 nr = ae_minint(3, i-k+1, _state);
32050                 if( k>m )
32051                 {
32052                     for(p1=1; p1<=nr; p1++)
32053                     {
32054                         workv3->ptr.p_double[p1] = h->ptr.pp_double[k+p1-1][k-1];
32055                     }
32056                 }
32057                 generatereflection(workv3, nr, &t1, _state);
32058                 if( k>m )
32059                 {
32060                     h->ptr.pp_double[k][k-1] = workv3->ptr.p_double[1];
32061                     h->ptr.pp_double[k+1][k-1] = (double)(0);
32062                     if( k<i-1 )
32063                     {
32064                         h->ptr.pp_double[k+2][k-1] = (double)(0);
32065                     }
32066                 }
32067                 else
32068                 {
32069                     if( m>l )
32070                     {
32071 
32072                         /*
32073                          * ==== Use the following instead of
32074                          * H( K, K-1 ) = -H( K, K-1 ) to
32075                          * avoid a bug when v(2) and v(3)
32076                          * underflow. ====
32077                          */
32078                         h->ptr.pp_double[k][k-1] = h->ptr.pp_double[k][k-1]*(1-t1);
32079                     }
32080                 }
32081                 v2 = workv3->ptr.p_double[2];
32082                 t2 = t1*v2;
32083                 if( nr==3 )
32084                 {
32085                     v3 = workv3->ptr.p_double[3];
32086                     t3 = t1*v3;
32087 
32088                     /*
32089                      * Apply G from the left to transform the rows of the matrix
32090                      * in columns K to I2.
32091                      */
32092                     for(j=k; j<=i2; j++)
32093                     {
32094                         sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j]+v3*h->ptr.pp_double[k+2][j];
32095                         h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1;
32096                         h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2;
32097                         h->ptr.pp_double[k+2][j] = h->ptr.pp_double[k+2][j]-sum*t3;
32098                     }
32099 
32100                     /*
32101                      * Apply G from the right to transform the columns of the
32102                      * matrix in rows I1 to min(K+3,I).
32103                      */
32104                     for(j=i1; j<=ae_minint(k+3, i, _state); j++)
32105                     {
32106                         sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1]+v3*h->ptr.pp_double[j][k+2];
32107                         h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1;
32108                         h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2;
32109                         h->ptr.pp_double[j][k+2] = h->ptr.pp_double[j][k+2]-sum*t3;
32110                     }
32111                     if( wantz )
32112                     {
32113 
32114                         /*
32115                          * Accumulate transformations in the matrix Z
32116                          */
32117                         for(j=iloz; j<=ihiz; j++)
32118                         {
32119                             sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1]+v3*z->ptr.pp_double[j][k+2];
32120                             z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1;
32121                             z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2;
32122                             z->ptr.pp_double[j][k+2] = z->ptr.pp_double[j][k+2]-sum*t3;
32123                         }
32124                     }
32125                 }
32126                 else
32127                 {
32128                     if( nr==2 )
32129                     {
32130 
32131                         /*
32132                          * Apply G from the left to transform the rows of the matrix
32133                          * in columns K to I2.
32134                          */
32135                         for(j=k; j<=i2; j++)
32136                         {
32137                             sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j];
32138                             h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1;
32139                             h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2;
32140                         }
32141 
32142                         /*
32143                          * Apply G from the right to transform the columns of the
32144                          * matrix in rows I1 to min(K+3,I).
32145                          */
32146                         for(j=i1; j<=i; j++)
32147                         {
32148                             sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1];
32149                             h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1;
32150                             h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2;
32151                         }
32152                         if( wantz )
32153                         {
32154 
32155                             /*
32156                              * Accumulate transformations in the matrix Z
32157                              */
32158                             for(j=iloz; j<=ihiz; j++)
32159                             {
32160                                 sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1];
32161                                 z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1;
32162                                 z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2;
32163                             }
32164                         }
32165                     }
32166                 }
32167             }
32168         }
32169 
32170         /*
32171          * Failure to converge in remaining number of iterations
32172          */
32173         if( failflag )
32174         {
32175             *info = i;
32176             return;
32177         }
32178 
32179         /*
32180          * Convergence
32181          */
32182         if( l==i )
32183         {
32184 
32185             /*
32186              * H(I,I-1) is negligible: one eigenvalue has converged.
32187              */
32188             wr->ptr.p_double[i] = h->ptr.pp_double[i][i];
32189             wi->ptr.p_double[i] = (double)(0);
32190         }
32191         else
32192         {
32193             if( l==i-1 )
32194             {
32195 
32196                 /*
32197                  * H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
32198                  *
32199                  * Transform the 2-by-2 submatrix to standard Schur form,
32200                  * and compute and store the eigenvalues.
32201                  */
32202                 him1im1 = h->ptr.pp_double[i-1][i-1];
32203                 him1i = h->ptr.pp_double[i-1][i];
32204                 hiim1 = h->ptr.pp_double[i][i-1];
32205                 hii = h->ptr.pp_double[i][i];
32206                 hsschur_aux2x2schur(&him1im1, &him1i, &hiim1, &hii, &wrim1, &wiim1, &wri, &wii, &cs, &sn, _state);
32207                 wr->ptr.p_double[i-1] = wrim1;
32208                 wi->ptr.p_double[i-1] = wiim1;
32209                 wr->ptr.p_double[i] = wri;
32210                 wi->ptr.p_double[i] = wii;
32211                 h->ptr.pp_double[i-1][i-1] = him1im1;
32212                 h->ptr.pp_double[i-1][i] = him1i;
32213                 h->ptr.pp_double[i][i-1] = hiim1;
32214                 h->ptr.pp_double[i][i] = hii;
32215                 if( wantt )
32216                 {
32217 
32218                     /*
32219                      * Apply the transformation to the rest of H.
32220                      */
32221                     if( i2>i )
32222                     {
32223                         workc1->ptr.p_double[1] = cs;
32224                         works1->ptr.p_double[1] = sn;
32225                         applyrotationsfromtheleft(ae_true, i-1, i, i+1, i2, workc1, works1, h, work, _state);
32226                     }
32227                     workc1->ptr.p_double[1] = cs;
32228                     works1->ptr.p_double[1] = sn;
32229                     applyrotationsfromtheright(ae_true, i1, i-2, i-1, i, workc1, works1, h, work, _state);
32230                 }
32231                 if( wantz )
32232                 {
32233 
32234                     /*
32235                      * Apply the transformation to Z.
32236                      */
32237                     workc1->ptr.p_double[1] = cs;
32238                     works1->ptr.p_double[1] = sn;
32239                     applyrotationsfromtheright(ae_true, iloz, iloz+nz-1, i-1, i, workc1, works1, z, work, _state);
32240                 }
32241             }
32242         }
32243 
32244         /*
32245          * return to start of the main loop with new value of I.
32246          */
32247         i = l-1;
32248     }
32249 }
32250 
32251 
/*************************************************************************
Computes the Schur factorization of a real 2-by-2 nonsymmetric matrix

    [ A  B ]
    [ C  D ]

in standard form. On exit the matrix is either upper triangular (C=0,
real eigenvalues on the diagonal) or has equal diagonal elements A=D
(complex conjugate pair of eigenvalues). The plane rotation that was
applied is returned in (CS,SN), and the eigenvalues are stored into
(RT1R,RT1I) and (RT2R,RT2I).

A, B, C, D are in-out parameters: they hold the transformed matrix on
exit. This routine mirrors LAPACK's DLANV2.
*************************************************************************/
static void hsschur_aux2x2schur(double* a,
     double* b,
     double* c,
     double* d,
     double* rt1r,
     double* rt1i,
     double* rt2r,
     double* rt2i,
     double* cs,
     double* sn,
     ae_state *_state)
{
    double multpl;
    double aa;
    double bb;
    double bcmax;
    double bcmis;
    double cc;
    double cs1;
    double dd;
    double eps;
    double p;
    double sab;
    double sac;
    double scl;
    double sigma;
    double sn1;
    double tau;
    double temp;
    double z;

    *rt1r = 0;
    *rt1i = 0;
    *rt2r = 0;
    *rt2i = 0;
    *cs = 0;
    *sn = 0;

    /*
     * MULTPL*EPS is the threshold below which the scaled discriminant Z
     * is considered "of the order of machine accuracy" (see below).
     */
    multpl = 4.0;
    eps = ae_machineepsilon;
    if( ae_fp_eq(*c,(double)(0)) )
    {

        /*
         * Matrix is already upper triangular: identity rotation.
         */
        *cs = (double)(1);
        *sn = (double)(0);
    }
    else
    {
        if( ae_fp_eq(*b,(double)(0)) )
        {

            /*
             * Lower triangular matrix: swap rows and columns
             * (90-degree rotation) to make it upper triangular.
             */
            *cs = (double)(0);
            *sn = (double)(1);
            temp = *d;
            *d = *a;
            *a = temp;
            *b = -*c;
            *c = (double)(0);
        }
        else
        {
            if( ae_fp_eq(*a-(*d),(double)(0))&&hsschur_extschursigntoone(*b, _state)!=hsschur_extschursigntoone(*c, _state) )
            {

                /*
                 * Equal diagonal and B*C<0: already in standard form for a
                 * complex conjugate pair, no rotation needed.
                 */
                *cs = (double)(1);
                *sn = (double)(0);
            }
            else
            {

                /*
                 * General case. Compute the scaled discriminant
                 * Z ~ (P^2+B*C)/SCL where P=(A-D)/2; scaling by SCL avoids
                 * overflow/underflow in the intermediate products.
                 */
                temp = *a-(*d);
                p = 0.5*temp;
                bcmax = ae_maxreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state);
                bcmis = ae_minreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state)*hsschur_extschursigntoone(*b, _state)*hsschur_extschursigntoone(*c, _state);
                scl = ae_maxreal(ae_fabs(p, _state), bcmax, _state);
                z = p/scl*p+bcmax/scl*bcmis;

                /*
                 * If Z is of the order of the machine accuracy, postpone the
                 * decision on the nature of eigenvalues
                 */
                if( ae_fp_greater_eq(z,multpl*eps) )
                {

                    /*
                     * Real eigenvalues. Compute A and D.
                     */
                    z = p+hsschur_extschursign(ae_sqrt(scl, _state)*ae_sqrt(z, _state), p, _state);
                    *a = *d+z;
                    *d = *d-bcmax/z*bcmis;

                    /*
                     * Compute B and the rotation matrix
                     */
                    tau = pythag2(*c, z, _state);
                    *cs = z/tau;
                    *sn = *c/tau;
                    *b = *b-(*c);
                    *c = (double)(0);
                }
                else
                {

                    /*
                     * Complex eigenvalues, or real (almost) equal eigenvalues.
                     * Make diagonal elements equal.
                     */
                    sigma = *b+(*c);
                    tau = pythag2(sigma, temp, _state);
                    *cs = ae_sqrt(0.5*(1+ae_fabs(sigma, _state)/tau), _state);
                    *sn = -p/(tau*(*cs))*hsschur_extschursign((double)(1), sigma, _state);

                    /*
                     * Compute [ AA  BB ] = [ A  B ] [ CS -SN ]
                     *         [ CC  DD ]   [ C  D ] [ SN  CS ]
                     */
                    aa = *a*(*cs)+*b*(*sn);
                    bb = -*a*(*sn)+*b*(*cs);
                    cc = *c*(*cs)+*d*(*sn);
                    dd = -*c*(*sn)+*d*(*cs);

                    /*
                     * Compute [ A  B ] = [ CS  SN ] [ AA  BB ]
                     *         [ C  D ]   [-SN  CS ] [ CC  DD ]
                     */
                    *a = aa*(*cs)+cc*(*sn);
                    *b = bb*(*cs)+dd*(*sn);
                    *c = -aa*(*sn)+cc*(*cs);
                    *d = -bb*(*sn)+dd*(*cs);
                    temp = 0.5*(*a+(*d));
                    *a = temp;
                    *d = temp;
                    if( ae_fp_neq(*c,(double)(0)) )
                    {
                        if( ae_fp_neq(*b,(double)(0)) )
                        {
                            if( hsschur_extschursigntoone(*b, _state)==hsschur_extschursigntoone(*c, _state) )
                            {

                                /*
                                 * Real eigenvalues: reduce to upper triangular form
                                 */
                                sab = ae_sqrt(ae_fabs(*b, _state), _state);
                                sac = ae_sqrt(ae_fabs(*c, _state), _state);
                                p = hsschur_extschursign(sab*sac, *c, _state);
                                tau = 1/ae_sqrt(ae_fabs(*b+(*c), _state), _state);
                                *a = temp+p;
                                *d = temp-p;
                                *b = *b-(*c);
                                *c = (double)(0);

                                /*
                                 * Compose the extra rotation (CS1,SN1) with the
                                 * one already accumulated in (CS,SN).
                                 */
                                cs1 = sab*tau;
                                sn1 = sac*tau;
                                temp = *cs*cs1-*sn*sn1;
                                *sn = *cs*sn1+*sn*cs1;
                                *cs = temp;
                            }
                        }
                        else
                        {

                            /*
                             * B=0, C!=0: swap to upper triangular by composing
                             * a 90-degree rotation into (CS,SN).
                             */
                            *b = -*c;
                            *c = (double)(0);
                            temp = *cs;
                            *cs = -*sn;
                            *sn = temp;
                        }
                    }
                }
            }
        }
    }

    /*
     * Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I).
     * If C=0 the result is triangular (real eigenvalues); otherwise the
     * imaginary part is sqrt(|B|)*sqrt(|C|).
     */
    *rt1r = *a;
    *rt2r = *d;
    if( ae_fp_eq(*c,(double)(0)) )
    {
        *rt1i = (double)(0);
        *rt2i = (double)(0);
    }
    else
    {
        *rt1i = ae_sqrt(ae_fabs(*b, _state), _state)*ae_sqrt(ae_fabs(*c, _state), _state);
        *rt2i = -*rt1i;
    }
}
32439 
32440 
hsschur_extschursign(double a,double b,ae_state * _state)32441 static double hsschur_extschursign(double a, double b, ae_state *_state)
32442 {
32443     double result;
32444 
32445 
32446     if( ae_fp_greater_eq(b,(double)(0)) )
32447     {
32448         result = ae_fabs(a, _state);
32449     }
32450     else
32451     {
32452         result = -ae_fabs(a, _state);
32453     }
32454     return result;
32455 }
32456 
32457 
hsschur_extschursigntoone(double b,ae_state * _state)32458 static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state)
32459 {
32460     ae_int_t result;
32461 
32462 
32463     if( ae_fp_greater_eq(b,(double)(0)) )
32464     {
32465         result = 1;
32466     }
32467     else
32468     {
32469         result = -1;
32470     }
32471     return result;
32472 }
32473 
32474 
32475 #endif
32476 #if defined(AE_COMPILE_EVD) || !defined(AE_PARTIAL_BUILD)
32477 
32478 
32479 /*************************************************************************
32480 This function initializes subspace iteration solver. This solver  is  used
32481 to solve symmetric real eigenproblems where just a few (top K) eigenvalues
32482 and corresponding eigenvectors are required.
32483 
32484 This solver can be significantly faster than  complete  EVD  decomposition
32485 in the following case:
32486 * when only a small fraction of the top eigenpairs of a dense matrix is
32487   required. When K approaches N, this solver is slower than complete dense
32488   EVD
32489 * when problem matrix is sparse (and/or is not known explicitly, i.e. only
32490   matrix-matrix product can be performed)
32491 
32492 USAGE (explicit dense/sparse matrix):
32493 1. User initializes algorithm state with eigsubspacecreate() call
32494 2. [optional] User tunes solver parameters by calling eigsubspacesetcond()
32495    or other functions
32496 3. User  calls  eigsubspacesolvedense() or eigsubspacesolvesparse() methods,
32497    which take algorithm state and 2D array or alglib.sparsematrix object.
32498 
32499 USAGE (out-of-core mode):
32500 1. User initializes algorithm state with eigsubspacecreate() call
32501 2. [optional] User tunes solver parameters by calling eigsubspacesetcond()
32502    or other functions
32503 3. User activates out-of-core mode of  the  solver  and  repeatedly  calls
32504    communication functions in a loop like below:
32505    > alglib.eigsubspaceoocstart(state)
32506    > while alglib.eigsubspaceooccontinue(state) do
32507    >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32508    >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32509    >     [calculate  Y=A*X, with X=R^NxM]
32510    >     alglib.eigsubspaceoocsendresult(state, in Y)
32511    > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32512 
32513 INPUT PARAMETERS:
32514     N       -   problem dimensionality, N>0
32515     K       -   number of top eigenvectors to calculate, 0<K<=N.
32516 
32517 OUTPUT PARAMETERS:
32518     State   -   structure which stores algorithm state
32519 
32520 NOTE: if you solve many similar EVD problems you may  find  it  useful  to
32521       reuse previous subspace as warm-start point for new EVD problem.  It
32522       can be done with eigsubspacesetwarmstart() function.
32523 
32524   -- ALGLIB --
32525      Copyright 16.01.2017 by Bochkanov Sergey
32526 *************************************************************************/
void eigsubspacecreate(ae_int_t n,
     ae_int_t k,
     eigsubspacestate* state,
     ae_state *_state)
{

    /*
     * Reset the destination structure first so that a previously used
     * State object is returned to a freshly-initialized condition.
     */
    _eigsubspacestate_clear(state);

    /*
     * Validate inputs, then delegate all real work to the buffered
     * constructor (which reuses any memory already held by State).
     */
    ae_assert(n>0, "EigSubspaceCreate: N<=0", _state);
    ae_assert(k>0, "EigSubspaceCreate: K<=0", _state);
    ae_assert(k<=n, "EigSubspaceCreate: K>N", _state);
    eigsubspacecreatebuf(n, k, state, _state);
}
32540 
32541 
32542 /*************************************************************************
32543 Buffered version of constructor which aims to reuse  previously  allocated
32544 memory as much as possible.
32545 
32546   -- ALGLIB --
32547      Copyright 16.01.2017 by Bochkanov Sergey
32548 *************************************************************************/
void eigsubspacecreatebuf(ae_int_t n,
     ae_int_t k,
     eigsubspacestate* state,
     ae_state *_state)
{


    /*
     * NOTE(review): assertion messages reference "EigSubspaceCreate" (the
     * public wrapper) rather than this function's own name — presumably
     * intentional since this is its backend; verify against project style.
     */
    ae_assert(n>0, "EigSubspaceCreate: N<=0", _state);
    ae_assert(k>0, "EigSubspaceCreate: K<=0", _state);
    ae_assert(k<=n, "EigSubspaceCreate: K>N", _state);

    /*
     * Initialize algorithm parameters.
     *
     * NWork is the working subspace width: at least 2*K and at least 8,
     * but never larger than the problem dimension N.
     *
     * Passing eps=0 and maxits=0 to eigsubspacesetcond() selects the
     * automatic default stopping criteria (see eigsubspacesetcond docs).
     */
    state->running = ae_false;
    state->n = n;
    state->k = k;
    state->nwork = ae_minint(ae_maxint(2*k, 8, _state), n, _state);
    state->eigenvectorsneeded = 1;
    state->usewarmstart = ae_false;
    state->firstcall = ae_true;
    eigsubspacesetcond(state, 0.0, 0, _state);

    /*
     * Allocate temporaries (grow-only, so previously allocated buffers
     * of sufficient size are reused).
     */
    rmatrixsetlengthatleast(&state->x, state->n, state->nwork, _state);
    rmatrixsetlengthatleast(&state->ax, state->n, state->nwork, _state);
}
32578 
32579 
32580 /*************************************************************************
32581 This function sets stopping critera for the solver:
32582 * error in eigenvector/value allowed by solver
32583 * maximum number of iterations to perform
32584 
32585 INPUT PARAMETERS:
32586     State       -   solver structure
32587     Eps         -   eps>=0,  with non-zero value used to tell solver  that
32588                     it can  stop  after  all  eigenvalues  converged  with
32589                     error  roughly  proportional  to  eps*MAX(LAMBDA_MAX),
32590                     where LAMBDA_MAX is a maximum eigenvalue.
32591                     Zero  value  means  that  no  check  for  precision is
32592                     performed.
32593     MaxIts      -   maxits>=0,  with non-zero value used  to  tell  solver
32594                     that it can stop after maxits  steps  (no  matter  how
32595                     precise current estimate is)
32596 
32597 NOTE: passing  eps=0  and  maxits=0  results  in  automatic  selection  of
32598       moderate eps as stopping criteria (1.0E-6 in current implementation,
32599       but it may change without notice).
32600 
32601 NOTE: very small values of eps are possible (say, 1.0E-12),  although  the
32602       larger problem you solve (N and/or K), the  harder  it  is  to  find
32603       precise eigenvectors because rounding errors tend to accumulate.
32604 
32605 NOTE: passing non-zero eps results in  some performance  penalty,  roughly
32606       equal to 2N*(2K)^2 FLOPs per iteration. These additional computations
32607       are required in order to estimate current error in  eigenvalues  via
32608       Rayleigh-Ritz process.
32609       Most of this additional time is  spent  in  construction  of  ~2Kx2K
32610       symmetric  subproblem  whose  eigenvalues  are  checked  with  exact
32611       eigensolver.
32612       This additional time is negligible if you search for eigenvalues  of
32613       the large dense matrix, but may become noticeable on  highly  sparse
32614       EVD problems, where cost of matrix-matrix product is low.
32615       If you set eps to exactly zero,  Rayleigh-Ritz  phase  is completely
32616       turned off.
32617 
32618   -- ALGLIB --
32619      Copyright 16.01.2017 by Bochkanov Sergey
32620 *************************************************************************/
eigsubspacesetcond(eigsubspacestate * state,double eps,ae_int_t maxits,ae_state * _state)32621 void eigsubspacesetcond(eigsubspacestate* state,
32622      double eps,
32623      ae_int_t maxits,
32624      ae_state *_state)
32625 {
32626 
32627 
32628     ae_assert(!state->running, "EigSubspaceSetCond: solver is already running", _state);
32629     ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "EigSubspaceSetCond: Eps<0 or NAN/INF", _state);
32630     ae_assert(maxits>=0, "EigSubspaceSetCond: MaxIts<0", _state);
32631     if( ae_fp_eq(eps,(double)(0))&&maxits==0 )
32632     {
32633         eps = 1.0E-6;
32634     }
32635     state->eps = eps;
32636     state->maxits = maxits;
32637 }
32638 
32639 
32640 /*************************************************************************
32641 This function sets warm-start mode of the solver: next call to the  solver
32642 will reuse previous subspace as warm-start  point.  It  can  significantly
32643 speed-up convergence when you solve many similar eigenproblems.
32644 
32645 INPUT PARAMETERS:
32646     State       -   solver structure
32647     UseWarmStart-   either True or False
32648 
32649   -- ALGLIB --
32650      Copyright 12.11.2017 by Bochkanov Sergey
32651 *************************************************************************/
eigsubspacesetwarmstart(eigsubspacestate * state,ae_bool usewarmstart,ae_state * _state)32652 void eigsubspacesetwarmstart(eigsubspacestate* state,
32653      ae_bool usewarmstart,
32654      ae_state *_state)
32655 {
32656 
32657 
32658     ae_assert(!state->running, "EigSubspaceSetWarmStart: solver is already running", _state);
32659     state->usewarmstart = usewarmstart;
32660 }
32661 
32662 
32663 /*************************************************************************
32664 This  function  initiates  out-of-core  mode  of  subspace eigensolver. It
32665 should be used in conjunction with other out-of-core-related functions  of
32666 this subspackage in a loop like below:
32667 
32668 > alglib.eigsubspaceoocstart(state)
32669 > while alglib.eigsubspaceooccontinue(state) do
32670 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32671 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32672 >     [calculate  Y=A*X, with X=R^NxM]
32673 >     alglib.eigsubspaceoocsendresult(state, in Y)
32674 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32675 
32676 INPUT PARAMETERS:
32677     State       -   solver object
32678     MType       -   matrix type:
32679                     * 0 for real  symmetric  matrix  (solver  assumes that
32680                       matrix  being   processed  is  symmetric;  symmetric
32681                       direct eigensolver is used for  smaller  subproblems
32682                       arising during solution of larger "full" task)
32683                     Future versions of ALGLIB may  introduce  support  for
32684                     other  matrix   types;   for   now,   only   symmetric
32685                     eigenproblems are supported.
32686 
32687 
32688   -- ALGLIB --
32689      Copyright 16.01.2017 by Bochkanov Sergey
32690 *************************************************************************/
eigsubspaceoocstart(eigsubspacestate * state,ae_int_t mtype,ae_state * _state)32691 void eigsubspaceoocstart(eigsubspacestate* state,
32692      ae_int_t mtype,
32693      ae_state *_state)
32694 {
32695 
32696 
32697     ae_assert(!state->running, "EigSubspaceStart: solver is already running", _state);
32698     ae_assert(mtype==0, "EigSubspaceStart: incorrect mtype parameter", _state);
32699     ae_vector_set_length(&state->rstate.ia, 7+1, _state);
32700     ae_vector_set_length(&state->rstate.ra, 1+1, _state);
32701     state->rstate.stage = -1;
32702     evd_clearrfields(state, _state);
32703     state->running = ae_true;
32704     state->matrixtype = mtype;
32705 }
32706 
32707 
32708 /*************************************************************************
32709 This function performs subspace iteration  in  the  out-of-core  mode.  It
32710 should be used in conjunction with other out-of-core-related functions  of
32711 this subspackage in a loop like below:
32712 
32713 > alglib.eigsubspaceoocstart(state)
32714 > while alglib.eigsubspaceooccontinue(state) do
32715 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32716 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32717 >     [calculate  Y=A*X, with X=R^NxM]
32718 >     alglib.eigsubspaceoocsendresult(state, in Y)
32719 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32720 
32721 
32722   -- ALGLIB --
32723      Copyright 16.01.2017 by Bochkanov Sergey
32724 *************************************************************************/
eigsubspaceooccontinue(eigsubspacestate * state,ae_state * _state)32725 ae_bool eigsubspaceooccontinue(eigsubspacestate* state, ae_state *_state)
32726 {
32727     ae_bool result;
32728 
32729 
32730     ae_assert(state->running, "EigSubspaceContinue: solver is not running", _state);
32731     result = eigsubspaceiteration(state, _state);
32732     state->running = result;
32733     return result;
32734 }
32735 
32736 
32737 /*************************************************************************
32738 This function is used to retrieve information  about  out-of-core  request
32739 sent by solver to user code: request type (current version  of  the solver
32740 sends only requests for matrix-matrix products) and request size (size  of
32741 the matrices being multiplied).
32742 
32743 This function returns just request metrics; in order  to  get contents  of
32744 the matrices being multiplied, use eigsubspaceoocgetrequestdata().
32745 
32746 It should be used in conjunction with other out-of-core-related  functions
32747 of this subspackage in a loop like below:
32748 
32749 > alglib.eigsubspaceoocstart(state)
32750 > while alglib.eigsubspaceooccontinue(state) do
32751 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32752 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32753 >     [calculate  Y=A*X, with X=R^NxM]
32754 >     alglib.eigsubspaceoocsendresult(state, in Y)
32755 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32756 
32757 INPUT PARAMETERS:
32758     State           -   solver running in out-of-core mode
32759 
32760 OUTPUT PARAMETERS:
32761     RequestType     -   type of the request to process:
32762                         * 0 - for matrix-matrix product A*X, with A  being
32763                           NxN matrix whose eigenvalues/vectors are needed,
32764                           and X being NxREQUESTSIZE one which is  returned
32765                           by the eigsubspaceoocgetrequestdata().
32766     RequestSize     -   size of the X matrix (number of columns),  usually
32767                         it is several times larger than number of  vectors
32768                         K requested by user.
32769 
32770 
32771   -- ALGLIB --
32772      Copyright 16.01.2017 by Bochkanov Sergey
32773 *************************************************************************/
eigsubspaceoocgetrequestinfo(eigsubspacestate * state,ae_int_t * requesttype,ae_int_t * requestsize,ae_state * _state)32774 void eigsubspaceoocgetrequestinfo(eigsubspacestate* state,
32775      ae_int_t* requesttype,
32776      ae_int_t* requestsize,
32777      ae_state *_state)
32778 {
32779 
32780     *requesttype = 0;
32781     *requestsize = 0;
32782 
32783     ae_assert(state->running, "EigSubspaceOOCGetRequestInfo: solver is not running", _state);
32784     *requesttype = state->requesttype;
32785     *requestsize = state->requestsize;
32786 }
32787 
32788 
32789 /*************************************************************************
32790 This function is used to retrieve information  about  out-of-core  request
32791 sent by solver to user code: matrix X (array[N,RequestSize) which have  to
32792 be multiplied by out-of-core matrix A in a product A*X.
32793 
32794 This function returns just request data; in order to get size of  the data
32795 prior to processing requestm, use eigsubspaceoocgetrequestinfo().
32796 
32797 It should be used in conjunction with other out-of-core-related  functions
32798 of this subspackage in a loop like below:
32799 
32800 > alglib.eigsubspaceoocstart(state)
32801 > while alglib.eigsubspaceooccontinue(state) do
32802 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32803 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32804 >     [calculate  Y=A*X, with X=R^NxM]
32805 >     alglib.eigsubspaceoocsendresult(state, in Y)
32806 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32807 
32808 INPUT PARAMETERS:
32809     State           -   solver running in out-of-core mode
32810     X               -   possibly  preallocated   storage;  reallocated  if
32811                         needed, left unchanged, if large enough  to  store
32812                         request data.
32813 
32814 OUTPUT PARAMETERS:
32815     X               -   array[N,RequestSize] or larger, leading  rectangle
32816                         is filled with dense matrix X.
32817 
32818 
32819   -- ALGLIB --
32820      Copyright 16.01.2017 by Bochkanov Sergey
32821 *************************************************************************/
eigsubspaceoocgetrequestdata(eigsubspacestate * state,ae_matrix * x,ae_state * _state)32822 void eigsubspaceoocgetrequestdata(eigsubspacestate* state,
32823      /* Real    */ ae_matrix* x,
32824      ae_state *_state)
32825 {
32826     ae_int_t i;
32827     ae_int_t j;
32828 
32829 
32830     ae_assert(state->running, "EigSubspaceOOCGetRequestInfo: solver is not running", _state);
32831     rmatrixsetlengthatleast(x, state->n, state->requestsize, _state);
32832     for(i=0; i<=state->n-1; i++)
32833     {
32834         for(j=0; j<=state->requestsize-1; j++)
32835         {
32836             x->ptr.pp_double[i][j] = state->x.ptr.pp_double[i][j];
32837         }
32838     }
32839 }
32840 
32841 
32842 /*************************************************************************
32843 This function is used to send user reply to out-of-core  request  sent  by
32844 solver. Usually it is product A*X for returned by solver matrix X.
32845 
32846 It should be used in conjunction with other out-of-core-related  functions
32847 of this subspackage in a loop like below:
32848 
32849 > alglib.eigsubspaceoocstart(state)
32850 > while alglib.eigsubspaceooccontinue(state) do
32851 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32852 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32853 >     [calculate  Y=A*X, with X=R^NxM]
32854 >     alglib.eigsubspaceoocsendresult(state, in Y)
32855 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32856 
32857 INPUT PARAMETERS:
32858     State           -   solver running in out-of-core mode
32859     AX              -   array[N,RequestSize] or larger, leading  rectangle
32860                         is filled with product A*X.
32861 
32862 
32863   -- ALGLIB --
32864      Copyright 16.01.2017 by Bochkanov Sergey
32865 *************************************************************************/
eigsubspaceoocsendresult(eigsubspacestate * state,ae_matrix * ax,ae_state * _state)32866 void eigsubspaceoocsendresult(eigsubspacestate* state,
32867      /* Real    */ ae_matrix* ax,
32868      ae_state *_state)
32869 {
32870     ae_int_t i;
32871     ae_int_t j;
32872 
32873 
32874     ae_assert(state->running, "EigSubspaceOOCGetRequestInfo: solver is not running", _state);
32875     for(i=0; i<=state->n-1; i++)
32876     {
32877         for(j=0; j<=state->requestsize-1; j++)
32878         {
32879             state->ax.ptr.pp_double[i][j] = ax->ptr.pp_double[i][j];
32880         }
32881     }
32882 }
32883 
32884 
32885 /*************************************************************************
32886 This  function  finalizes out-of-core  mode  of  subspace eigensolver.  It
32887 should be used in conjunction with other out-of-core-related functions  of
32888 this subspackage in a loop like below:
32889 
32890 > alglib.eigsubspaceoocstart(state)
32891 > while alglib.eigsubspaceooccontinue(state) do
32892 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32893 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32894 >     [calculate  Y=A*X, with X=R^NxM]
32895 >     alglib.eigsubspaceoocsendresult(state, in Y)
32896 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32897 
32898 INPUT PARAMETERS:
32899     State       -   solver state
32900 
32901 OUTPUT PARAMETERS:
32902     W           -   array[K], depending on solver settings:
32903                     * top  K  eigenvalues ordered  by  descending   -   if
32904                       eigenvectors are returned in Z
32905                     * zeros - if invariant subspace is returned in Z
32906     Z           -   array[N,K], depending on solver settings either:
32907                     * matrix of eigenvectors found
32908                     * orthogonal basis of K-dimensional invariant subspace
32909     Rep         -   report with additional parameters
32910 
32911   -- ALGLIB --
32912      Copyright 16.01.2017 by Bochkanov Sergey
32913 *************************************************************************/
eigsubspaceoocstop(eigsubspacestate * state,ae_vector * w,ae_matrix * z,eigsubspacereport * rep,ae_state * _state)32914 void eigsubspaceoocstop(eigsubspacestate* state,
32915      /* Real    */ ae_vector* w,
32916      /* Real    */ ae_matrix* z,
32917      eigsubspacereport* rep,
32918      ae_state *_state)
32919 {
32920     ae_int_t n;
32921     ae_int_t k;
32922     ae_int_t i;
32923     ae_int_t j;
32924 
32925     ae_vector_clear(w);
32926     ae_matrix_clear(z);
32927     _eigsubspacereport_clear(rep);
32928 
32929     ae_assert(!state->running, "EigSubspaceStop: solver is still running", _state);
32930     n = state->n;
32931     k = state->k;
32932     ae_vector_set_length(w, k, _state);
32933     ae_matrix_set_length(z, n, k, _state);
32934     for(i=0; i<=k-1; i++)
32935     {
32936         w->ptr.p_double[i] = state->rw.ptr.p_double[i];
32937     }
32938     for(i=0; i<=n-1; i++)
32939     {
32940         for(j=0; j<=k-1; j++)
32941         {
32942             z->ptr.pp_double[i][j] = state->rq.ptr.pp_double[i][j];
32943         }
32944     }
32945     rep->iterationscount = state->repiterationscount;
32946 }
32947 
32948 
32949 /*************************************************************************
32950 This  function runs subspace eigensolver for dense NxN symmetric matrix A,
32951 given by its upper or lower triangle.
32952 
32953 This function can not process nonsymmetric matrices.
32954 
32955 INPUT PARAMETERS:
32956     State       -   solver state
32957     A           -   array[N,N], symmetric NxN matrix given by one  of  its
32958                     triangles
32959     IsUpper     -   whether upper or lower triangle of  A  is  given  (the
32960                     other one is not referenced at all).
32961 
32962 OUTPUT PARAMETERS:
32963     W           -   array[K], top  K  eigenvalues ordered  by   descending
32964                     of their absolute values
32965     Z           -   array[N,K], matrix of eigenvectors found
32966     Rep         -   report with additional parameters
32967 
32968 NOTE: internally this function allocates a copy of NxN dense A. You should
32969       take it into account when working with very large matrices occupying
32970       almost all RAM.
32971 
32972   ! FREE EDITION OF ALGLIB:
32973   !
32974   ! Free Edition of ALGLIB supports following important features for  this
32975   ! function:
32976   ! * C++ version: x64 SIMD support using C++ intrinsics
32977   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
32978   !
32979   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
32980   ! Reference Manual in order  to  find  out  how to activate SIMD support
32981   ! in ALGLIB.
32982 
32983   ! COMMERCIAL EDITION OF ALGLIB:
32984   !
32985   ! Commercial Edition of ALGLIB includes following important improvements
32986   ! of this function:
32987   ! * high-performance native backend with same C# interface (C# version)
32988   ! * multithreading support (C++ and C# versions)
32989   ! * hardware vendor (Intel) implementations of linear algebra primitives
32990   !   (C++ and C# versions, x86/x64 platform)
32991   !
32992   ! We recommend you to read 'Working with commercial version' section  of
32993   ! ALGLIB Reference Manual in order to find out how to  use  performance-
32994   ! related features provided by commercial edition of ALGLIB.
32995 
32996   -- ALGLIB --
32997      Copyright 16.01.2017 by Bochkanov Sergey
32998 *************************************************************************/
eigsubspacesolvedenses(eigsubspacestate * state,ae_matrix * a,ae_bool isupper,ae_vector * w,ae_matrix * z,eigsubspacereport * rep,ae_state * _state)32999 void eigsubspacesolvedenses(eigsubspacestate* state,
33000      /* Real    */ ae_matrix* a,
33001      ae_bool isupper,
33002      /* Real    */ ae_vector* w,
33003      /* Real    */ ae_matrix* z,
33004      eigsubspacereport* rep,
33005      ae_state *_state)
33006 {
33007     ae_frame _frame_block;
33008     ae_int_t n;
33009     ae_int_t m;
33010     ae_int_t i;
33011     ae_int_t j;
33012     ae_int_t k;
33013     double v;
33014     ae_matrix acopy;
33015 
33016     ae_frame_make(_state, &_frame_block);
33017     memset(&acopy, 0, sizeof(acopy));
33018     ae_vector_clear(w);
33019     ae_matrix_clear(z);
33020     _eigsubspacereport_clear(rep);
33021     ae_matrix_init(&acopy, 0, 0, DT_REAL, _state, ae_true);
33022 
33023     ae_assert(!state->running, "EigSubspaceSolveDenseS: solver is still running", _state);
33024     n = state->n;
33025 
33026     /*
33027      * Allocate copy of A, copy one triangle to another
33028      */
33029     ae_matrix_set_length(&acopy, n, n, _state);
33030     for(i=0; i<=n-1; i++)
33031     {
33032         for(j=i; j<=n-1; j++)
33033         {
33034             if( isupper )
33035             {
33036                 v = a->ptr.pp_double[i][j];
33037             }
33038             else
33039             {
33040                 v = a->ptr.pp_double[j][i];
33041             }
33042             acopy.ptr.pp_double[i][j] = v;
33043             acopy.ptr.pp_double[j][i] = v;
33044         }
33045     }
33046 
33047     /*
33048      * Start iterations
33049      */
33050     state->matrixtype = 0;
33051     ae_vector_set_length(&state->rstate.ia, 7+1, _state);
33052     ae_vector_set_length(&state->rstate.ra, 1+1, _state);
33053     state->rstate.stage = -1;
33054     evd_clearrfields(state, _state);
33055     while(eigsubspaceiteration(state, _state))
33056     {
33057 
33058         /*
33059          * Calculate A*X with RMatrixGEMM
33060          */
33061         ae_assert(state->requesttype==0, "EigSubspaceSolveDense: integrity check failed", _state);
33062         ae_assert(state->requestsize>0, "EigSubspaceSolveDense: integrity check failed", _state);
33063         m = state->requestsize;
33064         rmatrixgemm(n, m, n, 1.0, &acopy, 0, 0, 0, &state->x, 0, 0, 0, 0.0, &state->ax, 0, 0, _state);
33065     }
33066     k = state->k;
33067     ae_vector_set_length(w, k, _state);
33068     ae_matrix_set_length(z, n, k, _state);
33069     for(i=0; i<=k-1; i++)
33070     {
33071         w->ptr.p_double[i] = state->rw.ptr.p_double[i];
33072     }
33073     for(i=0; i<=n-1; i++)
33074     {
33075         for(j=0; j<=k-1; j++)
33076         {
33077             z->ptr.pp_double[i][j] = state->rq.ptr.pp_double[i][j];
33078         }
33079     }
33080     rep->iterationscount = state->repiterationscount;
33081     ae_frame_leave(_state);
33082 }
33083 
33084 
33085 /*************************************************************************
33086 This  function runs eigensolver for dense NxN symmetric matrix A, given by
33087 upper or lower triangle.
33088 
33089 This function can not process nonsymmetric matrices.
33090 
33091 INPUT PARAMETERS:
33092     State       -   solver state
33093     A           -   NxN symmetric matrix given by one of its triangles
33094     IsUpper     -   whether upper or lower triangle of  A  is  given  (the
33095                     other one is not referenced at all).
33096 
33097 OUTPUT PARAMETERS:
33098     W           -   array[K], top  K  eigenvalues ordered  by   descending
33099                     of their absolute values
33100     Z           -   array[N,K], matrix of eigenvectors found
33101     Rep         -   report with additional parameters
33102 
33103   -- ALGLIB --
33104      Copyright 16.01.2017 by Bochkanov Sergey
33105 *************************************************************************/
eigsubspacesolvesparses(eigsubspacestate * state,sparsematrix * a,ae_bool isupper,ae_vector * w,ae_matrix * z,eigsubspacereport * rep,ae_state * _state)33106 void eigsubspacesolvesparses(eigsubspacestate* state,
33107      sparsematrix* a,
33108      ae_bool isupper,
33109      /* Real    */ ae_vector* w,
33110      /* Real    */ ae_matrix* z,
33111      eigsubspacereport* rep,
33112      ae_state *_state)
33113 {
33114     ae_int_t n;
33115     ae_int_t i;
33116     ae_int_t j;
33117     ae_int_t k;
33118 
33119     ae_vector_clear(w);
33120     ae_matrix_clear(z);
33121     _eigsubspacereport_clear(rep);
33122 
33123     ae_assert(!state->running, "EigSubspaceSolveSparseS: solver is still running", _state);
33124     n = state->n;
33125     state->matrixtype = 0;
33126     ae_vector_set_length(&state->rstate.ia, 7+1, _state);
33127     ae_vector_set_length(&state->rstate.ra, 1+1, _state);
33128     state->rstate.stage = -1;
33129     evd_clearrfields(state, _state);
33130     while(eigsubspaceiteration(state, _state))
33131     {
33132         ae_assert(state->requesttype==0, "EigSubspaceSolveDense: integrity check failed", _state);
33133         ae_assert(state->requestsize>0, "EigSubspaceSolveDense: integrity check failed", _state);
33134         sparsesmm(a, isupper, &state->x, state->requestsize, &state->ax, _state);
33135     }
33136     k = state->k;
33137     ae_vector_set_length(w, k, _state);
33138     ae_matrix_set_length(z, n, k, _state);
33139     for(i=0; i<=k-1; i++)
33140     {
33141         w->ptr.p_double[i] = state->rw.ptr.p_double[i];
33142     }
33143     for(i=0; i<=n-1; i++)
33144     {
33145         for(j=0; j<=k-1; j++)
33146         {
33147             z->ptr.pp_double[i][j] = state->rq.ptr.pp_double[i][j];
33148         }
33149     }
33150     rep->iterationscount = state->repiterationscount;
33151 }
33152 
33153 
33154 /*************************************************************************
33155 Internal r-comm function.
33156 
33157   -- ALGLIB --
33158      Copyright 16.01.2017 by Bochkanov Sergey
33159 *************************************************************************/
eigsubspaceiteration(eigsubspacestate * state,ae_state * _state)33160 ae_bool eigsubspaceiteration(eigsubspacestate* state, ae_state *_state)
33161 {
33162     ae_int_t n;
33163     ae_int_t nwork;
33164     ae_int_t k;
33165     ae_int_t cnt;
33166     ae_int_t i;
33167     ae_int_t i1;
33168     ae_int_t j;
33169     double vv;
33170     double v;
33171     ae_int_t convcnt;
33172     ae_bool result;
33173 
33174 
33175 
33176     /*
33177      * Reverse communication preparations
33178      * I know it looks ugly, but it works the same way
33179      * anywhere from C++ to Python.
33180      *
33181      * This code initializes locals by:
33182      * * random values determined during code
33183      *   generation - on first subroutine call
33184      * * values from previous call - on subsequent calls
33185      */
33186     if( state->rstate.stage>=0 )
33187     {
33188         n = state->rstate.ia.ptr.p_int[0];
33189         nwork = state->rstate.ia.ptr.p_int[1];
33190         k = state->rstate.ia.ptr.p_int[2];
33191         cnt = state->rstate.ia.ptr.p_int[3];
33192         i = state->rstate.ia.ptr.p_int[4];
33193         i1 = state->rstate.ia.ptr.p_int[5];
33194         j = state->rstate.ia.ptr.p_int[6];
33195         convcnt = state->rstate.ia.ptr.p_int[7];
33196         vv = state->rstate.ra.ptr.p_double[0];
33197         v = state->rstate.ra.ptr.p_double[1];
33198     }
33199     else
33200     {
33201         n = 359;
33202         nwork = -58;
33203         k = -919;
33204         cnt = -909;
33205         i = 81;
33206         i1 = 255;
33207         j = 74;
33208         convcnt = -788;
33209         vv = 809;
33210         v = 205;
33211     }
33212     if( state->rstate.stage==0 )
33213     {
33214         goto lbl_0;
33215     }
33216 
33217     /*
33218      * Routine body
33219      */
33220     n = state->n;
33221     k = state->k;
33222     nwork = state->nwork;
33223 
33224     /*
33225      * Initialize RNG. Deterministic initialization (with fixed
33226      * seed) is required because we need deterministic behavior
33227      * of the entire solver.
33228      */
33229     hqrndseed(453, 463664, &state->rs, _state);
33230 
33231     /*
33232      * Prepare iteration
33233      * Initialize QNew with random orthogonal matrix (or reuse its previous value).
33234      */
33235     state->repiterationscount = 0;
33236     rmatrixsetlengthatleast(&state->qcur, nwork, n, _state);
33237     rmatrixsetlengthatleast(&state->qnew, nwork, n, _state);
33238     rmatrixsetlengthatleast(&state->znew, nwork, n, _state);
33239     rvectorsetlengthatleast(&state->wcur, nwork, _state);
33240     rvectorsetlengthatleast(&state->wprev, nwork, _state);
33241     rvectorsetlengthatleast(&state->wrank, nwork, _state);
33242     rmatrixsetlengthatleast(&state->x, n, nwork, _state);
33243     rmatrixsetlengthatleast(&state->ax, n, nwork, _state);
33244     rmatrixsetlengthatleast(&state->rq, n, k, _state);
33245     rvectorsetlengthatleast(&state->rw, k, _state);
33246     rmatrixsetlengthatleast(&state->rz, nwork, k, _state);
33247     rmatrixsetlengthatleast(&state->r, nwork, nwork, _state);
33248     for(i=0; i<=nwork-1; i++)
33249     {
33250         state->wprev.ptr.p_double[i] = -1.0;
33251     }
33252     if( !state->usewarmstart||state->firstcall )
33253     {
33254 
33255         /*
33256          * Use Q0 (either no warm start request, or warm start was
33257          * requested by user - but it is first call).
33258          *
33259          */
33260         if( state->firstcall )
33261         {
33262 
33263             /*
33264              * First call, generate Q0
33265              */
33266             for(i=0; i<=nwork-1; i++)
33267             {
33268                 for(j=0; j<=n-1; j++)
33269                 {
33270                     state->znew.ptr.pp_double[i][j] = hqrnduniformr(&state->rs, _state)-0.5;
33271                 }
33272             }
33273             rmatrixlq(&state->znew, nwork, n, &state->tau, _state);
33274             rmatrixlqunpackq(&state->znew, nwork, n, &state->tau, nwork, &state->q0, _state);
33275             state->firstcall = ae_false;
33276         }
33277         rmatrixcopy(nwork, n, &state->q0, 0, 0, &state->qnew, 0, 0, _state);
33278     }
33279 
33280     /*
33281      * Start iteration
33282      */
33283     state->repiterationscount = 0;
33284     convcnt = 0;
33285 lbl_1:
33286     if( !((state->maxits==0||state->repiterationscount<state->maxits)&&convcnt<evd_stepswithintol) )
33287     {
33288         goto lbl_2;
33289     }
33290 
33291     /*
33292      * Update QCur := QNew
33293      *
33294      * Calculate A*Q'
33295      */
33296     rmatrixcopy(nwork, n, &state->qnew, 0, 0, &state->qcur, 0, 0, _state);
33297     rmatrixtranspose(nwork, n, &state->qcur, 0, 0, &state->x, 0, 0, _state);
33298     evd_clearrfields(state, _state);
33299     state->requesttype = 0;
33300     state->requestsize = nwork;
33301     state->rstate.stage = 0;
33302     goto lbl_rcomm;
33303 lbl_0:
33304 
33305     /*
33306      * Perform Rayleigh-Ritz step to estimate convergence of diagonal eigenvalues
33307      */
33308     if( ae_fp_greater(state->eps,(double)(0)) )
33309     {
33310         ae_assert(state->matrixtype==0, "integrity check failed", _state);
33311         rmatrixsetlengthatleast(&state->r, nwork, nwork, _state);
33312         rmatrixgemm(nwork, nwork, n, 1.0, &state->qcur, 0, 0, 0, &state->ax, 0, 0, 0, 0.0, &state->r, 0, 0, _state);
33313         if( !smatrixevd(&state->r, nwork, 0, ae_true, &state->wcur, &state->dummy, _state) )
33314         {
33315             ae_assert(ae_false, "EigSubspace: direct eigensolver failed to converge", _state);
33316         }
33317         for(j=0; j<=nwork-1; j++)
33318         {
33319             state->wrank.ptr.p_double[j] = ae_fabs(state->wcur.ptr.p_double[j], _state);
33320         }
33321         rankxuntied(&state->wrank, nwork, &state->buf, _state);
33322         v = (double)(0);
33323         vv = (double)(0);
33324         for(j=0; j<=nwork-1; j++)
33325         {
33326             if( ae_fp_greater_eq(state->wrank.ptr.p_double[j],(double)(nwork-k)) )
33327             {
33328                 v = ae_maxreal(v, ae_fabs(state->wcur.ptr.p_double[j]-state->wprev.ptr.p_double[j], _state), _state);
33329                 vv = ae_maxreal(vv, ae_fabs(state->wcur.ptr.p_double[j], _state), _state);
33330             }
33331         }
33332         if( ae_fp_eq(vv,(double)(0)) )
33333         {
33334             vv = (double)(1);
33335         }
33336         if( ae_fp_less_eq(v,state->eps*vv) )
33337         {
33338             inc(&convcnt, _state);
33339         }
33340         else
33341         {
33342             convcnt = 0;
33343         }
33344         for(j=0; j<=nwork-1; j++)
33345         {
33346             state->wprev.ptr.p_double[j] = state->wcur.ptr.p_double[j];
33347         }
33348     }
33349 
33350     /*
33351      * QR renormalization and update of QNew
33352      */
33353     rmatrixtranspose(n, nwork, &state->ax, 0, 0, &state->znew, 0, 0, _state);
33354     rmatrixlq(&state->znew, nwork, n, &state->tau, _state);
33355     rmatrixlqunpackq(&state->znew, nwork, n, &state->tau, nwork, &state->qnew, _state);
33356 
33357     /*
33358      * Update iteration index
33359      */
33360     state->repiterationscount = state->repiterationscount+1;
33361     goto lbl_1;
33362 lbl_2:
33363 
33364     /*
33365      * Perform Rayleigh-Ritz step: find true eigenpairs in NWork-dimensional
33366      * subspace.
33367      */
33368     ae_assert(state->matrixtype==0, "integrity check failed", _state);
33369     ae_assert(state->eigenvectorsneeded==1, "Assertion failed", _state);
33370     rmatrixgemm(nwork, nwork, n, 1.0, &state->qcur, 0, 0, 0, &state->ax, 0, 0, 0, 0.0, &state->r, 0, 0, _state);
33371     if( !smatrixevd(&state->r, nwork, 1, ae_true, &state->tw, &state->tz, _state) )
33372     {
33373         ae_assert(ae_false, "EigSubspace: direct eigensolver failed to converge", _state);
33374     }
33375 
33376     /*
33377      * Reorder eigenpairs according to their absolute magnitude, select
33378      * K top ones. This reordering algorithm is very inefficient and has
33379      * O(NWork*K) running time, but it is still faster than other parts
33380      * of the solver, so we may use it.
33381      *
33382      * Then, we transform RZ to RQ (full N-dimensional representation).
33383      * After this part is done, RW and RQ contain solution.
33384      */
33385     for(j=0; j<=nwork-1; j++)
33386     {
33387         state->wrank.ptr.p_double[j] = ae_fabs(state->tw.ptr.p_double[j], _state);
33388     }
33389     rankxuntied(&state->wrank, nwork, &state->buf, _state);
33390     cnt = 0;
33391     for(i=nwork-1; i>=nwork-k; i--)
33392     {
33393         for(i1=0; i1<=nwork-1; i1++)
33394         {
33395             if( ae_fp_eq(state->wrank.ptr.p_double[i1],(double)(i)) )
33396             {
33397                 ae_assert(cnt<k, "EigSubspace: integrity check failed", _state);
33398                 state->rw.ptr.p_double[cnt] = state->tw.ptr.p_double[i1];
33399                 for(j=0; j<=nwork-1; j++)
33400                 {
33401                     state->rz.ptr.pp_double[j][cnt] = state->tz.ptr.pp_double[j][i1];
33402                 }
33403                 cnt = cnt+1;
33404             }
33405         }
33406     }
33407     ae_assert(cnt==k, "EigSubspace: integrity check failed", _state);
33408     rmatrixgemm(n, k, nwork, 1.0, &state->qcur, 0, 0, 1, &state->rz, 0, 0, 0, 0.0, &state->rq, 0, 0, _state);
33409     result = ae_false;
33410     return result;
33411 
33412     /*
33413      * Saving state
33414      */
33415 lbl_rcomm:
33416     result = ae_true;
33417     state->rstate.ia.ptr.p_int[0] = n;
33418     state->rstate.ia.ptr.p_int[1] = nwork;
33419     state->rstate.ia.ptr.p_int[2] = k;
33420     state->rstate.ia.ptr.p_int[3] = cnt;
33421     state->rstate.ia.ptr.p_int[4] = i;
33422     state->rstate.ia.ptr.p_int[5] = i1;
33423     state->rstate.ia.ptr.p_int[6] = j;
33424     state->rstate.ia.ptr.p_int[7] = convcnt;
33425     state->rstate.ra.ptr.p_double[0] = vv;
33426     state->rstate.ra.ptr.p_double[1] = v;
33427     return result;
33428 }
33429 
33430 
33431 /*************************************************************************
33432 Finding the eigenvalues and eigenvectors of a symmetric matrix
33433 
33434 The algorithm finds eigen pairs of a symmetric matrix by reducing it to
33435 tridiagonal form and using the QL/QR algorithm.
33436 
33437   ! COMMERCIAL EDITION OF ALGLIB:
33438   !
33439   ! Commercial Edition of ALGLIB includes following important improvements
33440   ! of this function:
33441   ! * high-performance native backend with same C# interface (C# version)
33442   ! * hardware vendor (Intel) implementations of linear algebra primitives
33443   !   (C++ and C# versions, x86/x64 platform)
33444   !
33445   ! We recommend you to read 'Working with commercial version' section  of
33446   ! ALGLIB Reference Manual in order to find out how to  use  performance-
33447   ! related features provided by commercial edition of ALGLIB.
33448 
33449 Input parameters:
33450     A       -   symmetric matrix which is given by its upper or lower
33451                 triangular part.
33452                 Array whose indexes range within [0..N-1, 0..N-1].
33453     N       -   size of matrix A.
33454     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
33455                 If ZNeeded is equal to:
33456                  * 0, the eigenvectors are not returned;
33457                  * 1, the eigenvectors are returned.
33458     IsUpper -   storage format.
33459 
33460 Output parameters:
33461     D       -   eigenvalues in ascending order.
33462                 Array whose index ranges within [0..N-1].
33463     Z       -   if ZNeeded is equal to:
33464                  * 0, Z hasn't changed;
33465                  * 1, Z contains the eigenvectors.
33466                 Array whose indexes range within [0..N-1, 0..N-1].
33467                 The eigenvectors are stored in the matrix columns.
33468 
33469 Result:
33470     True, if the algorithm has converged.
33471     False, if the algorithm hasn't converged (rare case).
33472 
33473   -- ALGLIB --
33474      Copyright 2005-2008 by Bochkanov Sergey
33475 *************************************************************************/
ae_bool smatrixevd(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tau;
    ae_vector e;
    ae_bool result;

    /*
     * Work on a private copy of A: the reduction to tridiagonal form
     * overwrites the matrix, while the public contract leaves the
     * caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(d);
    ae_matrix_clear(z);
    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "SMatrixEVD: incorrect ZNeeded", _state);

    /*
     * Reduce A to symmetric tridiagonal form (D = diagonal, E = off-diagonal),
     * unpack the accumulated orthogonal transformation Q into Z when
     * eigenvectors are requested, then solve the tridiagonal problem
     * with the QL/QR algorithm.
     */
    smatrixtd(a, n, isupper, &tau, d, &e, _state);
    if( zneeded==1 )
    {
        smatrixtdunpackq(a, n, isupper, &tau, z, _state);
    }
    result = smatrixtdevd(d, &e, n, zneeded, z, _state);
    ae_frame_leave(_state);
    return result;
}
33511 
33512 
33513 /*************************************************************************
33514 Subroutine for finding the eigenvalues (and eigenvectors) of  a  symmetric
33515 matrix  in  a  given half open interval (A, B] by using  a  bisection  and
33516 inverse iteration
33517 
33518   ! COMMERCIAL EDITION OF ALGLIB:
33519   !
33520   ! Commercial Edition of ALGLIB includes following important improvements
33521   ! of this function:
33522   ! * high-performance native backend with same C# interface (C# version)
33523   ! * hardware vendor (Intel) implementations of linear algebra primitives
33524   !   (C++ and C# versions, x86/x64 platform)
33525   !
33526   ! We recommend you to read 'Working with commercial version' section  of
33527   ! ALGLIB Reference Manual in order to find out how to  use  performance-
33528   ! related features provided by commercial edition of ALGLIB.
33529 
33530 Input parameters:
33531     A       -   symmetric matrix which is given by its upper or lower
33532                 triangular part. Array [0..N-1, 0..N-1].
33533     N       -   size of matrix A.
33534     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
33535                 If ZNeeded is equal to:
33536                  * 0, the eigenvectors are not returned;
33537                  * 1, the eigenvectors are returned.
33538     IsUpperA -  storage format of matrix A.
33539     B1, B2 -    half open interval (B1, B2] to search eigenvalues in.
33540 
33541 Output parameters:
33542     M       -   number of eigenvalues found in a given half-interval (M>=0).
33543     W       -   array of the eigenvalues found.
33544                 Array whose index ranges within [0..M-1].
33545     Z       -   if ZNeeded is equal to:
33546                  * 0, Z hasn't changed;
33547                  * 1, Z contains eigenvectors.
33548                 Array whose indexes range within [0..N-1, 0..M-1].
33549                 The eigenvectors are stored in the matrix columns.
33550 
33551 Result:
33552     True, if successful. M contains the number of eigenvalues in the given
33553     half-interval (could be equal to 0), W contains the eigenvalues,
33554     Z contains the eigenvectors (if needed).
33555 
33556     False, if the bisection method subroutine wasn't able to find the
33557     eigenvalues in the given interval or if the inverse iteration subroutine
33558     wasn't able to find all the corresponding eigenvectors.
33559     In that case, the eigenvalues and eigenvectors are not returned,
33560     M is equal to 0.
33561 
33562   -- ALGLIB --
33563      Copyright 07.01.2006 by Bochkanov Sergey
33564 *************************************************************************/
ae_bool smatrixevdr(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     double b1,
     double b2,
     ae_int_t* m,
     /* Real    */ ae_vector* w,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tau;
    ae_vector e;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    *m = 0;
    ae_vector_clear(w);
    ae_matrix_clear(z);
    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);

    /* message names this routine (was "SMatrixTDEVDR", a different function) */
    ae_assert(zneeded==0||zneeded==1, "SMatrixEVDR: incorrect ZNeeded", _state);

    /*
     * Reduce A to tridiagonal form (W = diagonal, E = off-diagonal),
     * unpack Q into Z when eigenvectors are requested, then search the
     * half-open interval (B1,B2] by bisection + inverse iteration.
     */
    smatrixtd(a, n, isupper, &tau, w, &e, _state);
    if( zneeded==1 )
    {
        smatrixtdunpackq(a, n, isupper, &tau, z, _state);
    }
    result = smatrixtdevdr(w, &e, n, zneeded, b1, b2, m, z, _state);
    ae_frame_leave(_state);
    return result;
}
33604 
33605 
33606 /*************************************************************************
33607 Subroutine for finding the eigenvalues and  eigenvectors  of  a  symmetric
33608 matrix with given indexes by using bisection and inverse iteration methods.
33609 
33610 Input parameters:
33611     A       -   symmetric matrix which is given by its upper or lower
33612                 triangular part. Array whose indexes range within [0..N-1, 0..N-1].
33613     N       -   size of matrix A.
33614     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
33615                 If ZNeeded is equal to:
33616                  * 0, the eigenvectors are not returned;
33617                  * 1, the eigenvectors are returned.
33618     IsUpperA -  storage format of matrix A.
33619     I1, I2 -    index interval for searching (from I1 to I2).
33620                 0 <= I1 <= I2 <= N-1.
33621 
33622 Output parameters:
33623     W       -   array of the eigenvalues found.
33624                 Array whose index ranges within [0..I2-I1].
33625     Z       -   if ZNeeded is equal to:
33626                  * 0, Z hasn't changed;
33627                  * 1, Z contains eigenvectors.
33628                 Array whose indexes range within [0..N-1, 0..I2-I1].
33629                 In that case, the eigenvectors are stored in the matrix columns.
33630 
33631 Result:
33632     True, if successful. W contains the eigenvalues, Z contains the
33633     eigenvectors (if needed).
33634 
33635     False, if the bisection method subroutine wasn't able to find the
33636     eigenvalues in the given interval or if the inverse iteration subroutine
33637     wasn't able to find all the corresponding eigenvectors.
33638     In that case, the eigenvalues and eigenvectors are not returned.
33639 
33640   -- ALGLIB --
33641      Copyright 07.01.2006 by Bochkanov Sergey
33642 *************************************************************************/
ae_bool smatrixevdi(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     ae_int_t i1,
     ae_int_t i2,
     /* Real    */ ae_vector* w,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tau;
    ae_vector e;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(w);
    ae_matrix_clear(z);
    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "SMatrixEVDI: incorrect ZNeeded", _state);

    /*
     * Reduce A to tridiagonal form (W = diagonal, E = off-diagonal),
     * unpack Q into Z when eigenvectors are requested, then extract
     * eigenpairs with indexes I1..I2 by bisection + inverse iteration.
     */
    smatrixtd(a, n, isupper, &tau, w, &e, _state);
    if( zneeded==1 )
    {
        smatrixtdunpackq(a, n, isupper, &tau, z, _state);
    }
    result = smatrixtdevdi(w, &e, n, zneeded, i1, i2, z, _state);
    ae_frame_leave(_state);
    return result;
}
33680 
33681 
33682 /*************************************************************************
33683 Finding the eigenvalues and eigenvectors of a Hermitian matrix
33684 
33685 The algorithm finds eigen pairs of a Hermitian matrix by  reducing  it  to
33686 real tridiagonal form and using the QL/QR algorithm.
33687 
33688   ! COMMERCIAL EDITION OF ALGLIB:
33689   !
33690   ! Commercial Edition of ALGLIB includes following important improvements
33691   ! of this function:
33692   ! * high-performance native backend with same C# interface (C# version)
33693   ! * hardware vendor (Intel) implementations of linear algebra primitives
33694   !   (C++ and C# versions, x86/x64 platform)
33695   !
33696   ! We recommend you to read 'Working with commercial version' section  of
33697   ! ALGLIB Reference Manual in order to find out how to  use  performance-
33698   ! related features provided by commercial edition of ALGLIB.
33699 
33700 Input parameters:
33701     A       -   Hermitian matrix which is given  by  its  upper  or  lower
33702                 triangular part.
33703                 Array whose indexes range within [0..N-1, 0..N-1].
33704     N       -   size of matrix A.
33705     IsUpper -   storage format.
33706     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
33707                 not. If ZNeeded is equal to:
33708                  * 0, the eigenvectors are not returned;
33709                  * 1, the eigenvectors are returned.
33710 
33711 Output parameters:
33712     D       -   eigenvalues in ascending order.
33713                 Array whose index ranges within [0..N-1].
33714     Z       -   if ZNeeded is equal to:
33715                  * 0, Z hasn't changed;
33716                  * 1, Z contains the eigenvectors.
33717                 Array whose indexes range within [0..N-1, 0..N-1].
33718                 The eigenvectors are stored in the matrix columns.
33719 
33720 Result:
33721     True, if the algorithm has converged.
33722     False, if the algorithm hasn't converged (rare case).
33723 
33724 Note:
33725     eigenvectors of Hermitian matrix are defined up to  multiplication  by
33726     a complex number L, such that |L|=1.
33727 
33728   -- ALGLIB --
33729      Copyright 2005, 23 March 2007 by Bochkanov Sergey
33730 *************************************************************************/
ae_bool hmatrixevd(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     /* Real    */ ae_vector* d,
     /* Complex */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tau;
    ae_vector e;
    ae_matrix t;
    ae_matrix qz;
    ae_matrix q;
    ae_int_t i;
    ae_int_t j;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    memset(&t, 0, sizeof(t));
    memset(&qz, 0, sizeof(qz));
    memset(&q, 0, sizeof(q));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(d);
    ae_matrix_clear(z);
    ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&qz, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "HermitianEVD: incorrect ZNeeded", _state);

    /*
     * Reduce to real tridiagonal form; when eigenvectors are requested,
     * unpack the unitary transformation Q and switch to ZNeeded=2 so
     * the tridiagonal solver returns its (real) eigenvector matrix in T.
     */
    hmatrixtd(a, n, isupper, &tau, d, &e, _state);
    if( zneeded==1 )
    {
        hmatrixtdunpackq(a, n, isupper, &tau, &q, _state);
        zneeded = 2;
    }

    /*
     * TDEVD
     */
    result = smatrixtdevd(d, &e, n, zneeded, &t, _state);

    /*
     * Eigenvectors are needed
     * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
     *
     * QZ is an N x 2N scratch matrix: the left N columns hold one part
     * (real or imaginary) of Q, the right N columns receive the product,
     * so the in-place gemm reads and writes disjoint column ranges.
     */
    if( result&&zneeded!=0 )
    {
        ae_matrix_set_length(z, n, n, _state);
        ae_matrix_set_length(&qz, n, 2*n, _state);

        /*
         * Calculate Re(Q)*T
         */
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                qz.ptr.pp_double[i][j] = q.ptr.pp_complex[i][j].x;
            }
        }
        rmatrixgemm(n, n, n, 1.0, &qz, 0, 0, 0, &t, 0, 0, 0, 0.0, &qz, 0, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z->ptr.pp_complex[i][j].x = qz.ptr.pp_double[i][n+j];
            }
        }

        /*
         * Calculate Im(Q)*T
         */
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                qz.ptr.pp_double[i][j] = q.ptr.pp_complex[i][j].y;
            }
        }
        rmatrixgemm(n, n, n, 1.0, &qz, 0, 0, 0, &t, 0, 0, 0, 0.0, &qz, 0, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z->ptr.pp_complex[i][j].y = qz.ptr.pp_double[i][n+j];
            }
        }
    }
    ae_frame_leave(_state);
    return result;
}
33834 
33835 
33836 /*************************************************************************
33837 Subroutine for finding the eigenvalues (and eigenvectors) of  a  Hermitian
33838 matrix  in  a  given half-interval (A, B] by using a bisection and inverse
33839 iteration
33840 
33841 Input parameters:
33842     A       -   Hermitian matrix which is given  by  its  upper  or  lower
33843                 triangular  part.  Array  whose   indexes   range   within
33844                 [0..N-1, 0..N-1].
33845     N       -   size of matrix A.
33846     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
33847                 not. If ZNeeded is equal to:
33848                  * 0, the eigenvectors are not returned;
33849                  * 1, the eigenvectors are returned.
33850     IsUpperA -  storage format of matrix A.
33851     B1, B2 -    half-interval (B1, B2] to search eigenvalues in.
33852 
33853 Output parameters:
33854     M       -   number of eigenvalues found in a given half-interval, M>=0
33855     W       -   array of the eigenvalues found.
33856                 Array whose index ranges within [0..M-1].
33857     Z       -   if ZNeeded is equal to:
33858                  * 0, Z hasn't changed;
33859                  * 1, Z contains eigenvectors.
33860                 Array whose indexes range within [0..N-1, 0..M-1].
33861                 The eigenvectors are stored in the matrix columns.
33862 
33863 Result:
33864     True, if successful. M contains the number of eigenvalues in the given
33865     half-interval (could be equal to 0), W contains the eigenvalues,
33866     Z contains the eigenvectors (if needed).
33867 
33868     False, if the bisection method subroutine  wasn't  able  to  find  the
33869     eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
33870     subroutine  wasn't  able  to  find all the corresponding eigenvectors.
33871     In that case, the eigenvalues and eigenvectors are not returned, M  is
33872     equal to 0.
33873 
33874 Note:
    eigenvectors of a Hermitian matrix are defined up to multiplication  by
    a complex number L, such that |L|=1.
33877 
33878   -- ALGLIB --
33879      Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
33880 *************************************************************************/
ae_bool hmatrixevdr(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     double b1,
     double b2,
     ae_int_t* m,
     /* Real    */ ae_vector* w,
     /* Complex */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_matrix q;
    ae_matrix t;
    ae_vector tau;
    ae_vector e;
    ae_vector work;
    ae_int_t i;
    ae_int_t k;
    double v;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&q, 0, sizeof(q));
    memset(&t, 0, sizeof(t));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    memset(&work, 0, sizeof(work));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    *m = 0;
    ae_vector_clear(w);
    ae_matrix_clear(z);
    ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "HermitianEigenValuesAndVectorsInInterval: incorrect ZNeeded", _state);

    /*
     * Reduce to real tridiagonal form; when eigenvectors are requested,
     * unpack the unitary transformation Q and switch to ZNeeded=2 so
     * the tridiagonal solver returns its (real) eigenvector matrix in T.
     */
    hmatrixtd(a, n, isupper, &tau, w, &e, _state);
    if( zneeded==1 )
    {
        hmatrixtdunpackq(a, n, isupper, &tau, &q, _state);
        zneeded = 2;
    }

    /*
     * Bisection and inverse iteration on the tridiagonal problem,
     * restricted to the half-open interval (B1,B2]; M receives the
     * number of eigenvalues found.
     */
    result = smatrixtdevdr(w, &e, n, zneeded, b1, b2, m, &t, _state);

    /*
     * Eigenvectors are needed
     * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
     * (row-by-row accumulation into WORK, real and imaginary parts
     * handled separately because T is real)
     */
    if( (result&&zneeded!=0)&&*m!=0 )
    {
        ae_vector_set_length(&work, *m-1+1, _state);
        ae_matrix_set_length(z, n-1+1, *m-1+1, _state);
        for(i=0; i<=n-1; i++)
        {

            /*
             * Calculate real part
             */
            for(k=0; k<=*m-1; k++)
            {
                work.ptr.p_double[k] = (double)(0);
            }
            for(k=0; k<=n-1; k++)
            {
                v = q.ptr.pp_complex[i][k].x;
                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,*m-1), v);
            }
            for(k=0; k<=*m-1; k++)
            {
                z->ptr.pp_complex[i][k].x = work.ptr.p_double[k];
            }

            /*
             * Calculate imaginary part
             */
            for(k=0; k<=*m-1; k++)
            {
                work.ptr.p_double[k] = (double)(0);
            }
            for(k=0; k<=n-1; k++)
            {
                v = q.ptr.pp_complex[i][k].y;
                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,*m-1), v);
            }
            for(k=0; k<=*m-1; k++)
            {
                z->ptr.pp_complex[i][k].y = work.ptr.p_double[k];
            }
        }
    }
    ae_frame_leave(_state);
    return result;
}
33988 
33989 
33990 /*************************************************************************
33991 Subroutine for finding the eigenvalues and  eigenvectors  of  a  Hermitian
33992 matrix with given indexes by using bisection and inverse iteration methods
33993 
33994 Input parameters:
33995     A       -   Hermitian matrix which is given  by  its  upper  or  lower
33996                 triangular part.
33997                 Array whose indexes range within [0..N-1, 0..N-1].
33998     N       -   size of matrix A.
33999     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
34000                 not. If ZNeeded is equal to:
34001                  * 0, the eigenvectors are not returned;
34002                  * 1, the eigenvectors are returned.
34003     IsUpperA -  storage format of matrix A.
34004     I1, I2 -    index interval for searching (from I1 to I2).
34005                 0 <= I1 <= I2 <= N-1.
34006 
34007 Output parameters:
34008     W       -   array of the eigenvalues found.
34009                 Array whose index ranges within [0..I2-I1].
34010     Z       -   if ZNeeded is equal to:
34011                  * 0, Z hasn't changed;
34012                  * 1, Z contains eigenvectors.
34013                 Array whose indexes range within [0..N-1, 0..I2-I1].
34014                 In  that  case,  the eigenvectors are stored in the matrix
34015                 columns.
34016 
34017 Result:
34018     True, if successful. W contains the eigenvalues, Z contains the
34019     eigenvectors (if needed).
34020 
34021     False, if the bisection method subroutine  wasn't  able  to  find  the
34022     eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
34023     subroutine wasn't able to find  all  the  corresponding  eigenvectors.
34024     In that case, the eigenvalues and eigenvectors are not returned.
34025 
34026 Note:
    eigenvectors of a Hermitian matrix are defined up to multiplication  by
    a complex number L, such that |L|=1.
34029 
34030   -- ALGLIB --
34031      Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
34032 *************************************************************************/
ae_bool hmatrixevdi(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     ae_int_t i1,
     ae_int_t i2,
     /* Real    */ ae_vector* w,
     /* Complex */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_matrix q;
    ae_matrix t;
    ae_vector tau;
    ae_vector e;
    ae_vector work;
    ae_int_t i;
    ae_int_t k;
    double v;
    ae_int_t m;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&q, 0, sizeof(q));
    memset(&t, 0, sizeof(t));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    memset(&work, 0, sizeof(work));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(w);
    ae_matrix_clear(z);
    ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "HermitianEigenValuesAndVectorsByIndexes: incorrect ZNeeded", _state);

    /*
     * Reduce to real tridiagonal form; when eigenvectors are requested,
     * unpack the unitary transformation Q and switch to ZNeeded=2 so
     * the tridiagonal solver returns its (real) eigenvector matrix in T.
     */
    hmatrixtd(a, n, isupper, &tau, w, &e, _state);
    if( zneeded==1 )
    {
        hmatrixtdunpackq(a, n, isupper, &tau, &q, _state);
        zneeded = 2;
    }

    /*
     * Bisection and inverse iteration on the tridiagonal problem,
     * restricted to eigenvalue indexes I1..I2.
     */
    result = smatrixtdevdi(w, &e, n, zneeded, i1, i2, &t, _state);

    /*
     * Eigenvectors are needed
     * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
     * (M = I2-I1+1 eigenpairs were requested; real and imaginary parts
     * are accumulated separately because T is real)
     */
    m = i2-i1+1;
    if( result&&zneeded!=0 )
    {
        ae_vector_set_length(&work, m-1+1, _state);
        ae_matrix_set_length(z, n-1+1, m-1+1, _state);
        for(i=0; i<=n-1; i++)
        {

            /*
             * Calculate real part
             */
            for(k=0; k<=m-1; k++)
            {
                work.ptr.p_double[k] = (double)(0);
            }
            for(k=0; k<=n-1; k++)
            {
                v = q.ptr.pp_complex[i][k].x;
                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,m-1), v);
            }
            for(k=0; k<=m-1; k++)
            {
                z->ptr.pp_complex[i][k].x = work.ptr.p_double[k];
            }

            /*
             * Calculate imaginary part
             */
            for(k=0; k<=m-1; k++)
            {
                work.ptr.p_double[k] = (double)(0);
            }
            for(k=0; k<=n-1; k++)
            {
                v = q.ptr.pp_complex[i][k].y;
                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,m-1), v);
            }
            for(k=0; k<=m-1; k++)
            {
                z->ptr.pp_complex[i][k].y = work.ptr.p_double[k];
            }
        }
    }
    ae_frame_leave(_state);
    return result;
}
34140 
34141 
34142 /*************************************************************************
34143 Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix
34144 
The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by
using a QL/QR algorithm with implicit shifts.
34147 
34148   ! COMMERCIAL EDITION OF ALGLIB:
34149   !
34150   ! Commercial Edition of ALGLIB includes following important improvements
34151   ! of this function:
34152   ! * high-performance native backend with same C# interface (C# version)
34153   ! * hardware vendor (Intel) implementations of linear algebra primitives
34154   !   (C++ and C# versions, x86/x64 platform)
34155   !
34156   ! We recommend you to read 'Working with commercial version' section  of
34157   ! ALGLIB Reference Manual in order to find out how to  use  performance-
34158   ! related features provided by commercial edition of ALGLIB.
34159 
34160 Input parameters:
34161     D       -   the main diagonal of a tridiagonal matrix.
34162                 Array whose index ranges within [0..N-1].
34163     E       -   the secondary diagonal of a tridiagonal matrix.
34164                 Array whose index ranges within [0..N-2].
34165     N       -   size of matrix A.
34166     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
34167                 If ZNeeded is equal to:
34168                  * 0, the eigenvectors are not needed;
34169                  * 1, the eigenvectors of a tridiagonal matrix
34170                    are multiplied by the square matrix Z. It is used if the
34171                    tridiagonal matrix is obtained by the similarity
34172                    transformation of a symmetric matrix;
34173                  * 2, the eigenvectors of a tridiagonal matrix replace the
34174                    square matrix Z;
34175                  * 3, matrix Z contains the first row of the eigenvectors
34176                    matrix.
34177     Z       -   if ZNeeded=1, Z contains the square matrix by which the
34178                 eigenvectors are multiplied.
34179                 Array whose indexes range within [0..N-1, 0..N-1].
34180 
34181 Output parameters:
34182     D       -   eigenvalues in ascending order.
34183                 Array whose index ranges within [0..N-1].
34184     Z       -   if ZNeeded is equal to:
34185                  * 0, Z hasn't changed;
34186                  * 1, Z contains the product of a given matrix (from the left)
34187                    and the eigenvectors matrix (from the right);
34188                  * 2, Z contains the eigenvectors.
34189                  * 3, Z contains the first row of the eigenvectors matrix.
34190                 If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1].
34191                 In that case, the eigenvectors are stored in the matrix columns.
34192                 If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1].
34193 
34194 Result:
34195     True, if the algorithm has converged.
34196     False, if the algorithm hasn't converged.
34197 
34198   -- LAPACK routine (version 3.0) --
34199      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
34200      Courant Institute, Argonne National Lab, and Rice University
34201      September 30, 1994
34202 *************************************************************************/
/* Computes eigenvalues (and optionally eigenvectors) of a symmetric
 * tridiagonal matrix via an implicit-shift QL/QR sweep, dispatching to an
 * MKL backend first when one is compiled in.
 *
 * d        - main diagonal [0..N-1]; replaced by ascending eigenvalues.
 * e        - secondary diagonal [0..N-2]; caller's copy is preserved (the
 *            routine works on a private copy).
 * n        - matrix size, N>=1.
 * zneeded  - 0: no vectors; 1: accumulate into caller-supplied Z;
 *            2: return eigenvectors in Z; 3: return first row of the
 *            eigenvector matrix in Z.
 * z        - in/out eigenvector storage, meaning depends on zneeded.
 *
 * Returns ae_true on convergence, ae_false otherwise.
 */
ae_bool smatrixtdevd(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_int_t zneeded,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector _e;
    ae_vector d1;
    ae_vector e1;
    ae_vector ex;
    ae_matrix z1;
    ae_int_t i;
    ae_int_t j;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&_e, 0, sizeof(_e));
    memset(&d1, 0, sizeof(d1));
    memset(&e1, 0, sizeof(e1));
    memset(&ex, 0, sizeof(ex));
    memset(&z1, 0, sizeof(z1));
    /* Work on a private copy of E so the caller's array is left intact. */
    ae_vector_init_copy(&_e, e, _state, ae_true);
    e = &_e;
    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z1, 0, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=1, "SMatrixTDEVD: N<=0", _state);
    ae_assert(zneeded>=0&&zneeded<=3, "SMatrixTDEVD: incorrect ZNeeded", _state);
    result = ae_false;

    /*
     * Preprocess Z: make ZNeeded equal to 0, 1 or 3.
     * Ensure that memory for Z is allocated.
     */
    if( zneeded==2 )
    {

        /*
         * Load identity to Z
         */
        /* ZNeeded=2 is reduced to ZNeeded=1 by seeding Z with the identity,
         * so accumulation produces the raw eigenvector matrix. */
        rmatrixsetlengthatleast(z, n, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z->ptr.pp_double[i][j] = 0.0;
            }
            z->ptr.pp_double[i][i] = 1.0;
        }
        zneeded = 1;
    }
    if( zneeded==3 )
    {

        /*
         * Allocate memory
         */
        /* Only the first row of the eigenvector matrix is requested. */
        rmatrixsetlengthatleast(z, 1, n, _state);
    }

    /*
     * Try to solve problem with MKL
     */
    /* EX is a disposable copy of E (the backend may overwrite its input).
     * Only entries [0..n-2] are meaningful; ex[n-1] is presumably never
     * read by the backend -- TODO confirm against smatrixtdevdmkl. */
    ae_vector_set_length(&ex, n, _state);
    for(i=0; i<=n-2; i++)
    {
        ex.ptr.p_double[i] = e->ptr.p_double[i];
    }
    if( smatrixtdevdmkl(d, &ex, n, zneeded, z, &result, _state) )
    {
        /* MKL handled the problem (success or failure); return its verdict. */
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Prepare 1-based task
     */
    /* The generic fallback evd_tridiagonalevd() uses 1-based indexing,
     * so shift D, E and (optionally) Z into 1-based scratch arrays. */
    ae_vector_set_length(&d1, n+1, _state);
    ae_vector_set_length(&e1, n+1, _state);
    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
    if( n>1 )
    {
        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
    }
    if( zneeded==1 )
    {
        ae_matrix_set_length(&z1, n+1, n+1, _state);
        for(i=1; i<=n; i++)
        {
            ae_v_move(&z1.ptr.pp_double[i][1], 1, &z->ptr.pp_double[i-1][0], 1, ae_v_len(1,n));
        }
    }

    /*
     * Solve 1-based task
     */
    result = evd_tridiagonalevd(&d1, &e1, n, zneeded, &z1, _state);
    if( !result )
    {
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Convert back to 0-based result
     */
    ae_v_move(&d->ptr.p_double[0], 1, &d1.ptr.p_double[1], 1, ae_v_len(0,n-1));
    if( zneeded!=0 )
    {
        if( zneeded==1 )
        {
            /* Accumulated product: Z already has the right size, copy rows back. */
            for(i=1; i<=n; i++)
            {
                ae_v_move(&z->ptr.pp_double[i-1][0], 1, &z1.ptr.pp_double[i][1], 1, ae_v_len(0,n-1));
            }
            ae_frame_leave(_state);
            return result;
        }
        if( zneeded==2 )
        {
            /* NOTE: unreachable after the preprocessing above (2 was mapped
             * to 1); kept for symmetry with the other branches. */
            ae_matrix_set_length(z, n-1+1, n-1+1, _state);
            for(i=1; i<=n; i++)
            {
                ae_v_move(&z->ptr.pp_double[i-1][0], 1, &z1.ptr.pp_double[i][1], 1, ae_v_len(0,n-1));
            }
            ae_frame_leave(_state);
            return result;
        }
        if( zneeded==3 )
        {
            /* Return only the first row of the eigenvector matrix. */
            ae_matrix_set_length(z, 0+1, n-1+1, _state);
            ae_v_move(&z->ptr.pp_double[0][0], 1, &z1.ptr.pp_double[1][1], 1, ae_v_len(0,n-1));
            ae_frame_leave(_state);
            return result;
        }
        ae_assert(ae_false, "SMatrixTDEVD: Incorrect ZNeeded!", _state);
    }
    ae_frame_leave(_state);
    return result;
}
34347 
34348 
34349 /*************************************************************************
34350 Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a
34351 given half-interval (A, B] by using bisection and inverse iteration.
34352 
34353 Input parameters:
34354     D       -   the main diagonal of a tridiagonal matrix.
34355                 Array whose index ranges within [0..N-1].
34356     E       -   the secondary diagonal of a tridiagonal matrix.
34357                 Array whose index ranges within [0..N-2].
34358     N       -   size of matrix, N>=0.
34359     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
34360                 If ZNeeded is equal to:
34361                  * 0, the eigenvectors are not needed;
34362                  * 1, the eigenvectors of a tridiagonal matrix are multiplied
34363                    by the square matrix Z. It is used if the tridiagonal
34364                    matrix is obtained by the similarity transformation
34365                    of a symmetric matrix.
34366                  * 2, the eigenvectors of a tridiagonal matrix replace matrix Z.
34367     A, B    -   half-interval (A, B] to search eigenvalues in.
34368     Z       -   if ZNeeded is equal to:
34369                  * 0, Z isn't used and remains unchanged;
34370                  * 1, Z contains the square matrix (array whose indexes range
34371                    within [0..N-1, 0..N-1]) which reduces the given symmetric
34372                    matrix to tridiagonal form;
34373                  * 2, Z isn't used (but changed on the exit).
34374 
34375 Output parameters:
34376     D       -   array of the eigenvalues found.
34377                 Array whose index ranges within [0..M-1].
34378     M       -   number of eigenvalues found in the given half-interval (M>=0).
34379     Z       -   if ZNeeded is equal to:
34380                  * 0, doesn't contain any information;
34381                  * 1, contains the product of a given NxN matrix Z (from the
34382                    left) and NxM matrix of the eigenvectors found (from the
34383                    right). Array whose indexes range within [0..N-1, 0..M-1].
34384                  * 2, contains the matrix of the eigenvectors found.
34385                    Array whose indexes range within [0..N-1, 0..M-1].
34386 
34387 Result:
34388 
34389     True, if successful. In that case, M contains the number of eigenvalues
34390     in the given half-interval (could be equal to 0), D contains the eigenvalues,
34391     Z contains the eigenvectors (if needed).
34392     It should be noted that the subroutine changes the size of arrays D and Z.
34393 
34394     False, if the bisection method subroutine wasn't able to find the
34395     eigenvalues in the given interval or if the inverse iteration subroutine
34396     wasn't able to find all the corresponding eigenvectors. In that case,
34397     the eigenvalues and eigenvectors are not returned, M is equal to 0.
34398 
34399   -- ALGLIB --
34400      Copyright 31.03.2008 by Bochkanov Sergey
34401 *************************************************************************/
/* Computes tridiagonal eigenvalues in the half-interval (A,B] by bisection,
 * and (optionally) the matching eigenvectors by inverse iteration.
 *
 * d        - main diagonal [0..N-1]; on success replaced by the M
 *            eigenvalues found, sorted ascending.
 * e        - secondary diagonal [0..N-2]; not modified.
 * n        - matrix size.
 * zneeded  - 0: values only; 1: multiply caller's NxN Z by the eigenvector
 *            matrix; 2: store the NxM eigenvector matrix in Z.
 * a, b     - search interval (A, B], A < B required for a non-empty result.
 * m        - out: number of eigenvalues found (0 on failure).
 * z        - in/out eigenvector storage, meaning depends on zneeded.
 *
 * Returns ae_true on success (possibly with M=0), ae_false if bisection or
 * inverse iteration failed; in that case M is set to 0.
 */
ae_bool smatrixtdevdr(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_int_t zneeded,
     double a,
     double b,
     ae_int_t* m,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t errorcode;
    ae_int_t nsplit;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t cr;
    ae_vector iblock;
    ae_vector isplit;
    ae_vector ifail;
    ae_vector d1;
    ae_vector e1;
    ae_vector w;
    ae_matrix z2;
    ae_matrix z3;
    double v;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&iblock, 0, sizeof(iblock));
    memset(&isplit, 0, sizeof(isplit));
    memset(&ifail, 0, sizeof(ifail));
    memset(&d1, 0, sizeof(d1));
    memset(&e1, 0, sizeof(e1));
    memset(&w, 0, sizeof(w));
    memset(&z2, 0, sizeof(z2));
    memset(&z3, 0, sizeof(z3));
    *m = 0;
    ae_vector_init(&iblock, 0, DT_INT, _state, ae_true);
    ae_vector_init(&isplit, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ifail, 0, DT_INT, _state, ae_true);
    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z3, 0, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded>=0&&zneeded<=2, "SMatrixTDEVDR: incorrect ZNeeded!", _state);

    /*
     * Special cases
     */
    /* Empty interval or empty matrix: vacuous success with M=0. */
    if( ae_fp_less_eq(b,a) )
    {
        *m = 0;
        result = ae_true;
        ae_frame_leave(_state);
        return result;
    }
    if( n<=0 )
    {
        *m = 0;
        result = ae_true;
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Copy D,E to D1, E1
     */
    /* Internal bisection/inverse-iteration routines use 1-based arrays. */
    ae_vector_set_length(&d1, n+1, _state);
    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
    if( n>1 )
    {
        ae_vector_set_length(&e1, n-1+1, _state);
        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
    }

    /*
     * No eigen vectors
     */
    if( zneeded==0 )
    {
        /* IRANGE=2 (interval search), ORDER=1: eigenvalues only. */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 1, a, b, 0, 0, (double)(-1), &w, m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result||*m==0 )
        {
            *m = 0;
            ae_frame_leave(_state);
            return result;
        }
        ae_vector_set_length(d, *m-1+1, _state);
        ae_v_move(&d->ptr.p_double[0], 1, &w.ptr.p_double[1], 1, ae_v_len(0,*m-1));
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Eigen vectors are multiplied by Z
     */
    if( zneeded==1 )
    {

        /*
         * Find eigen pairs
         */
        /* ORDER=2 groups eigenvalues by block, as required by DSTEIN-style
         * inverse iteration below. */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 2, a, b, 0, 0, (double)(-1), &w, m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result||*m==0 )
        {
            *m = 0;
            ae_frame_leave(_state);
            return result;
        }
        evd_internaldstein(n, &d1, &e1, *m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
        if( cr!=0 )
        {
            /* Inverse iteration failed to converge for CR eigenvectors. */
            *m = 0;
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Sort eigen values and vectors
         */
        /* Selection sort into ascending order, permuting Z2's columns in
         * step with W. M is typically small, so O(M^2) is acceptable. */
        for(i=1; i<=*m; i++)
        {
            k = i;
            for(j=i; j<=*m; j++)
            {
                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
                {
                    k = j;
                }
            }
            v = w.ptr.p_double[i];
            w.ptr.p_double[i] = w.ptr.p_double[k];
            w.ptr.p_double[k] = v;
            for(j=1; j<=n; j++)
            {
                v = z2.ptr.pp_double[j][i];
                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
                z2.ptr.pp_double[j][k] = v;
            }
        }

        /*
         * Transform Z2 and overwrite Z
         */
        /* Z3 := Z2^T (eigenvectors as rows), then Z2 := Z*Z3^T row by row,
         * i.e. the caller's transformation matrix times the eigenvectors. */
        ae_matrix_set_length(&z3, *m+1, n+1, _state);
        for(i=1; i<=*m; i++)
        {
            ae_v_move(&z3.ptr.pp_double[i][1], 1, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(1,n));
        }
        for(i=1; i<=n; i++)
        {
            for(j=1; j<=*m; j++)
            {
                v = ae_v_dotproduct(&z->ptr.pp_double[i-1][0], 1, &z3.ptr.pp_double[j][1], 1, ae_v_len(0,n-1));
                z2.ptr.pp_double[i][j] = v;
            }
        }
        /* Resize Z to NxM and copy the product back (0-based). */
        ae_matrix_set_length(z, n-1+1, *m-1+1, _state);
        for(i=1; i<=*m; i++)
        {
            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
        }

        /*
         * Store W
         */
        ae_vector_set_length(d, *m-1+1, _state);
        for(i=1; i<=*m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Eigen vectors are stored in Z
     */
    if( zneeded==2 )
    {

        /*
         * Find eigen pairs
         */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 2, a, b, 0, 0, (double)(-1), &w, m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result||*m==0 )
        {
            *m = 0;
            ae_frame_leave(_state);
            return result;
        }
        evd_internaldstein(n, &d1, &e1, *m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
        if( cr!=0 )
        {
            *m = 0;
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Sort eigen values and vectors
         */
        /* Same selection sort as in the ZNeeded=1 branch. */
        for(i=1; i<=*m; i++)
        {
            k = i;
            for(j=i; j<=*m; j++)
            {
                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
                {
                    k = j;
                }
            }
            v = w.ptr.p_double[i];
            w.ptr.p_double[i] = w.ptr.p_double[k];
            w.ptr.p_double[k] = v;
            for(j=1; j<=n; j++)
            {
                v = z2.ptr.pp_double[j][i];
                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
                z2.ptr.pp_double[j][k] = v;
            }
        }

        /*
         * Store W
         */
        ae_vector_set_length(d, *m-1+1, _state);
        for(i=1; i<=*m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        /* Copy raw eigenvectors into Z (NxM, 0-based). */
        ae_matrix_set_length(z, n-1+1, *m-1+1, _state);
        for(i=1; i<=*m; i++)
        {
            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
        }
        ae_frame_leave(_state);
        return result;
    }
    /* Not reachable: ZNeeded was validated to be in [0,2]. */
    result = ae_false;
    ae_frame_leave(_state);
    return result;
}
34650 
34651 
34652 /*************************************************************************
Subroutine for finding tridiagonal matrix eigenvalues/vectors with given
indexes (in ascending order) by using the bisection and inverse iteration.
34655 
34656 Input parameters:
34657     D       -   the main diagonal of a tridiagonal matrix.
34658                 Array whose index ranges within [0..N-1].
34659     E       -   the secondary diagonal of a tridiagonal matrix.
34660                 Array whose index ranges within [0..N-2].
34661     N       -   size of matrix. N>=0.
34662     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
34663                 If ZNeeded is equal to:
34664                  * 0, the eigenvectors are not needed;
34665                  * 1, the eigenvectors of a tridiagonal matrix are multiplied
34666                    by the square matrix Z. It is used if the
34667                    tridiagonal matrix is obtained by the similarity transformation
34668                    of a symmetric matrix.
34669                  * 2, the eigenvectors of a tridiagonal matrix replace
34670                    matrix Z.
34671     I1, I2  -   index interval for searching (from I1 to I2).
34672                 0 <= I1 <= I2 <= N-1.
34673     Z       -   if ZNeeded is equal to:
34674                  * 0, Z isn't used and remains unchanged;
34675                  * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1])
34676                    which reduces the given symmetric matrix to  tridiagonal form;
34677                  * 2, Z isn't used (but changed on the exit).
34678 
34679 Output parameters:
34680     D       -   array of the eigenvalues found.
34681                 Array whose index ranges within [0..I2-I1].
34682     Z       -   if ZNeeded is equal to:
34683                  * 0, doesn't contain any information;
34684                  * 1, contains the product of a given NxN matrix Z (from the left) and
34685                    Nx(I2-I1) matrix of the eigenvectors found (from the right).
34686                    Array whose indexes range within [0..N-1, 0..I2-I1].
                 * 2, contains the matrix of the eigenvectors found.
                   Array whose indexes range within [0..N-1, 0..I2-I1].
34689 
34690 
34691 Result:
34692 
34693     True, if successful. In that case, D contains the eigenvalues,
34694     Z contains the eigenvectors (if needed).
34695     It should be noted that the subroutine changes the size of arrays D and Z.
34696 
34697     False, if the bisection method subroutine wasn't able to find the eigenvalues
34698     in the given interval or if the inverse iteration subroutine wasn't able
34699     to find all the corresponding eigenvectors. In that case, the eigenvalues
34700     and eigenvectors are not returned.
34701 
34702   -- ALGLIB --
34703      Copyright 25.12.2005 by Bochkanov Sergey
34704 *************************************************************************/
/* Computes tridiagonal eigenvalues with indexes I1..I2 (ascending order) by
 * bisection, and (optionally) the matching eigenvectors by inverse iteration.
 *
 * d        - main diagonal [0..N-1]; on success replaced by the I2-I1+1
 *            selected eigenvalues, sorted ascending.
 * e        - secondary diagonal [0..N-2]; not modified.
 * n        - matrix size.
 * zneeded  - 0: values only; 1: multiply caller's NxN Z by the eigenvector
 *            matrix; 2: store the Nx(I2-I1+1) eigenvector matrix in Z.
 * i1, i2   - 0-based index range, 0 <= I1 <= I2 < N.
 * z        - in/out eigenvector storage, meaning depends on zneeded.
 *
 * Returns ae_true on success; ae_false if bisection found a wrong number of
 * eigenvalues or inverse iteration failed to converge.
 */
ae_bool smatrixtdevdi(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_int_t zneeded,
     ae_int_t i1,
     ae_int_t i2,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t errorcode;
    ae_int_t nsplit;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t m;
    ae_int_t cr;
    ae_vector iblock;
    ae_vector isplit;
    ae_vector ifail;
    ae_vector w;
    ae_vector d1;
    ae_vector e1;
    ae_matrix z2;
    ae_matrix z3;
    double v;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&iblock, 0, sizeof(iblock));
    memset(&isplit, 0, sizeof(isplit));
    memset(&ifail, 0, sizeof(ifail));
    memset(&w, 0, sizeof(w));
    memset(&d1, 0, sizeof(d1));
    memset(&e1, 0, sizeof(e1));
    memset(&z2, 0, sizeof(z2));
    memset(&z3, 0, sizeof(z3));
    ae_vector_init(&iblock, 0, DT_INT, _state, ae_true);
    ae_vector_init(&isplit, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ifail, 0, DT_INT, _state, ae_true);
    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z3, 0, 0, DT_REAL, _state, ae_true);

    ae_assert((0<=i1&&i1<=i2)&&i2<n, "SMatrixTDEVDI: incorrect I1/I2!", _state);

    /*
     * Copy D,E to D1, E1
     */
    /* Internal bisection/inverse-iteration routines use 1-based arrays. */
    ae_vector_set_length(&d1, n+1, _state);
    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
    if( n>1 )
    {
        ae_vector_set_length(&e1, n-1+1, _state);
        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
    }

    /*
     * No eigen vectors
     */
    if( zneeded==0 )
    {
        /* IRANGE=3 (index search, 1-based I1+1..I2+1), ORDER=1: values only. */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 1, (double)(0), (double)(0), i1+1, i2+1, (double)(-1), &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result )
        {
            ae_frame_leave(_state);
            return result;
        }
        /* Index search must return exactly I2-I1+1 eigenvalues. */
        if( m!=i2-i1+1 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }
        ae_vector_set_length(d, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Eigen vectors are multiplied by Z
     */
    if( zneeded==1 )
    {

        /*
         * Find eigen pairs
         */
        /* ORDER=2 groups eigenvalues by block, as required by DSTEIN-style
         * inverse iteration below. */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 2, (double)(0), (double)(0), i1+1, i2+1, (double)(-1), &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result )
        {
            ae_frame_leave(_state);
            return result;
        }
        if( m!=i2-i1+1 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }
        evd_internaldstein(n, &d1, &e1, m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
        if( cr!=0 )
        {
            /* Inverse iteration failed to converge for CR eigenvectors. */
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Sort eigen values and vectors
         */
        /* Selection sort into ascending order, permuting Z2's columns in
         * step with W. M is typically small, so O(M^2) is acceptable. */
        for(i=1; i<=m; i++)
        {
            k = i;
            for(j=i; j<=m; j++)
            {
                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
                {
                    k = j;
                }
            }
            v = w.ptr.p_double[i];
            w.ptr.p_double[i] = w.ptr.p_double[k];
            w.ptr.p_double[k] = v;
            for(j=1; j<=n; j++)
            {
                v = z2.ptr.pp_double[j][i];
                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
                z2.ptr.pp_double[j][k] = v;
            }
        }

        /*
         * Transform Z2 and overwrite Z
         */
        /* Z3 := Z2^T (eigenvectors as rows), then Z2 := Z*Z3^T row by row,
         * i.e. the caller's transformation matrix times the eigenvectors. */
        ae_matrix_set_length(&z3, m+1, n+1, _state);
        for(i=1; i<=m; i++)
        {
            ae_v_move(&z3.ptr.pp_double[i][1], 1, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(1,n));
        }
        for(i=1; i<=n; i++)
        {
            for(j=1; j<=m; j++)
            {
                v = ae_v_dotproduct(&z->ptr.pp_double[i-1][0], 1, &z3.ptr.pp_double[j][1], 1, ae_v_len(0,n-1));
                z2.ptr.pp_double[i][j] = v;
            }
        }
        /* Resize Z to NxM and copy the product back (0-based). */
        ae_matrix_set_length(z, n-1+1, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
        }

        /*
         * Store W
         */
        ae_vector_set_length(d, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Eigen vectors are stored in Z
     */
    if( zneeded==2 )
    {

        /*
         * Find eigen pairs
         */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 2, (double)(0), (double)(0), i1+1, i2+1, (double)(-1), &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result )
        {
            ae_frame_leave(_state);
            return result;
        }
        if( m!=i2-i1+1 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }
        evd_internaldstein(n, &d1, &e1, m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
        if( cr!=0 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Sort eigen values and vectors
         */
        /* Same selection sort as in the ZNeeded=1 branch. */
        for(i=1; i<=m; i++)
        {
            k = i;
            for(j=i; j<=m; j++)
            {
                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
                {
                    k = j;
                }
            }
            v = w.ptr.p_double[i];
            w.ptr.p_double[i] = w.ptr.p_double[k];
            w.ptr.p_double[k] = v;
            for(j=1; j<=n; j++)
            {
                v = z2.ptr.pp_double[j][i];
                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
                z2.ptr.pp_double[j][k] = v;
            }
        }

        /*
         * Store Z
         */
        /* Copy raw eigenvectors into Z (NxM, 0-based). */
        ae_matrix_set_length(z, n-1+1, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
        }

        /*
         * Store W
         */
        ae_vector_set_length(d, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        ae_frame_leave(_state);
        return result;
    }
    /* Not reachable for valid ZNeeded in [0,2]. */
    result = ae_false;
    ae_frame_leave(_state);
    return result;
}
34954 
34955 
34956 /*************************************************************************
34957 Finding eigenvalues and eigenvectors of a general (unsymmetric) matrix
34958 
34959   ! COMMERCIAL EDITION OF ALGLIB:
34960   !
34961   ! Commercial Edition of ALGLIB includes following important improvements
34962   ! of this function:
34963   ! * high-performance native backend with same C# interface (C# version)
34964   ! * hardware vendor (Intel) implementations of linear algebra primitives
34965   !   (C++ and C# versions, x86/x64 platform)
34966   !
34967   ! We recommend you to read 'Working with commercial version' section  of
34968   ! ALGLIB Reference Manual in order to find out how to  use  performance-
34969   ! related features provided by commercial edition of ALGLIB.
34970 
34971 The algorithm finds eigenvalues and eigenvectors of a general matrix by
34972 using the QR algorithm with multiple shifts. The algorithm can find
34973 eigenvalues and both left and right eigenvectors.
34974 
34975 The right eigenvector is a vector x such that A*x = w*x, and the left
34976 eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex
34977 conjugate transposition of vector y).
34978 
34979 Input parameters:
34980     A       -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
34981     N       -   size of matrix A.
34982     VNeeded -   flag controlling whether eigenvectors are needed or not.
34983                 If VNeeded is equal to:
34984                  * 0, eigenvectors are not returned;
34985                  * 1, right eigenvectors are returned;
34986                  * 2, left eigenvectors are returned;
34987                  * 3, both left and right eigenvectors are returned.
34988 
34989 Output parameters:
34990     WR      -   real parts of eigenvalues.
34991                 Array whose index ranges within [0..N-1].
    WI      -   imaginary parts of eigenvalues.
                Array whose index ranges within [0..N-1].
34994     VL, VR  -   arrays of left and right eigenvectors (if they are needed).
34995                 If WI[i]=0, the respective eigenvalue is a real number,
34996                 and it corresponds to the column number I of matrices VL/VR.
34997                 If WI[i]>0, we have a pair of complex conjugate numbers with
34998                 positive and negative imaginary parts:
34999                     the first eigenvalue WR[i] + sqrt(-1)*WI[i];
35000                     the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1];
35001                     WI[i]>0
35002                     WI[i+1] = -WI[i] < 0
35003                 In that case, the eigenvector  corresponding to the first
35004                 eigenvalue is located in i and i+1 columns of matrices
35005                 VL/VR (the column number i contains the real part, and the
35006                 column number i+1 contains the imaginary part), and the vector
35007                 corresponding to the second eigenvalue is a complex conjugate to
35008                 the first vector.
35009                 Arrays whose indexes range within [0..N-1, 0..N-1].
35010 
35011 Result:
35012     True, if the algorithm has converged.
35013     False, if the algorithm has not converged.
35014 
35015 Note 1:
35016     Some users may ask the following question: what if WI[N-1]>0?
35017     WI[N] must contain an eigenvalue which is complex conjugate to the
35018     N-th eigenvalue, but the array has only size N?
35019     The answer is as follows: such a situation cannot occur because the
    algorithm finds eigenvalues in pairs, therefore, if WI[i]>0, I is
35021     strictly less than N-1.
35022 
35023 Note 2:
35024     The algorithm performance depends on the value of the internal parameter
35025     NS of the InternalSchurDecomposition subroutine which defines the number
35026     of shifts in the QR algorithm (similarly to the block width in block-matrix
35027     algorithms of linear algebra). If you require maximum performance
35028     on your machine, it is recommended to adjust this parameter manually.
35029 
35030 
35031 See also the InternalTREVC subroutine.
35032 
35033 The algorithm is based on the LAPACK 3.0 library.
35034 *************************************************************************/
rmatrixevd(ae_matrix * a,ae_int_t n,ae_int_t vneeded,ae_vector * wr,ae_vector * wi,ae_matrix * vl,ae_matrix * vr,ae_state * _state)35035 ae_bool rmatrixevd(/* Real    */ ae_matrix* a,
35036      ae_int_t n,
35037      ae_int_t vneeded,
35038      /* Real    */ ae_vector* wr,
35039      /* Real    */ ae_vector* wi,
35040      /* Real    */ ae_matrix* vl,
35041      /* Real    */ ae_matrix* vr,
35042      ae_state *_state)
35043 {
35044     ae_frame _frame_block;
35045     ae_matrix _a;
35046     ae_matrix a1;
35047     ae_matrix vl1;
35048     ae_matrix vr1;
35049     ae_matrix s1;
35050     ae_matrix s;
35051     ae_matrix dummy;
35052     ae_vector wr1;
35053     ae_vector wi1;
35054     ae_vector tau;
35055     ae_int_t i;
35056     ae_int_t info;
35057     ae_vector sel1;
35058     ae_int_t m1;
35059     ae_bool result;
35060 
35061     ae_frame_make(_state, &_frame_block);
35062     memset(&_a, 0, sizeof(_a));
35063     memset(&a1, 0, sizeof(a1));
35064     memset(&vl1, 0, sizeof(vl1));
35065     memset(&vr1, 0, sizeof(vr1));
35066     memset(&s1, 0, sizeof(s1));
35067     memset(&s, 0, sizeof(s));
35068     memset(&dummy, 0, sizeof(dummy));
35069     memset(&wr1, 0, sizeof(wr1));
35070     memset(&wi1, 0, sizeof(wi1));
35071     memset(&tau, 0, sizeof(tau));
35072     memset(&sel1, 0, sizeof(sel1));
35073     ae_matrix_init_copy(&_a, a, _state, ae_true);
35074     a = &_a;
35075     ae_vector_clear(wr);
35076     ae_vector_clear(wi);
35077     ae_matrix_clear(vl);
35078     ae_matrix_clear(vr);
35079     ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true);
35080     ae_matrix_init(&vl1, 0, 0, DT_REAL, _state, ae_true);
35081     ae_matrix_init(&vr1, 0, 0, DT_REAL, _state, ae_true);
35082     ae_matrix_init(&s1, 0, 0, DT_REAL, _state, ae_true);
35083     ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_true);
35084     ae_matrix_init(&dummy, 0, 0, DT_REAL, _state, ae_true);
35085     ae_vector_init(&wr1, 0, DT_REAL, _state, ae_true);
35086     ae_vector_init(&wi1, 0, DT_REAL, _state, ae_true);
35087     ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
35088     ae_vector_init(&sel1, 0, DT_BOOL, _state, ae_true);
35089 
35090     ae_assert(vneeded>=0&&vneeded<=3, "RMatrixEVD: incorrect VNeeded!", _state);
35091     if( vneeded==0 )
35092     {
35093 
35094         /*
35095          * Eigen values only
35096          */
35097         rmatrixhessenberg(a, n, &tau, _state);
35098         rmatrixinternalschurdecomposition(a, n, 0, 0, wr, wi, &dummy, &info, _state);
35099         result = info==0;
35100         ae_frame_leave(_state);
35101         return result;
35102     }
35103 
35104     /*
35105      * Eigen values and vectors
35106      */
35107     rmatrixhessenberg(a, n, &tau, _state);
35108     rmatrixhessenbergunpackq(a, n, &tau, &s, _state);
35109     rmatrixinternalschurdecomposition(a, n, 1, 1, wr, wi, &s, &info, _state);
35110     result = info==0;
35111     if( !result )
35112     {
35113         ae_frame_leave(_state);
35114         return result;
35115     }
35116     if( vneeded==1||vneeded==3 )
35117     {
35118         ae_matrix_set_length(vr, n, n, _state);
35119         for(i=0; i<=n-1; i++)
35120         {
35121             ae_v_move(&vr->ptr.pp_double[i][0], 1, &s.ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
35122         }
35123     }
35124     if( vneeded==2||vneeded==3 )
35125     {
35126         ae_matrix_set_length(vl, n, n, _state);
35127         for(i=0; i<=n-1; i++)
35128         {
35129             ae_v_move(&vl->ptr.pp_double[i][0], 1, &s.ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
35130         }
35131     }
35132     evd_rmatrixinternaltrevc(a, n, vneeded, 1, &sel1, vl, vr, &m1, &info, _state);
35133     result = info==0;
35134     ae_frame_leave(_state);
35135     return result;
35136 }
35137 
35138 
35139 /*************************************************************************
Clears request fields (to be sure that we don't forget to clear something)
35141 *************************************************************************/
evd_clearrfields(eigsubspacestate * state,ae_state * _state)35142 static void evd_clearrfields(eigsubspacestate* state, ae_state *_state)
35143 {
35144 
35145 
35146     state->requesttype = -1;
35147     state->requestsize = -1;
35148 }
35149 
35150 
evd_tridiagonalevd(ae_vector * d,ae_vector * e,ae_int_t n,ae_int_t zneeded,ae_matrix * z,ae_state * _state)35151 static ae_bool evd_tridiagonalevd(/* Real    */ ae_vector* d,
35152      /* Real    */ ae_vector* e,
35153      ae_int_t n,
35154      ae_int_t zneeded,
35155      /* Real    */ ae_matrix* z,
35156      ae_state *_state)
35157 {
35158     ae_frame _frame_block;
35159     ae_vector _e;
35160     ae_int_t maxit;
35161     ae_int_t i;
35162     ae_int_t ii;
35163     ae_int_t iscale;
35164     ae_int_t j;
35165     ae_int_t jtot;
35166     ae_int_t k;
35167     ae_int_t t;
35168     ae_int_t l;
35169     ae_int_t l1;
35170     ae_int_t lend;
35171     ae_int_t lendm1;
35172     ae_int_t lendp1;
35173     ae_int_t lendsv;
35174     ae_int_t lm1;
35175     ae_int_t lsv;
35176     ae_int_t m;
35177     ae_int_t mm1;
35178     ae_int_t nm1;
35179     ae_int_t nmaxit;
35180     ae_int_t tmpint;
35181     double anorm;
35182     double b;
35183     double c;
35184     double eps;
35185     double eps2;
35186     double f;
35187     double g;
35188     double p;
35189     double r;
35190     double rt1;
35191     double rt2;
35192     double s;
35193     double safmax;
35194     double safmin;
35195     double ssfmax;
35196     double ssfmin;
35197     double tst;
35198     double tmp;
35199     ae_vector work1;
35200     ae_vector work2;
35201     ae_vector workc;
35202     ae_vector works;
35203     ae_vector wtemp;
35204     ae_bool gotoflag;
35205     ae_int_t zrows;
35206     ae_bool wastranspose;
35207     ae_bool result;
35208 
35209     ae_frame_make(_state, &_frame_block);
35210     memset(&_e, 0, sizeof(_e));
35211     memset(&work1, 0, sizeof(work1));
35212     memset(&work2, 0, sizeof(work2));
35213     memset(&workc, 0, sizeof(workc));
35214     memset(&works, 0, sizeof(works));
35215     memset(&wtemp, 0, sizeof(wtemp));
35216     ae_vector_init_copy(&_e, e, _state, ae_true);
35217     e = &_e;
35218     ae_vector_init(&work1, 0, DT_REAL, _state, ae_true);
35219     ae_vector_init(&work2, 0, DT_REAL, _state, ae_true);
35220     ae_vector_init(&workc, 0, DT_REAL, _state, ae_true);
35221     ae_vector_init(&works, 0, DT_REAL, _state, ae_true);
35222     ae_vector_init(&wtemp, 0, DT_REAL, _state, ae_true);
35223 
35224     ae_assert(zneeded>=0&&zneeded<=3, "TridiagonalEVD: Incorrent ZNeeded", _state);
35225 
35226     /*
35227      * Quick return if possible
35228      */
35229     if( zneeded<0||zneeded>3 )
35230     {
35231         result = ae_false;
35232         ae_frame_leave(_state);
35233         return result;
35234     }
35235     result = ae_true;
35236     if( n==0 )
35237     {
35238         ae_frame_leave(_state);
35239         return result;
35240     }
35241     if( n==1 )
35242     {
35243         if( zneeded==2||zneeded==3 )
35244         {
35245             ae_matrix_set_length(z, 1+1, 1+1, _state);
35246             z->ptr.pp_double[1][1] = (double)(1);
35247         }
35248         ae_frame_leave(_state);
35249         return result;
35250     }
35251     maxit = 30;
35252 
35253     /*
35254      * Initialize arrays
35255      */
35256     ae_vector_set_length(&wtemp, n+1, _state);
35257     ae_vector_set_length(&work1, n-1+1, _state);
35258     ae_vector_set_length(&work2, n-1+1, _state);
35259     ae_vector_set_length(&workc, n+1, _state);
35260     ae_vector_set_length(&works, n+1, _state);
35261 
35262     /*
35263      * Determine the unit roundoff and over/underflow thresholds.
35264      */
35265     eps = ae_machineepsilon;
35266     eps2 = ae_sqr(eps, _state);
35267     safmin = ae_minrealnumber;
35268     safmax = ae_maxrealnumber;
35269     ssfmax = ae_sqrt(safmax, _state)/3;
35270     ssfmin = ae_sqrt(safmin, _state)/eps2;
35271 
35272     /*
35273      * Prepare Z
35274      *
35275      * Here we are using transposition to get rid of column operations
35276      *
35277      */
35278     wastranspose = ae_false;
35279     zrows = 0;
35280     if( zneeded==1 )
35281     {
35282         zrows = n;
35283     }
35284     if( zneeded==2 )
35285     {
35286         zrows = n;
35287     }
35288     if( zneeded==3 )
35289     {
35290         zrows = 1;
35291     }
35292     if( zneeded==1 )
35293     {
35294         wastranspose = ae_true;
35295         inplacetranspose(z, 1, n, 1, n, &wtemp, _state);
35296     }
35297     if( zneeded==2 )
35298     {
35299         wastranspose = ae_true;
35300         ae_matrix_set_length(z, n+1, n+1, _state);
35301         for(i=1; i<=n; i++)
35302         {
35303             for(j=1; j<=n; j++)
35304             {
35305                 if( i==j )
35306                 {
35307                     z->ptr.pp_double[i][j] = (double)(1);
35308                 }
35309                 else
35310                 {
35311                     z->ptr.pp_double[i][j] = (double)(0);
35312                 }
35313             }
35314         }
35315     }
35316     if( zneeded==3 )
35317     {
35318         wastranspose = ae_false;
35319         ae_matrix_set_length(z, 1+1, n+1, _state);
35320         for(j=1; j<=n; j++)
35321         {
35322             if( j==1 )
35323             {
35324                 z->ptr.pp_double[1][j] = (double)(1);
35325             }
35326             else
35327             {
35328                 z->ptr.pp_double[1][j] = (double)(0);
35329             }
35330         }
35331     }
35332     nmaxit = n*maxit;
35333     jtot = 0;
35334 
35335     /*
35336      * Determine where the matrix splits and choose QL or QR iteration
35337      * for each block, according to whether top or bottom diagonal
35338      * element is smaller.
35339      */
35340     l1 = 1;
35341     nm1 = n-1;
35342     for(;;)
35343     {
35344         if( l1>n )
35345         {
35346             break;
35347         }
35348         if( l1>1 )
35349         {
35350             e->ptr.p_double[l1-1] = (double)(0);
35351         }
35352         gotoflag = ae_false;
35353         m = l1;
35354         if( l1<=nm1 )
35355         {
35356             for(m=l1; m<=nm1; m++)
35357             {
35358                 tst = ae_fabs(e->ptr.p_double[m], _state);
35359                 if( ae_fp_eq(tst,(double)(0)) )
35360                 {
35361                     gotoflag = ae_true;
35362                     break;
35363                 }
35364                 if( ae_fp_less_eq(tst,ae_sqrt(ae_fabs(d->ptr.p_double[m], _state), _state)*ae_sqrt(ae_fabs(d->ptr.p_double[m+1], _state), _state)*eps) )
35365                 {
35366                     e->ptr.p_double[m] = (double)(0);
35367                     gotoflag = ae_true;
35368                     break;
35369                 }
35370             }
35371         }
35372         if( !gotoflag )
35373         {
35374             m = n;
35375         }
35376 
35377         /*
35378          * label 30:
35379          */
35380         l = l1;
35381         lsv = l;
35382         lend = m;
35383         lendsv = lend;
35384         l1 = m+1;
35385         if( lend==l )
35386         {
35387             continue;
35388         }
35389 
35390         /*
35391          * Scale submatrix in rows and columns L to LEND
35392          */
35393         if( l==lend )
35394         {
35395             anorm = ae_fabs(d->ptr.p_double[l], _state);
35396         }
35397         else
35398         {
35399             anorm = ae_maxreal(ae_fabs(d->ptr.p_double[l], _state)+ae_fabs(e->ptr.p_double[l], _state), ae_fabs(e->ptr.p_double[lend-1], _state)+ae_fabs(d->ptr.p_double[lend], _state), _state);
35400             for(i=l+1; i<=lend-1; i++)
35401             {
35402                 anorm = ae_maxreal(anorm, ae_fabs(d->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i-1], _state), _state);
35403             }
35404         }
35405         iscale = 0;
35406         if( ae_fp_eq(anorm,(double)(0)) )
35407         {
35408             continue;
35409         }
35410         if( ae_fp_greater(anorm,ssfmax) )
35411         {
35412             iscale = 1;
35413             tmp = ssfmax/anorm;
35414             tmpint = lend-1;
35415             ae_v_muld(&d->ptr.p_double[l], 1, ae_v_len(l,lend), tmp);
35416             ae_v_muld(&e->ptr.p_double[l], 1, ae_v_len(l,tmpint), tmp);
35417         }
35418         if( ae_fp_less(anorm,ssfmin) )
35419         {
35420             iscale = 2;
35421             tmp = ssfmin/anorm;
35422             tmpint = lend-1;
35423             ae_v_muld(&d->ptr.p_double[l], 1, ae_v_len(l,lend), tmp);
35424             ae_v_muld(&e->ptr.p_double[l], 1, ae_v_len(l,tmpint), tmp);
35425         }
35426 
35427         /*
35428          * Choose between QL and QR iteration
35429          */
35430         if( ae_fp_less(ae_fabs(d->ptr.p_double[lend], _state),ae_fabs(d->ptr.p_double[l], _state)) )
35431         {
35432             lend = lsv;
35433             l = lendsv;
35434         }
35435         if( lend>l )
35436         {
35437 
35438             /*
35439              * QL Iteration
35440              *
35441              * Look for small subdiagonal element.
35442              */
35443             for(;;)
35444             {
35445                 gotoflag = ae_false;
35446                 if( l!=lend )
35447                 {
35448                     lendm1 = lend-1;
35449                     for(m=l; m<=lendm1; m++)
35450                     {
35451                         tst = ae_sqr(ae_fabs(e->ptr.p_double[m], _state), _state);
35452                         if( ae_fp_less_eq(tst,eps2*ae_fabs(d->ptr.p_double[m], _state)*ae_fabs(d->ptr.p_double[m+1], _state)+safmin) )
35453                         {
35454                             gotoflag = ae_true;
35455                             break;
35456                         }
35457                     }
35458                 }
35459                 if( !gotoflag )
35460                 {
35461                     m = lend;
35462                 }
35463                 if( m<lend )
35464                 {
35465                     e->ptr.p_double[m] = (double)(0);
35466                 }
35467                 p = d->ptr.p_double[l];
35468                 if( m!=l )
35469                 {
35470 
35471                     /*
35472                      * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
35473                      * to compute its eigensystem.
35474                      */
35475                     if( m==l+1 )
35476                     {
35477                         if( zneeded>0 )
35478                         {
35479                             evd_tdevdev2(d->ptr.p_double[l], e->ptr.p_double[l], d->ptr.p_double[l+1], &rt1, &rt2, &c, &s, _state);
35480                             work1.ptr.p_double[l] = c;
35481                             work2.ptr.p_double[l] = s;
35482                             workc.ptr.p_double[1] = work1.ptr.p_double[l];
35483                             works.ptr.p_double[1] = work2.ptr.p_double[l];
35484                             if( !wastranspose )
35485                             {
35486                                 applyrotationsfromtheright(ae_false, 1, zrows, l, l+1, &workc, &works, z, &wtemp, _state);
35487                             }
35488                             else
35489                             {
35490                                 applyrotationsfromtheleft(ae_false, l, l+1, 1, zrows, &workc, &works, z, &wtemp, _state);
35491                             }
35492                         }
35493                         else
35494                         {
35495                             evd_tdevde2(d->ptr.p_double[l], e->ptr.p_double[l], d->ptr.p_double[l+1], &rt1, &rt2, _state);
35496                         }
35497                         d->ptr.p_double[l] = rt1;
35498                         d->ptr.p_double[l+1] = rt2;
35499                         e->ptr.p_double[l] = (double)(0);
35500                         l = l+2;
35501                         if( l<=lend )
35502                         {
35503                             continue;
35504                         }
35505 
35506                         /*
35507                          * GOTO 140
35508                          */
35509                         break;
35510                     }
35511                     if( jtot==nmaxit )
35512                     {
35513 
35514                         /*
35515                          * GOTO 140
35516                          */
35517                         break;
35518                     }
35519                     jtot = jtot+1;
35520 
35521                     /*
35522                      * Form shift.
35523                      */
35524                     g = (d->ptr.p_double[l+1]-p)/(2*e->ptr.p_double[l]);
35525                     r = evd_tdevdpythag(g, (double)(1), _state);
35526                     g = d->ptr.p_double[m]-p+e->ptr.p_double[l]/(g+evd_tdevdextsign(r, g, _state));
35527                     s = (double)(1);
35528                     c = (double)(1);
35529                     p = (double)(0);
35530 
35531                     /*
35532                      * Inner loop
35533                      */
35534                     mm1 = m-1;
35535                     for(i=mm1; i>=l; i--)
35536                     {
35537                         f = s*e->ptr.p_double[i];
35538                         b = c*e->ptr.p_double[i];
35539                         generaterotation(g, f, &c, &s, &r, _state);
35540                         if( i!=m-1 )
35541                         {
35542                             e->ptr.p_double[i+1] = r;
35543                         }
35544                         g = d->ptr.p_double[i+1]-p;
35545                         r = (d->ptr.p_double[i]-g)*s+2*c*b;
35546                         p = s*r;
35547                         d->ptr.p_double[i+1] = g+p;
35548                         g = c*r-b;
35549 
35550                         /*
35551                          * If eigenvectors are desired, then save rotations.
35552                          */
35553                         if( zneeded>0 )
35554                         {
35555                             work1.ptr.p_double[i] = c;
35556                             work2.ptr.p_double[i] = -s;
35557                         }
35558                     }
35559 
35560                     /*
35561                      * If eigenvectors are desired, then apply saved rotations.
35562                      */
35563                     if( zneeded>0 )
35564                     {
35565                         for(i=l; i<=m-1; i++)
35566                         {
35567                             workc.ptr.p_double[i-l+1] = work1.ptr.p_double[i];
35568                             works.ptr.p_double[i-l+1] = work2.ptr.p_double[i];
35569                         }
35570                         if( !wastranspose )
35571                         {
35572                             applyrotationsfromtheright(ae_false, 1, zrows, l, m, &workc, &works, z, &wtemp, _state);
35573                         }
35574                         else
35575                         {
35576                             applyrotationsfromtheleft(ae_false, l, m, 1, zrows, &workc, &works, z, &wtemp, _state);
35577                         }
35578                     }
35579                     d->ptr.p_double[l] = d->ptr.p_double[l]-p;
35580                     e->ptr.p_double[l] = g;
35581                     continue;
35582                 }
35583 
35584                 /*
35585                  * Eigenvalue found.
35586                  */
35587                 d->ptr.p_double[l] = p;
35588                 l = l+1;
35589                 if( l<=lend )
35590                 {
35591                     continue;
35592                 }
35593                 break;
35594             }
35595         }
35596         else
35597         {
35598 
35599             /*
35600              * QR Iteration
35601              *
35602              * Look for small superdiagonal element.
35603              */
35604             for(;;)
35605             {
35606                 gotoflag = ae_false;
35607                 if( l!=lend )
35608                 {
35609                     lendp1 = lend+1;
35610                     for(m=l; m>=lendp1; m--)
35611                     {
35612                         tst = ae_sqr(ae_fabs(e->ptr.p_double[m-1], _state), _state);
35613                         if( ae_fp_less_eq(tst,eps2*ae_fabs(d->ptr.p_double[m], _state)*ae_fabs(d->ptr.p_double[m-1], _state)+safmin) )
35614                         {
35615                             gotoflag = ae_true;
35616                             break;
35617                         }
35618                     }
35619                 }
35620                 if( !gotoflag )
35621                 {
35622                     m = lend;
35623                 }
35624                 if( m>lend )
35625                 {
35626                     e->ptr.p_double[m-1] = (double)(0);
35627                 }
35628                 p = d->ptr.p_double[l];
35629                 if( m!=l )
35630                 {
35631 
35632                     /*
35633                      * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
35634                      * to compute its eigensystem.
35635                      */
35636                     if( m==l-1 )
35637                     {
35638                         if( zneeded>0 )
35639                         {
35640                             evd_tdevdev2(d->ptr.p_double[l-1], e->ptr.p_double[l-1], d->ptr.p_double[l], &rt1, &rt2, &c, &s, _state);
35641                             work1.ptr.p_double[m] = c;
35642                             work2.ptr.p_double[m] = s;
35643                             workc.ptr.p_double[1] = c;
35644                             works.ptr.p_double[1] = s;
35645                             if( !wastranspose )
35646                             {
35647                                 applyrotationsfromtheright(ae_true, 1, zrows, l-1, l, &workc, &works, z, &wtemp, _state);
35648                             }
35649                             else
35650                             {
35651                                 applyrotationsfromtheleft(ae_true, l-1, l, 1, zrows, &workc, &works, z, &wtemp, _state);
35652                             }
35653                         }
35654                         else
35655                         {
35656                             evd_tdevde2(d->ptr.p_double[l-1], e->ptr.p_double[l-1], d->ptr.p_double[l], &rt1, &rt2, _state);
35657                         }
35658                         d->ptr.p_double[l-1] = rt1;
35659                         d->ptr.p_double[l] = rt2;
35660                         e->ptr.p_double[l-1] = (double)(0);
35661                         l = l-2;
35662                         if( l>=lend )
35663                         {
35664                             continue;
35665                         }
35666                         break;
35667                     }
35668                     if( jtot==nmaxit )
35669                     {
35670                         break;
35671                     }
35672                     jtot = jtot+1;
35673 
35674                     /*
35675                      * Form shift.
35676                      */
35677                     g = (d->ptr.p_double[l-1]-p)/(2*e->ptr.p_double[l-1]);
35678                     r = evd_tdevdpythag(g, (double)(1), _state);
35679                     g = d->ptr.p_double[m]-p+e->ptr.p_double[l-1]/(g+evd_tdevdextsign(r, g, _state));
35680                     s = (double)(1);
35681                     c = (double)(1);
35682                     p = (double)(0);
35683 
35684                     /*
35685                      * Inner loop
35686                      */
35687                     lm1 = l-1;
35688                     for(i=m; i<=lm1; i++)
35689                     {
35690                         f = s*e->ptr.p_double[i];
35691                         b = c*e->ptr.p_double[i];
35692                         generaterotation(g, f, &c, &s, &r, _state);
35693                         if( i!=m )
35694                         {
35695                             e->ptr.p_double[i-1] = r;
35696                         }
35697                         g = d->ptr.p_double[i]-p;
35698                         r = (d->ptr.p_double[i+1]-g)*s+2*c*b;
35699                         p = s*r;
35700                         d->ptr.p_double[i] = g+p;
35701                         g = c*r-b;
35702 
35703                         /*
35704                          * If eigenvectors are desired, then save rotations.
35705                          */
35706                         if( zneeded>0 )
35707                         {
35708                             work1.ptr.p_double[i] = c;
35709                             work2.ptr.p_double[i] = s;
35710                         }
35711                     }
35712 
35713                     /*
35714                      * If eigenvectors are desired, then apply saved rotations.
35715                      */
35716                     if( zneeded>0 )
35717                     {
35718                         for(i=m; i<=l-1; i++)
35719                         {
35720                             workc.ptr.p_double[i-m+1] = work1.ptr.p_double[i];
35721                             works.ptr.p_double[i-m+1] = work2.ptr.p_double[i];
35722                         }
35723                         if( !wastranspose )
35724                         {
35725                             applyrotationsfromtheright(ae_true, 1, zrows, m, l, &workc, &works, z, &wtemp, _state);
35726                         }
35727                         else
35728                         {
35729                             applyrotationsfromtheleft(ae_true, m, l, 1, zrows, &workc, &works, z, &wtemp, _state);
35730                         }
35731                     }
35732                     d->ptr.p_double[l] = d->ptr.p_double[l]-p;
35733                     e->ptr.p_double[lm1] = g;
35734                     continue;
35735                 }
35736 
35737                 /*
35738                  * Eigenvalue found.
35739                  */
35740                 d->ptr.p_double[l] = p;
35741                 l = l-1;
35742                 if( l>=lend )
35743                 {
35744                     continue;
35745                 }
35746                 break;
35747             }
35748         }
35749 
35750         /*
35751          * Undo scaling if necessary
35752          */
35753         if( iscale==1 )
35754         {
35755             tmp = anorm/ssfmax;
35756             tmpint = lendsv-1;
35757             ae_v_muld(&d->ptr.p_double[lsv], 1, ae_v_len(lsv,lendsv), tmp);
35758             ae_v_muld(&e->ptr.p_double[lsv], 1, ae_v_len(lsv,tmpint), tmp);
35759         }
35760         if( iscale==2 )
35761         {
35762             tmp = anorm/ssfmin;
35763             tmpint = lendsv-1;
35764             ae_v_muld(&d->ptr.p_double[lsv], 1, ae_v_len(lsv,lendsv), tmp);
35765             ae_v_muld(&e->ptr.p_double[lsv], 1, ae_v_len(lsv,tmpint), tmp);
35766         }
35767 
35768         /*
35769          * Check for no convergence to an eigenvalue after a total
35770          * of N*MAXIT iterations.
35771          */
35772         if( jtot>=nmaxit )
35773         {
35774             result = ae_false;
35775             if( wastranspose )
35776             {
35777                 inplacetranspose(z, 1, n, 1, n, &wtemp, _state);
35778             }
35779             ae_frame_leave(_state);
35780             return result;
35781         }
35782     }
35783 
35784     /*
35785      * Order eigenvalues and eigenvectors.
35786      */
35787     if( zneeded==0 )
35788     {
35789 
35790         /*
35791          * Sort
35792          */
35793         if( n==1 )
35794         {
35795             ae_frame_leave(_state);
35796             return result;
35797         }
35798         if( n==2 )
35799         {
35800             if( ae_fp_greater(d->ptr.p_double[1],d->ptr.p_double[2]) )
35801             {
35802                 tmp = d->ptr.p_double[1];
35803                 d->ptr.p_double[1] = d->ptr.p_double[2];
35804                 d->ptr.p_double[2] = tmp;
35805             }
35806             ae_frame_leave(_state);
35807             return result;
35808         }
35809         i = 2;
35810         do
35811         {
35812             t = i;
35813             while(t!=1)
35814             {
35815                 k = t/2;
35816                 if( ae_fp_greater_eq(d->ptr.p_double[k],d->ptr.p_double[t]) )
35817                 {
35818                     t = 1;
35819                 }
35820                 else
35821                 {
35822                     tmp = d->ptr.p_double[k];
35823                     d->ptr.p_double[k] = d->ptr.p_double[t];
35824                     d->ptr.p_double[t] = tmp;
35825                     t = k;
35826                 }
35827             }
35828             i = i+1;
35829         }
35830         while(i<=n);
35831         i = n-1;
35832         do
35833         {
35834             tmp = d->ptr.p_double[i+1];
35835             d->ptr.p_double[i+1] = d->ptr.p_double[1];
35836             d->ptr.p_double[1] = tmp;
35837             t = 1;
35838             while(t!=0)
35839             {
35840                 k = 2*t;
35841                 if( k>i )
35842                 {
35843                     t = 0;
35844                 }
35845                 else
35846                 {
35847                     if( k<i )
35848                     {
35849                         if( ae_fp_greater(d->ptr.p_double[k+1],d->ptr.p_double[k]) )
35850                         {
35851                             k = k+1;
35852                         }
35853                     }
35854                     if( ae_fp_greater_eq(d->ptr.p_double[t],d->ptr.p_double[k]) )
35855                     {
35856                         t = 0;
35857                     }
35858                     else
35859                     {
35860                         tmp = d->ptr.p_double[k];
35861                         d->ptr.p_double[k] = d->ptr.p_double[t];
35862                         d->ptr.p_double[t] = tmp;
35863                         t = k;
35864                     }
35865                 }
35866             }
35867             i = i-1;
35868         }
35869         while(i>=1);
35870     }
35871     else
35872     {
35873 
35874         /*
35875          * Use Selection Sort to minimize swaps of eigenvectors
35876          */
35877         for(ii=2; ii<=n; ii++)
35878         {
35879             i = ii-1;
35880             k = i;
35881             p = d->ptr.p_double[i];
35882             for(j=ii; j<=n; j++)
35883             {
35884                 if( ae_fp_less(d->ptr.p_double[j],p) )
35885                 {
35886                     k = j;
35887                     p = d->ptr.p_double[j];
35888                 }
35889             }
35890             if( k!=i )
35891             {
35892                 d->ptr.p_double[k] = d->ptr.p_double[i];
35893                 d->ptr.p_double[i] = p;
35894                 if( wastranspose )
35895                 {
35896                     ae_v_move(&wtemp.ptr.p_double[1], 1, &z->ptr.pp_double[i][1], 1, ae_v_len(1,n));
35897                     ae_v_move(&z->ptr.pp_double[i][1], 1, &z->ptr.pp_double[k][1], 1, ae_v_len(1,n));
35898                     ae_v_move(&z->ptr.pp_double[k][1], 1, &wtemp.ptr.p_double[1], 1, ae_v_len(1,n));
35899                 }
35900                 else
35901                 {
35902                     ae_v_move(&wtemp.ptr.p_double[1], 1, &z->ptr.pp_double[1][i], z->stride, ae_v_len(1,zrows));
35903                     ae_v_move(&z->ptr.pp_double[1][i], z->stride, &z->ptr.pp_double[1][k], z->stride, ae_v_len(1,zrows));
35904                     ae_v_move(&z->ptr.pp_double[1][k], z->stride, &wtemp.ptr.p_double[1], 1, ae_v_len(1,zrows));
35905                 }
35906             }
35907         }
35908         if( wastranspose )
35909         {
35910             inplacetranspose(z, 1, n, 1, n, &wtemp, _state);
35911         }
35912     }
35913     ae_frame_leave(_state);
35914     return result;
35915 }
35916 
35917 
35918 /*************************************************************************
35919 DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
35920    [  A   B  ]
35921    [  B   C  ].
35922 On return, RT1 is the eigenvalue of larger absolute value, and RT2
35923 is the eigenvalue of smaller absolute value.
35924 
35925   -- LAPACK auxiliary routine (version 3.0) --
35926      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
35927      Courant Institute, Argonne National Lab, and Rice University
35928      October 31, 1992
35929 *************************************************************************/
evd_tdevde2(double a,double b,double c,double * rt1,double * rt2,ae_state * _state)35930 static void evd_tdevde2(double a,
35931      double b,
35932      double c,
35933      double* rt1,
35934      double* rt2,
35935      ae_state *_state)
35936 {
35937     double ab;
35938     double acmn;
35939     double acmx;
35940     double adf;
35941     double df;
35942     double rt;
35943     double sm;
35944     double tb;
35945 
35946     *rt1 = 0;
35947     *rt2 = 0;
35948 
35949     sm = a+c;
35950     df = a-c;
35951     adf = ae_fabs(df, _state);
35952     tb = b+b;
35953     ab = ae_fabs(tb, _state);
35954     if( ae_fp_greater(ae_fabs(a, _state),ae_fabs(c, _state)) )
35955     {
35956         acmx = a;
35957         acmn = c;
35958     }
35959     else
35960     {
35961         acmx = c;
35962         acmn = a;
35963     }
35964     if( ae_fp_greater(adf,ab) )
35965     {
35966         rt = adf*ae_sqrt(1+ae_sqr(ab/adf, _state), _state);
35967     }
35968     else
35969     {
35970         if( ae_fp_less(adf,ab) )
35971         {
35972             rt = ab*ae_sqrt(1+ae_sqr(adf/ab, _state), _state);
35973         }
35974         else
35975         {
35976 
35977             /*
35978              * Includes case AB=ADF=0
35979              */
35980             rt = ab*ae_sqrt((double)(2), _state);
35981         }
35982     }
35983     if( ae_fp_less(sm,(double)(0)) )
35984     {
35985         *rt1 = 0.5*(sm-rt);
35986 
35987         /*
35988          * Order of execution important.
35989          * To get fully accurate smaller eigenvalue,
35990          * next line needs to be executed in higher precision.
35991          */
35992         *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
35993     }
35994     else
35995     {
35996         if( ae_fp_greater(sm,(double)(0)) )
35997         {
35998             *rt1 = 0.5*(sm+rt);
35999 
36000             /*
36001              * Order of execution important.
36002              * To get fully accurate smaller eigenvalue,
36003              * next line needs to be executed in higher precision.
36004              */
36005             *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
36006         }
36007         else
36008         {
36009 
36010             /*
36011              * Includes case RT1 = RT2 = 0
36012              */
36013             *rt1 = 0.5*rt;
36014             *rt2 = -0.5*rt;
36015         }
36016     }
36017 }
36018 
36019 
36020 /*************************************************************************
36021 DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
36022 
36023    [  A   B  ]
36024    [  B   C  ].
36025 
36026 On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
36027 eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
36028 eigenvector for RT1, giving the decomposition
36029 
36030    [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
36031    [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
36032 
36033 
36034   -- LAPACK auxiliary routine (version 3.0) --
36035      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
36036      Courant Institute, Argonne National Lab, and Rice University
36037      October 31, 1992
36038 *************************************************************************/
evd_tdevdev2(double a,double b,double c,double * rt1,double * rt2,double * cs1,double * sn1,ae_state * _state)36039 static void evd_tdevdev2(double a,
36040      double b,
36041      double c,
36042      double* rt1,
36043      double* rt2,
36044      double* cs1,
36045      double* sn1,
36046      ae_state *_state)
36047 {
36048     ae_int_t sgn1;
36049     ae_int_t sgn2;
36050     double ab;
36051     double acmn;
36052     double acmx;
36053     double acs;
36054     double adf;
36055     double cs;
36056     double ct;
36057     double df;
36058     double rt;
36059     double sm;
36060     double tb;
36061     double tn;
36062 
36063     *rt1 = 0;
36064     *rt2 = 0;
36065     *cs1 = 0;
36066     *sn1 = 0;
36067 
36068 
36069     /*
36070      * Compute the eigenvalues
36071      */
36072     sm = a+c;
36073     df = a-c;
36074     adf = ae_fabs(df, _state);
36075     tb = b+b;
36076     ab = ae_fabs(tb, _state);
36077     if( ae_fp_greater(ae_fabs(a, _state),ae_fabs(c, _state)) )
36078     {
36079         acmx = a;
36080         acmn = c;
36081     }
36082     else
36083     {
36084         acmx = c;
36085         acmn = a;
36086     }
36087     if( ae_fp_greater(adf,ab) )
36088     {
36089         rt = adf*ae_sqrt(1+ae_sqr(ab/adf, _state), _state);
36090     }
36091     else
36092     {
36093         if( ae_fp_less(adf,ab) )
36094         {
36095             rt = ab*ae_sqrt(1+ae_sqr(adf/ab, _state), _state);
36096         }
36097         else
36098         {
36099 
36100             /*
36101              * Includes case AB=ADF=0
36102              */
36103             rt = ab*ae_sqrt((double)(2), _state);
36104         }
36105     }
36106     if( ae_fp_less(sm,(double)(0)) )
36107     {
36108         *rt1 = 0.5*(sm-rt);
36109         sgn1 = -1;
36110 
36111         /*
36112          * Order of execution important.
36113          * To get fully accurate smaller eigenvalue,
36114          * next line needs to be executed in higher precision.
36115          */
36116         *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
36117     }
36118     else
36119     {
36120         if( ae_fp_greater(sm,(double)(0)) )
36121         {
36122             *rt1 = 0.5*(sm+rt);
36123             sgn1 = 1;
36124 
36125             /*
36126              * Order of execution important.
36127              * To get fully accurate smaller eigenvalue,
36128              * next line needs to be executed in higher precision.
36129              */
36130             *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
36131         }
36132         else
36133         {
36134 
36135             /*
36136              * Includes case RT1 = RT2 = 0
36137              */
36138             *rt1 = 0.5*rt;
36139             *rt2 = -0.5*rt;
36140             sgn1 = 1;
36141         }
36142     }
36143 
36144     /*
36145      * Compute the eigenvector
36146      */
36147     if( ae_fp_greater_eq(df,(double)(0)) )
36148     {
36149         cs = df+rt;
36150         sgn2 = 1;
36151     }
36152     else
36153     {
36154         cs = df-rt;
36155         sgn2 = -1;
36156     }
36157     acs = ae_fabs(cs, _state);
36158     if( ae_fp_greater(acs,ab) )
36159     {
36160         ct = -tb/cs;
36161         *sn1 = 1/ae_sqrt(1+ct*ct, _state);
36162         *cs1 = ct*(*sn1);
36163     }
36164     else
36165     {
36166         if( ae_fp_eq(ab,(double)(0)) )
36167         {
36168             *cs1 = (double)(1);
36169             *sn1 = (double)(0);
36170         }
36171         else
36172         {
36173             tn = -cs/tb;
36174             *cs1 = 1/ae_sqrt(1+tn*tn, _state);
36175             *sn1 = tn*(*cs1);
36176         }
36177     }
36178     if( sgn1==sgn2 )
36179     {
36180         tn = *cs1;
36181         *cs1 = -*sn1;
36182         *sn1 = tn;
36183     }
36184 }
36185 
36186 
36187 /*************************************************************************
36188 Internal routine
36189 *************************************************************************/
evd_tdevdpythag(double a,double b,ae_state * _state)36190 static double evd_tdevdpythag(double a, double b, ae_state *_state)
36191 {
36192     double result;
36193 
36194 
36195     if( ae_fp_less(ae_fabs(a, _state),ae_fabs(b, _state)) )
36196     {
36197         result = ae_fabs(b, _state)*ae_sqrt(1+ae_sqr(a/b, _state), _state);
36198     }
36199     else
36200     {
36201         result = ae_fabs(a, _state)*ae_sqrt(1+ae_sqr(b/a, _state), _state);
36202     }
36203     return result;
36204 }
36205 
36206 
36207 /*************************************************************************
36208 Internal routine
36209 *************************************************************************/
evd_tdevdextsign(double a,double b,ae_state * _state)36210 static double evd_tdevdextsign(double a, double b, ae_state *_state)
36211 {
36212     double result;
36213 
36214 
36215     if( ae_fp_greater_eq(b,(double)(0)) )
36216     {
36217         result = ae_fabs(a, _state);
36218     }
36219     else
36220     {
36221         result = -ae_fabs(a, _state);
36222     }
36223     return result;
36224 }
36225 
36226 
evd_internalbisectioneigenvalues(ae_vector * d,ae_vector * e,ae_int_t n,ae_int_t irange,ae_int_t iorder,double vl,double vu,ae_int_t il,ae_int_t iu,double abstol,ae_vector * w,ae_int_t * m,ae_int_t * nsplit,ae_vector * iblock,ae_vector * isplit,ae_int_t * errorcode,ae_state * _state)36227 static ae_bool evd_internalbisectioneigenvalues(/* Real    */ ae_vector* d,
36228      /* Real    */ ae_vector* e,
36229      ae_int_t n,
36230      ae_int_t irange,
36231      ae_int_t iorder,
36232      double vl,
36233      double vu,
36234      ae_int_t il,
36235      ae_int_t iu,
36236      double abstol,
36237      /* Real    */ ae_vector* w,
36238      ae_int_t* m,
36239      ae_int_t* nsplit,
36240      /* Integer */ ae_vector* iblock,
36241      /* Integer */ ae_vector* isplit,
36242      ae_int_t* errorcode,
36243      ae_state *_state)
36244 {
36245     ae_frame _frame_block;
36246     ae_vector _d;
36247     ae_vector _e;
36248     double fudge;
36249     double relfac;
36250     ae_bool ncnvrg;
36251     ae_bool toofew;
36252     ae_int_t ib;
36253     ae_int_t ibegin;
36254     ae_int_t idiscl;
36255     ae_int_t idiscu;
36256     ae_int_t ie;
36257     ae_int_t iend;
36258     ae_int_t iinfo;
36259     ae_int_t im;
36260     ae_int_t iin;
36261     ae_int_t ioff;
36262     ae_int_t iout;
36263     ae_int_t itmax;
36264     ae_int_t iw;
36265     ae_int_t iwoff;
36266     ae_int_t j;
36267     ae_int_t itmp1;
36268     ae_int_t jb;
36269     ae_int_t jdisc;
36270     ae_int_t je;
36271     ae_int_t nwl;
36272     ae_int_t nwu;
36273     double atoli;
36274     double bnorm;
36275     double gl;
36276     double gu;
36277     double pivmin;
36278     double rtoli;
36279     double safemn;
36280     double tmp1;
36281     double tmp2;
36282     double tnorm;
36283     double ulp;
36284     double wkill;
36285     double wl;
36286     double wlu;
36287     double wu;
36288     double wul;
36289     double scalefactor;
36290     double t;
36291     ae_vector idumma;
36292     ae_vector work;
36293     ae_vector iwork;
36294     ae_vector ia1s2;
36295     ae_vector ra1s2;
36296     ae_matrix ra1s2x2;
36297     ae_matrix ia1s2x2;
36298     ae_vector ra1siin;
36299     ae_vector ra2siin;
36300     ae_vector ra3siin;
36301     ae_vector ra4siin;
36302     ae_matrix ra1siinx2;
36303     ae_matrix ia1siinx2;
36304     ae_vector iworkspace;
36305     ae_vector rworkspace;
36306     ae_int_t tmpi;
36307     ae_bool result;
36308 
36309     ae_frame_make(_state, &_frame_block);
36310     memset(&_d, 0, sizeof(_d));
36311     memset(&_e, 0, sizeof(_e));
36312     memset(&idumma, 0, sizeof(idumma));
36313     memset(&work, 0, sizeof(work));
36314     memset(&iwork, 0, sizeof(iwork));
36315     memset(&ia1s2, 0, sizeof(ia1s2));
36316     memset(&ra1s2, 0, sizeof(ra1s2));
36317     memset(&ra1s2x2, 0, sizeof(ra1s2x2));
36318     memset(&ia1s2x2, 0, sizeof(ia1s2x2));
36319     memset(&ra1siin, 0, sizeof(ra1siin));
36320     memset(&ra2siin, 0, sizeof(ra2siin));
36321     memset(&ra3siin, 0, sizeof(ra3siin));
36322     memset(&ra4siin, 0, sizeof(ra4siin));
36323     memset(&ra1siinx2, 0, sizeof(ra1siinx2));
36324     memset(&ia1siinx2, 0, sizeof(ia1siinx2));
36325     memset(&iworkspace, 0, sizeof(iworkspace));
36326     memset(&rworkspace, 0, sizeof(rworkspace));
36327     ae_vector_init_copy(&_d, d, _state, ae_true);
36328     d = &_d;
36329     ae_vector_init_copy(&_e, e, _state, ae_true);
36330     e = &_e;
36331     ae_vector_clear(w);
36332     *m = 0;
36333     *nsplit = 0;
36334     ae_vector_clear(iblock);
36335     ae_vector_clear(isplit);
36336     *errorcode = 0;
36337     ae_vector_init(&idumma, 0, DT_INT, _state, ae_true);
36338     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
36339     ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
36340     ae_vector_init(&ia1s2, 0, DT_INT, _state, ae_true);
36341     ae_vector_init(&ra1s2, 0, DT_REAL, _state, ae_true);
36342     ae_matrix_init(&ra1s2x2, 0, 0, DT_REAL, _state, ae_true);
36343     ae_matrix_init(&ia1s2x2, 0, 0, DT_INT, _state, ae_true);
36344     ae_vector_init(&ra1siin, 0, DT_REAL, _state, ae_true);
36345     ae_vector_init(&ra2siin, 0, DT_REAL, _state, ae_true);
36346     ae_vector_init(&ra3siin, 0, DT_REAL, _state, ae_true);
36347     ae_vector_init(&ra4siin, 0, DT_REAL, _state, ae_true);
36348     ae_matrix_init(&ra1siinx2, 0, 0, DT_REAL, _state, ae_true);
36349     ae_matrix_init(&ia1siinx2, 0, 0, DT_INT, _state, ae_true);
36350     ae_vector_init(&iworkspace, 0, DT_INT, _state, ae_true);
36351     ae_vector_init(&rworkspace, 0, DT_REAL, _state, ae_true);
36352 
36353 
36354     /*
36355      * Quick return if possible
36356      */
36357     *m = 0;
36358     if( n==0 )
36359     {
36360         result = ae_true;
36361         ae_frame_leave(_state);
36362         return result;
36363     }
36364 
36365     /*
36366      * Get machine constants
36367      * NB is the minimum vector length for vector bisection, or 0
36368      * if only scalar is to be done.
36369      */
36370     fudge = (double)(2);
36371     relfac = (double)(2);
36372     safemn = ae_minrealnumber;
36373     ulp = 2*ae_machineepsilon;
36374     rtoli = ulp*relfac;
36375     ae_vector_set_length(&idumma, 1+1, _state);
36376     ae_vector_set_length(&work, 4*n+1, _state);
36377     ae_vector_set_length(&iwork, 3*n+1, _state);
36378     ae_vector_set_length(w, n+1, _state);
36379     ae_vector_set_length(iblock, n+1, _state);
36380     ae_vector_set_length(isplit, n+1, _state);
36381     ae_vector_set_length(&ia1s2, 2+1, _state);
36382     ae_vector_set_length(&ra1s2, 2+1, _state);
36383     ae_matrix_set_length(&ra1s2x2, 2+1, 2+1, _state);
36384     ae_matrix_set_length(&ia1s2x2, 2+1, 2+1, _state);
36385     ae_vector_set_length(&ra1siin, n+1, _state);
36386     ae_vector_set_length(&ra2siin, n+1, _state);
36387     ae_vector_set_length(&ra3siin, n+1, _state);
36388     ae_vector_set_length(&ra4siin, n+1, _state);
36389     ae_matrix_set_length(&ra1siinx2, n+1, 2+1, _state);
36390     ae_matrix_set_length(&ia1siinx2, n+1, 2+1, _state);
36391     ae_vector_set_length(&iworkspace, n+1, _state);
36392     ae_vector_set_length(&rworkspace, n+1, _state);
36393 
36394     /*
36395      * these initializers are not really necessary,
36396      * but without them compiler complains about uninitialized locals
36397      */
36398     wlu = (double)(0);
36399     wul = (double)(0);
36400 
36401     /*
36402      * Check for Errors
36403      */
36404     result = ae_false;
36405     *errorcode = 0;
36406     if( irange<=0||irange>=4 )
36407     {
36408         *errorcode = -4;
36409     }
36410     if( iorder<=0||iorder>=3 )
36411     {
36412         *errorcode = -5;
36413     }
36414     if( n<0 )
36415     {
36416         *errorcode = -3;
36417     }
36418     if( irange==2&&ae_fp_greater_eq(vl,vu) )
36419     {
36420         *errorcode = -6;
36421     }
36422     if( irange==3&&(il<1||il>ae_maxint(1, n, _state)) )
36423     {
36424         *errorcode = -8;
36425     }
36426     if( irange==3&&(iu<ae_minint(n, il, _state)||iu>n) )
36427     {
36428         *errorcode = -9;
36429     }
36430     if( *errorcode!=0 )
36431     {
36432         ae_frame_leave(_state);
36433         return result;
36434     }
36435 
36436     /*
36437      * Initialize error flags
36438      */
36439     ncnvrg = ae_false;
36440     toofew = ae_false;
36441 
36442     /*
36443      * Simplifications:
36444      */
36445     if( (irange==3&&il==1)&&iu==n )
36446     {
36447         irange = 1;
36448     }
36449 
36450     /*
36451      * Special Case when N=1
36452      */
36453     if( n==1 )
36454     {
36455         *nsplit = 1;
36456         isplit->ptr.p_int[1] = 1;
36457         if( irange==2&&(ae_fp_greater_eq(vl,d->ptr.p_double[1])||ae_fp_less(vu,d->ptr.p_double[1])) )
36458         {
36459             *m = 0;
36460         }
36461         else
36462         {
36463             w->ptr.p_double[1] = d->ptr.p_double[1];
36464             iblock->ptr.p_int[1] = 1;
36465             *m = 1;
36466         }
36467         result = ae_true;
36468         ae_frame_leave(_state);
36469         return result;
36470     }
36471 
36472     /*
36473      * Scaling
36474      */
36475     t = ae_fabs(d->ptr.p_double[n], _state);
36476     for(j=1; j<=n-1; j++)
36477     {
36478         t = ae_maxreal(t, ae_fabs(d->ptr.p_double[j], _state), _state);
36479         t = ae_maxreal(t, ae_fabs(e->ptr.p_double[j], _state), _state);
36480     }
36481     scalefactor = (double)(1);
36482     if( ae_fp_neq(t,(double)(0)) )
36483     {
36484         if( ae_fp_greater(t,ae_sqrt(ae_sqrt(ae_minrealnumber, _state), _state)*ae_sqrt(ae_maxrealnumber, _state)) )
36485         {
36486             scalefactor = t;
36487         }
36488         if( ae_fp_less(t,ae_sqrt(ae_sqrt(ae_maxrealnumber, _state), _state)*ae_sqrt(ae_minrealnumber, _state)) )
36489         {
36490             scalefactor = t;
36491         }
36492         for(j=1; j<=n-1; j++)
36493         {
36494             d->ptr.p_double[j] = d->ptr.p_double[j]/scalefactor;
36495             e->ptr.p_double[j] = e->ptr.p_double[j]/scalefactor;
36496         }
36497         d->ptr.p_double[n] = d->ptr.p_double[n]/scalefactor;
36498     }
36499 
36500     /*
36501      * Compute Splitting Points
36502      */
36503     *nsplit = 1;
36504     work.ptr.p_double[n] = (double)(0);
36505     pivmin = (double)(1);
36506     for(j=2; j<=n; j++)
36507     {
36508         tmp1 = ae_sqr(e->ptr.p_double[j-1], _state);
36509         if( ae_fp_greater(ae_fabs(d->ptr.p_double[j]*d->ptr.p_double[j-1], _state)*ae_sqr(ulp, _state)+safemn,tmp1) )
36510         {
36511             isplit->ptr.p_int[*nsplit] = j-1;
36512             *nsplit = *nsplit+1;
36513             work.ptr.p_double[j-1] = (double)(0);
36514         }
36515         else
36516         {
36517             work.ptr.p_double[j-1] = tmp1;
36518             pivmin = ae_maxreal(pivmin, tmp1, _state);
36519         }
36520     }
36521     isplit->ptr.p_int[*nsplit] = n;
36522     pivmin = pivmin*safemn;
36523 
36524     /*
36525      * Compute Interval and ATOLI
36526      */
36527     if( irange==3 )
36528     {
36529 
36530         /*
36531          * RANGE='I': Compute the interval containing eigenvalues
36532          *     IL through IU.
36533          *
36534          * Compute Gershgorin interval for entire (split) matrix
36535          * and use it as the initial interval
36536          */
36537         gu = d->ptr.p_double[1];
36538         gl = d->ptr.p_double[1];
36539         tmp1 = (double)(0);
36540         for(j=1; j<=n-1; j++)
36541         {
36542             tmp2 = ae_sqrt(work.ptr.p_double[j], _state);
36543             gu = ae_maxreal(gu, d->ptr.p_double[j]+tmp1+tmp2, _state);
36544             gl = ae_minreal(gl, d->ptr.p_double[j]-tmp1-tmp2, _state);
36545             tmp1 = tmp2;
36546         }
36547         gu = ae_maxreal(gu, d->ptr.p_double[n]+tmp1, _state);
36548         gl = ae_minreal(gl, d->ptr.p_double[n]-tmp1, _state);
36549         tnorm = ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state);
36550         gl = gl-fudge*tnorm*ulp*n-fudge*2*pivmin;
36551         gu = gu+fudge*tnorm*ulp*n+fudge*pivmin;
36552 
36553         /*
36554          * Compute Iteration parameters
36555          */
36556         itmax = ae_iceil((ae_log(tnorm+pivmin, _state)-ae_log(pivmin, _state))/ae_log((double)(2), _state), _state)+2;
36557         if( ae_fp_less_eq(abstol,(double)(0)) )
36558         {
36559             atoli = ulp*tnorm;
36560         }
36561         else
36562         {
36563             atoli = abstol;
36564         }
36565         work.ptr.p_double[n+1] = gl;
36566         work.ptr.p_double[n+2] = gl;
36567         work.ptr.p_double[n+3] = gu;
36568         work.ptr.p_double[n+4] = gu;
36569         work.ptr.p_double[n+5] = gl;
36570         work.ptr.p_double[n+6] = gu;
36571         iwork.ptr.p_int[1] = -1;
36572         iwork.ptr.p_int[2] = -1;
36573         iwork.ptr.p_int[3] = n+1;
36574         iwork.ptr.p_int[4] = n+1;
36575         iwork.ptr.p_int[5] = il-1;
36576         iwork.ptr.p_int[6] = iu;
36577 
36578         /*
36579          * Calling DLAEBZ
36580          *
36581          * DLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E,
36582          *    WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT,
36583          *    IWORK, W, IBLOCK, IINFO )
36584          */
36585         ia1s2.ptr.p_int[1] = iwork.ptr.p_int[5];
36586         ia1s2.ptr.p_int[2] = iwork.ptr.p_int[6];
36587         ra1s2.ptr.p_double[1] = work.ptr.p_double[n+5];
36588         ra1s2.ptr.p_double[2] = work.ptr.p_double[n+6];
36589         ra1s2x2.ptr.pp_double[1][1] = work.ptr.p_double[n+1];
36590         ra1s2x2.ptr.pp_double[2][1] = work.ptr.p_double[n+2];
36591         ra1s2x2.ptr.pp_double[1][2] = work.ptr.p_double[n+3];
36592         ra1s2x2.ptr.pp_double[2][2] = work.ptr.p_double[n+4];
36593         ia1s2x2.ptr.pp_int[1][1] = iwork.ptr.p_int[1];
36594         ia1s2x2.ptr.pp_int[2][1] = iwork.ptr.p_int[2];
36595         ia1s2x2.ptr.pp_int[1][2] = iwork.ptr.p_int[3];
36596         ia1s2x2.ptr.pp_int[2][2] = iwork.ptr.p_int[4];
36597         evd_internaldlaebz(3, itmax, n, 2, 2, atoli, rtoli, pivmin, d, e, &work, &ia1s2, &ra1s2x2, &ra1s2, &iout, &ia1s2x2, w, iblock, &iinfo, _state);
36598         iwork.ptr.p_int[5] = ia1s2.ptr.p_int[1];
36599         iwork.ptr.p_int[6] = ia1s2.ptr.p_int[2];
36600         work.ptr.p_double[n+5] = ra1s2.ptr.p_double[1];
36601         work.ptr.p_double[n+6] = ra1s2.ptr.p_double[2];
36602         work.ptr.p_double[n+1] = ra1s2x2.ptr.pp_double[1][1];
36603         work.ptr.p_double[n+2] = ra1s2x2.ptr.pp_double[2][1];
36604         work.ptr.p_double[n+3] = ra1s2x2.ptr.pp_double[1][2];
36605         work.ptr.p_double[n+4] = ra1s2x2.ptr.pp_double[2][2];
36606         iwork.ptr.p_int[1] = ia1s2x2.ptr.pp_int[1][1];
36607         iwork.ptr.p_int[2] = ia1s2x2.ptr.pp_int[2][1];
36608         iwork.ptr.p_int[3] = ia1s2x2.ptr.pp_int[1][2];
36609         iwork.ptr.p_int[4] = ia1s2x2.ptr.pp_int[2][2];
36610         if( iwork.ptr.p_int[6]==iu )
36611         {
36612             wl = work.ptr.p_double[n+1];
36613             wlu = work.ptr.p_double[n+3];
36614             nwl = iwork.ptr.p_int[1];
36615             wu = work.ptr.p_double[n+4];
36616             wul = work.ptr.p_double[n+2];
36617             nwu = iwork.ptr.p_int[4];
36618         }
36619         else
36620         {
36621             wl = work.ptr.p_double[n+2];
36622             wlu = work.ptr.p_double[n+4];
36623             nwl = iwork.ptr.p_int[2];
36624             wu = work.ptr.p_double[n+3];
36625             wul = work.ptr.p_double[n+1];
36626             nwu = iwork.ptr.p_int[3];
36627         }
36628         if( ((nwl<0||nwl>=n)||nwu<1)||nwu>n )
36629         {
36630             *errorcode = 4;
36631             result = ae_false;
36632             ae_frame_leave(_state);
36633             return result;
36634         }
36635     }
36636     else
36637     {
36638 
36639         /*
36640          * RANGE='A' or 'V' -- Set ATOLI
36641          */
36642         tnorm = ae_maxreal(ae_fabs(d->ptr.p_double[1], _state)+ae_fabs(e->ptr.p_double[1], _state), ae_fabs(d->ptr.p_double[n], _state)+ae_fabs(e->ptr.p_double[n-1], _state), _state);
36643         for(j=2; j<=n-1; j++)
36644         {
36645             tnorm = ae_maxreal(tnorm, ae_fabs(d->ptr.p_double[j], _state)+ae_fabs(e->ptr.p_double[j-1], _state)+ae_fabs(e->ptr.p_double[j], _state), _state);
36646         }
36647         if( ae_fp_less_eq(abstol,(double)(0)) )
36648         {
36649             atoli = ulp*tnorm;
36650         }
36651         else
36652         {
36653             atoli = abstol;
36654         }
36655         if( irange==2 )
36656         {
36657             wl = vl;
36658             wu = vu;
36659         }
36660         else
36661         {
36662             wl = (double)(0);
36663             wu = (double)(0);
36664         }
36665     }
36666 
36667     /*
36668      * Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU.
36669      * NWL accumulates the number of eigenvalues .le. WL,
36670      * NWU accumulates the number of eigenvalues .le. WU
36671      */
36672     *m = 0;
36673     iend = 0;
36674     *errorcode = 0;
36675     nwl = 0;
36676     nwu = 0;
36677     for(jb=1; jb<=*nsplit; jb++)
36678     {
36679         ioff = iend;
36680         ibegin = ioff+1;
36681         iend = isplit->ptr.p_int[jb];
36682         iin = iend-ioff;
36683         if( iin==1 )
36684         {
36685 
36686             /*
36687              * Special Case -- IIN=1
36688              */
36689             if( irange==1||ae_fp_greater_eq(wl,d->ptr.p_double[ibegin]-pivmin) )
36690             {
36691                 nwl = nwl+1;
36692             }
36693             if( irange==1||ae_fp_greater_eq(wu,d->ptr.p_double[ibegin]-pivmin) )
36694             {
36695                 nwu = nwu+1;
36696             }
36697             if( irange==1||(ae_fp_less(wl,d->ptr.p_double[ibegin]-pivmin)&&ae_fp_greater_eq(wu,d->ptr.p_double[ibegin]-pivmin)) )
36698             {
36699                 *m = *m+1;
36700                 w->ptr.p_double[*m] = d->ptr.p_double[ibegin];
36701                 iblock->ptr.p_int[*m] = jb;
36702             }
36703         }
36704         else
36705         {
36706 
36707             /*
36708              * General Case -- IIN > 1
36709              *
36710              * Compute Gershgorin Interval
36711              * and use it as the initial interval
36712              */
36713             gu = d->ptr.p_double[ibegin];
36714             gl = d->ptr.p_double[ibegin];
36715             tmp1 = (double)(0);
36716             for(j=ibegin; j<=iend-1; j++)
36717             {
36718                 tmp2 = ae_fabs(e->ptr.p_double[j], _state);
36719                 gu = ae_maxreal(gu, d->ptr.p_double[j]+tmp1+tmp2, _state);
36720                 gl = ae_minreal(gl, d->ptr.p_double[j]-tmp1-tmp2, _state);
36721                 tmp1 = tmp2;
36722             }
36723             gu = ae_maxreal(gu, d->ptr.p_double[iend]+tmp1, _state);
36724             gl = ae_minreal(gl, d->ptr.p_double[iend]-tmp1, _state);
36725             bnorm = ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state);
36726             gl = gl-fudge*bnorm*ulp*iin-fudge*pivmin;
36727             gu = gu+fudge*bnorm*ulp*iin+fudge*pivmin;
36728 
36729             /*
36730              * Compute ATOLI for the current submatrix
36731              */
36732             if( ae_fp_less_eq(abstol,(double)(0)) )
36733             {
36734                 atoli = ulp*ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state);
36735             }
36736             else
36737             {
36738                 atoli = abstol;
36739             }
36740             if( irange>1 )
36741             {
36742                 if( ae_fp_less(gu,wl) )
36743                 {
36744                     nwl = nwl+iin;
36745                     nwu = nwu+iin;
36746                     continue;
36747                 }
36748                 gl = ae_maxreal(gl, wl, _state);
36749                 gu = ae_minreal(gu, wu, _state);
36750                 if( ae_fp_greater_eq(gl,gu) )
36751                 {
36752                     continue;
36753                 }
36754             }
36755 
36756             /*
36757              * Set Up Initial Interval
36758              */
36759             work.ptr.p_double[n+1] = gl;
36760             work.ptr.p_double[n+iin+1] = gu;
36761 
36762             /*
36763              * Calling DLAEBZ
36764              *
36765              * CALL DLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
36766              *    D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
36767              *    IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM,
36768              *    IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
36769              */
36770             for(tmpi=1; tmpi<=iin; tmpi++)
36771             {
36772                 ra1siin.ptr.p_double[tmpi] = d->ptr.p_double[ibegin-1+tmpi];
36773                 if( ibegin-1+tmpi<n )
36774                 {
36775                     ra2siin.ptr.p_double[tmpi] = e->ptr.p_double[ibegin-1+tmpi];
36776                 }
36777                 ra3siin.ptr.p_double[tmpi] = work.ptr.p_double[ibegin-1+tmpi];
36778                 ra1siinx2.ptr.pp_double[tmpi][1] = work.ptr.p_double[n+tmpi];
36779                 ra1siinx2.ptr.pp_double[tmpi][2] = work.ptr.p_double[n+tmpi+iin];
36780                 ra4siin.ptr.p_double[tmpi] = work.ptr.p_double[n+2*iin+tmpi];
36781                 rworkspace.ptr.p_double[tmpi] = w->ptr.p_double[*m+tmpi];
36782                 iworkspace.ptr.p_int[tmpi] = iblock->ptr.p_int[*m+tmpi];
36783                 ia1siinx2.ptr.pp_int[tmpi][1] = iwork.ptr.p_int[tmpi];
36784                 ia1siinx2.ptr.pp_int[tmpi][2] = iwork.ptr.p_int[tmpi+iin];
36785             }
36786             evd_internaldlaebz(1, 0, iin, iin, 1, atoli, rtoli, pivmin, &ra1siin, &ra2siin, &ra3siin, &idumma, &ra1siinx2, &ra4siin, &im, &ia1siinx2, &rworkspace, &iworkspace, &iinfo, _state);
36787             for(tmpi=1; tmpi<=iin; tmpi++)
36788             {
36789                 work.ptr.p_double[n+tmpi] = ra1siinx2.ptr.pp_double[tmpi][1];
36790                 work.ptr.p_double[n+tmpi+iin] = ra1siinx2.ptr.pp_double[tmpi][2];
36791                 work.ptr.p_double[n+2*iin+tmpi] = ra4siin.ptr.p_double[tmpi];
36792                 w->ptr.p_double[*m+tmpi] = rworkspace.ptr.p_double[tmpi];
36793                 iblock->ptr.p_int[*m+tmpi] = iworkspace.ptr.p_int[tmpi];
36794                 iwork.ptr.p_int[tmpi] = ia1siinx2.ptr.pp_int[tmpi][1];
36795                 iwork.ptr.p_int[tmpi+iin] = ia1siinx2.ptr.pp_int[tmpi][2];
36796             }
36797             nwl = nwl+iwork.ptr.p_int[1];
36798             nwu = nwu+iwork.ptr.p_int[iin+1];
36799             iwoff = *m-iwork.ptr.p_int[1];
36800 
36801             /*
36802              * Compute Eigenvalues
36803              */
36804             itmax = ae_iceil((ae_log(gu-gl+pivmin, _state)-ae_log(pivmin, _state))/ae_log((double)(2), _state), _state)+2;
36805 
36806             /*
36807              * Calling DLAEBZ
36808              *
36809              *CALL DLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
36810              *    D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
36811              *    IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT,
36812              *    IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
36813              */
36814             for(tmpi=1; tmpi<=iin; tmpi++)
36815             {
36816                 ra1siin.ptr.p_double[tmpi] = d->ptr.p_double[ibegin-1+tmpi];
36817                 if( ibegin-1+tmpi<n )
36818                 {
36819                     ra2siin.ptr.p_double[tmpi] = e->ptr.p_double[ibegin-1+tmpi];
36820                 }
36821                 ra3siin.ptr.p_double[tmpi] = work.ptr.p_double[ibegin-1+tmpi];
36822                 ra1siinx2.ptr.pp_double[tmpi][1] = work.ptr.p_double[n+tmpi];
36823                 ra1siinx2.ptr.pp_double[tmpi][2] = work.ptr.p_double[n+tmpi+iin];
36824                 ra4siin.ptr.p_double[tmpi] = work.ptr.p_double[n+2*iin+tmpi];
36825                 rworkspace.ptr.p_double[tmpi] = w->ptr.p_double[*m+tmpi];
36826                 iworkspace.ptr.p_int[tmpi] = iblock->ptr.p_int[*m+tmpi];
36827                 ia1siinx2.ptr.pp_int[tmpi][1] = iwork.ptr.p_int[tmpi];
36828                 ia1siinx2.ptr.pp_int[tmpi][2] = iwork.ptr.p_int[tmpi+iin];
36829             }
36830             evd_internaldlaebz(2, itmax, iin, iin, 1, atoli, rtoli, pivmin, &ra1siin, &ra2siin, &ra3siin, &idumma, &ra1siinx2, &ra4siin, &iout, &ia1siinx2, &rworkspace, &iworkspace, &iinfo, _state);
36831             for(tmpi=1; tmpi<=iin; tmpi++)
36832             {
36833                 work.ptr.p_double[n+tmpi] = ra1siinx2.ptr.pp_double[tmpi][1];
36834                 work.ptr.p_double[n+tmpi+iin] = ra1siinx2.ptr.pp_double[tmpi][2];
36835                 work.ptr.p_double[n+2*iin+tmpi] = ra4siin.ptr.p_double[tmpi];
36836                 w->ptr.p_double[*m+tmpi] = rworkspace.ptr.p_double[tmpi];
36837                 iblock->ptr.p_int[*m+tmpi] = iworkspace.ptr.p_int[tmpi];
36838                 iwork.ptr.p_int[tmpi] = ia1siinx2.ptr.pp_int[tmpi][1];
36839                 iwork.ptr.p_int[tmpi+iin] = ia1siinx2.ptr.pp_int[tmpi][2];
36840             }
36841 
36842             /*
36843              * Copy Eigenvalues Into W and IBLOCK
36844              * Use -JB for block number for unconverged eigenvalues.
36845              */
36846             for(j=1; j<=iout; j++)
36847             {
36848                 tmp1 = 0.5*(work.ptr.p_double[j+n]+work.ptr.p_double[j+iin+n]);
36849 
36850                 /*
36851                  * Flag non-convergence.
36852                  */
36853                 if( j>iout-iinfo )
36854                 {
36855                     ncnvrg = ae_true;
36856                     ib = -jb;
36857                 }
36858                 else
36859                 {
36860                     ib = jb;
36861                 }
36862                 for(je=iwork.ptr.p_int[j]+1+iwoff; je<=iwork.ptr.p_int[j+iin]+iwoff; je++)
36863                 {
36864                     w->ptr.p_double[je] = tmp1;
36865                     iblock->ptr.p_int[je] = ib;
36866                 }
36867             }
36868             *m = *m+im;
36869         }
36870     }
36871 
36872     /*
36873      * If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU
36874      * If NWL+1 < IL or NWU > IU, discard extra eigenvalues.
36875      */
36876     if( irange==3 )
36877     {
36878         im = 0;
36879         idiscl = il-1-nwl;
36880         idiscu = nwu-iu;
36881         if( idiscl>0||idiscu>0 )
36882         {
36883             for(je=1; je<=*m; je++)
36884             {
36885                 if( ae_fp_less_eq(w->ptr.p_double[je],wlu)&&idiscl>0 )
36886                 {
36887                     idiscl = idiscl-1;
36888                 }
36889                 else
36890                 {
36891                     if( ae_fp_greater_eq(w->ptr.p_double[je],wul)&&idiscu>0 )
36892                     {
36893                         idiscu = idiscu-1;
36894                     }
36895                     else
36896                     {
36897                         im = im+1;
36898                         w->ptr.p_double[im] = w->ptr.p_double[je];
36899                         iblock->ptr.p_int[im] = iblock->ptr.p_int[je];
36900                     }
36901                 }
36902             }
36903             *m = im;
36904         }
36905         if( idiscl>0||idiscu>0 )
36906         {
36907 
36908             /*
36909              * Code to deal with effects of bad arithmetic:
36910              * Some low eigenvalues to be discarded are not in (WL,WLU],
36911              * or high eigenvalues to be discarded are not in (WUL,WU]
36912              * so just kill off the smallest IDISCL/largest IDISCU
36913              * eigenvalues, by simply finding the smallest/largest
36914              * eigenvalue(s).
36915              *
36916              * (If N(w) is monotone non-decreasing, this should never
36917              *  happen.)
36918              */
36919             if( idiscl>0 )
36920             {
36921                 wkill = wu;
36922                 for(jdisc=1; jdisc<=idiscl; jdisc++)
36923                 {
36924                     iw = 0;
36925                     for(je=1; je<=*m; je++)
36926                     {
36927                         if( iblock->ptr.p_int[je]!=0&&(ae_fp_less(w->ptr.p_double[je],wkill)||iw==0) )
36928                         {
36929                             iw = je;
36930                             wkill = w->ptr.p_double[je];
36931                         }
36932                     }
36933                     iblock->ptr.p_int[iw] = 0;
36934                 }
36935             }
36936             if( idiscu>0 )
36937             {
36938                 wkill = wl;
36939                 for(jdisc=1; jdisc<=idiscu; jdisc++)
36940                 {
36941                     iw = 0;
36942                     for(je=1; je<=*m; je++)
36943                     {
36944                         if( iblock->ptr.p_int[je]!=0&&(ae_fp_greater(w->ptr.p_double[je],wkill)||iw==0) )
36945                         {
36946                             iw = je;
36947                             wkill = w->ptr.p_double[je];
36948                         }
36949                     }
36950                     iblock->ptr.p_int[iw] = 0;
36951                 }
36952             }
36953             im = 0;
36954             for(je=1; je<=*m; je++)
36955             {
36956                 if( iblock->ptr.p_int[je]!=0 )
36957                 {
36958                     im = im+1;
36959                     w->ptr.p_double[im] = w->ptr.p_double[je];
36960                     iblock->ptr.p_int[im] = iblock->ptr.p_int[je];
36961                 }
36962             }
36963             *m = im;
36964         }
36965         if( idiscl<0||idiscu<0 )
36966         {
36967             toofew = ae_true;
36968         }
36969     }
36970 
36971     /*
36972      * If ORDER='B', do nothing -- the eigenvalues are already sorted
36973      *    by block.
36974      * If ORDER='E', sort the eigenvalues from smallest to largest
36975      */
36976     if( iorder==1&&*nsplit>1 )
36977     {
36978         for(je=1; je<=*m-1; je++)
36979         {
36980             ie = 0;
36981             tmp1 = w->ptr.p_double[je];
36982             for(j=je+1; j<=*m; j++)
36983             {
36984                 if( ae_fp_less(w->ptr.p_double[j],tmp1) )
36985                 {
36986                     ie = j;
36987                     tmp1 = w->ptr.p_double[j];
36988                 }
36989             }
36990             if( ie!=0 )
36991             {
36992                 itmp1 = iblock->ptr.p_int[ie];
36993                 w->ptr.p_double[ie] = w->ptr.p_double[je];
36994                 iblock->ptr.p_int[ie] = iblock->ptr.p_int[je];
36995                 w->ptr.p_double[je] = tmp1;
36996                 iblock->ptr.p_int[je] = itmp1;
36997             }
36998         }
36999     }
37000     for(j=1; j<=*m; j++)
37001     {
37002         w->ptr.p_double[j] = w->ptr.p_double[j]*scalefactor;
37003     }
37004     *errorcode = 0;
37005     if( ncnvrg )
37006     {
37007         *errorcode = *errorcode+1;
37008     }
37009     if( toofew )
37010     {
37011         *errorcode = *errorcode+2;
37012     }
37013     result = *errorcode==0;
37014     ae_frame_leave(_state);
37015     return result;
37016 }
37017 
37018 
/*************************************************************************
Internal DSTEIN-style routine: computes eigenvectors of a real symmetric
tridiagonal matrix by inverse iteration.

The matrix is given by its diagonal D[1..N] and off-diagonal E[1..N-1].
The M wanted eigenvalues are supplied in W[1..M], grouped by submatrix
block: IBLOCK[j] is the (1-based) block index of eigenvalue j, and
ISPLIT holds the ending index of each block (presumably produced by the
preceding bisection routine in this file -- the same W/IBLOCK/ISPLIT
layout as LAPACK DSTEBZ; verify against callers).

On exit Z is an (1..N) x (1..M) matrix (1-based, allocated here) whose
j-th column is the eigenvector for W[j].  *info is set to the number of
eigenvectors that failed to converge, and their eigenvalue indices are
stored in IFAIL[1..*info]; negative *info values (-1, -4, -5, -6)
indicate invalid input parameters.

E and W are copied on entry (ae_vector_init_copy below), so the caller's
arrays are not modified even though the local copies are resized and,
for W, perturbed to separate close eigenvalues.
*************************************************************************/
static void evd_internaldstein(ae_int_t n,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t m,
     /* Real    */ ae_vector* w,
     /* Integer */ ae_vector* iblock,
     /* Integer */ ae_vector* isplit,
     /* Real    */ ae_matrix* z,
     /* Integer */ ae_vector* ifail,
     ae_int_t* info,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector _e;
    ae_vector _w;
    ae_int_t maxits;
    ae_int_t extra;
    ae_int_t b1;
    ae_int_t blksiz;
    ae_int_t bn;
    ae_int_t gpind;
    ae_int_t i;
    ae_int_t iinfo;
    ae_int_t its;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t jblk;
    ae_int_t jmax;
    ae_int_t nblk;
    ae_int_t nrmchk;
    double dtpcrt;
    double eps;
    double eps1;
    double nrm;
    double onenrm;
    double ortol;
    double pertol;
    double scl;
    double sep;
    double tol;
    double xj;
    double xjm;
    double ztr;
    ae_vector work1;
    ae_vector work2;
    ae_vector work3;
    ae_vector work4;
    ae_vector work5;
    ae_vector iwork;
    ae_bool tmpcriterion;
    ae_int_t ti;
    ae_int_t i1;
    ae_int_t i2;
    double v;
    hqrndstate rs;

    ae_frame_make(_state, &_frame_block);
    memset(&_e, 0, sizeof(_e));
    memset(&_w, 0, sizeof(_w));
    memset(&work1, 0, sizeof(work1));
    memset(&work2, 0, sizeof(work2));
    memset(&work3, 0, sizeof(work3));
    memset(&work4, 0, sizeof(work4));
    memset(&work5, 0, sizeof(work5));
    memset(&iwork, 0, sizeof(iwork));
    memset(&rs, 0, sizeof(rs));
    ae_vector_init_copy(&_e, e, _state, ae_true);
    e = &_e;
    ae_vector_init_copy(&_w, w, _state, ae_true);
    w = &_w;
    ae_matrix_clear(z);
    ae_vector_clear(ifail);
    *info = 0;
    ae_vector_init(&work1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work2, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work3, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work4, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work5, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
    _hqrndstate_init(&rs, _state, ae_true);

    /*
     * Fixed RNG seed: the random starting vectors for inverse iteration
     * are reproducible, so results are deterministic run-to-run.
     */
    hqrndseed(346436, 2434, &rs, _state);
    maxits = 5;
    extra = 2;

    /*
     * Allocate 1-based workspaces (index 0 unused, hence the +1 sizes).
     */
    ae_vector_set_length(&work1, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&work2, ae_maxint(n-1, 1, _state)+1, _state);
    ae_vector_set_length(&work3, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&work4, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&work5, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&iwork, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(ifail, ae_maxint(m, 1, _state)+1, _state);
    ae_matrix_set_length(z, ae_maxint(n, 1, _state)+1, ae_maxint(m, 1, _state)+1, _state);

    /*
     * these initializers are not really necessary,
     * but without them compiler complains about uninitialized locals
     */
    gpind = 0;
    onenrm = (double)(0);
    ortol = (double)(0);
    dtpcrt = (double)(0);
    xjm = (double)(0);

    /*
     * Test the input parameters.
     */
    *info = 0;
    for(i=1; i<=m; i++)
    {
        ifail->ptr.p_int[i] = 0;
    }
    if( n<0 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    if( m<0||m>n )
    {
        *info = -4;
        ae_frame_leave(_state);
        return;
    }
    for(j=2; j<=m; j++)
    {
        /*
         * Block indices must be non-decreasing, and within each block the
         * eigenvalues must be sorted in non-decreasing order.
         */
        if( iblock->ptr.p_int[j]<iblock->ptr.p_int[j-1] )
        {
            *info = -6;
            break;
        }
        if( iblock->ptr.p_int[j]==iblock->ptr.p_int[j-1]&&ae_fp_less(w->ptr.p_double[j],w->ptr.p_double[j-1]) )
        {
            *info = -5;
            break;
        }
    }
    if( *info!=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Quick return if possible
     */
    if( n==0||m==0 )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        z->ptr.pp_double[1][1] = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Some preparations
     *
     * Re-pack the local copies of E and W into vectors of length N+1 so
     * that 1-based indexing up to N is always valid below (the caller's
     * vectors may be exactly-sized).  work1 is used as a bounce buffer.
     */
    ti = n-1;
    ae_v_move(&work1.ptr.p_double[1], 1, &e->ptr.p_double[1], 1, ae_v_len(1,ti));
    ae_vector_set_length(e, n+1, _state);
    ae_v_move(&e->ptr.p_double[1], 1, &work1.ptr.p_double[1], 1, ae_v_len(1,ti));
    ae_v_move(&work1.ptr.p_double[1], 1, &w->ptr.p_double[1], 1, ae_v_len(1,m));
    ae_vector_set_length(w, n+1, _state);
    ae_v_move(&w->ptr.p_double[1], 1, &work1.ptr.p_double[1], 1, ae_v_len(1,m));

    /*
     * Get machine constants.
     */
    eps = ae_machineepsilon;

    /*
     * Compute eigenvectors of matrix blocks.
     */
    j1 = 1;
    for(nblk=1; nblk<=iblock->ptr.p_int[m]; nblk++)
    {

        /*
         * Find starting and ending indices of block nblk.
         */
        if( nblk==1 )
        {
            b1 = 1;
        }
        else
        {
            b1 = isplit->ptr.p_int[nblk-1]+1;
        }
        bn = isplit->ptr.p_int[nblk];
        blksiz = bn-b1+1;
        if( blksiz!=1 )
        {

            /*
             * Compute reorthogonalization criterion and stopping criterion.
             *
             * onenrm is the infinity norm of the block; ortol decides when
             * neighboring eigenvalues are "close" enough to require
             * Gram-Schmidt reorthogonalization, dtpcrt is the threshold on
             * the iterate's infinity norm used as a convergence test.
             */
            gpind = b1;
            onenrm = ae_fabs(d->ptr.p_double[b1], _state)+ae_fabs(e->ptr.p_double[b1], _state);
            onenrm = ae_maxreal(onenrm, ae_fabs(d->ptr.p_double[bn], _state)+ae_fabs(e->ptr.p_double[bn-1], _state), _state);
            for(i=b1+1; i<=bn-1; i++)
            {
                onenrm = ae_maxreal(onenrm, ae_fabs(d->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i-1], _state)+ae_fabs(e->ptr.p_double[i], _state), _state);
            }
            ortol = 0.001*onenrm;
            dtpcrt = ae_sqrt(0.1/blksiz, _state);
        }

        /*
         * Loop through eigenvalues of block nblk.
         */
        jblk = 0;
        for(j=j1; j<=m; j++)
        {
            if( iblock->ptr.p_int[j]!=nblk )
            {
                /*
                 * First eigenvalue of the next block: remember where it
                 * starts and move on to the next nblk iteration.
                 */
                j1 = j;
                break;
            }
            jblk = jblk+1;
            xj = w->ptr.p_double[j];
            if( blksiz==1 )
            {

                /*
                 * Skip all the work if the block size is one.
                 */
                work1.ptr.p_double[1] = (double)(1);
            }
            else
            {

                /*
                 * If eigenvalues j and j-1 are too close, add a relatively
                 * small perturbation.
                 */
                if( jblk>1 )
                {
                    eps1 = ae_fabs(eps*xj, _state);
                    pertol = 10*eps1;
                    sep = xj-xjm;
                    if( ae_fp_less(sep,pertol) )
                    {
                        xj = xjm+pertol;
                    }
                }
                its = 0;
                nrmchk = 0;

                /*
                 * Get random starting vector.
                 */
                for(ti=1; ti<=blksiz; ti++)
                {
                    /*
                     * Uniform components in [-1,1).
                     */
                    work1.ptr.p_double[ti] = 2*hqrnduniformr(&rs, _state)-1;
                }

                /*
                 * Copy the matrix T so it won't be destroyed in factorization.
                 */
                for(ti=1; ti<=blksiz-1; ti++)
                {
                    work2.ptr.p_double[ti] = e->ptr.p_double[b1+ti-1];
                    work3.ptr.p_double[ti] = e->ptr.p_double[b1+ti-1];
                    work4.ptr.p_double[ti] = d->ptr.p_double[b1+ti-1];
                }
                work4.ptr.p_double[blksiz] = d->ptr.p_double[b1+blksiz-1];

                /*
                 * Compute LU factors with partial pivoting  ( PT = LU )
                 *
                 * The factorization of T - xj*I is computed once and reused
                 * by every solve inside the iteration loop below.
                 */
                tol = (double)(0);
                evd_tdininternaldlagtf(blksiz, &work4, xj, &work2, &work3, tol, &work5, &iwork, &iinfo, _state);

                /*
                 * Update iteration count.
                 */
                do
                {
                    its = its+1;
                    if( its>maxits )
                    {

                        /*
                         * If stopping criterion was not satisfied, update info and
                         * store eigenvector number in array ifail.
                         */
                        *info = *info+1;
                        ifail->ptr.p_int[*info] = j;
                        break;
                    }

                    /*
                     * Normalize and scale the righthand side vector Pb.
                     */
                    v = (double)(0);
                    for(ti=1; ti<=blksiz; ti++)
                    {
                        v = v+ae_fabs(work1.ptr.p_double[ti], _state);
                    }
                    scl = blksiz*onenrm*ae_maxreal(eps, ae_fabs(work4.ptr.p_double[blksiz], _state), _state)/v;
                    ae_v_muld(&work1.ptr.p_double[1], 1, ae_v_len(1,blksiz), scl);

                    /*
                     * Solve the system LU = Pb.
                     */
                    evd_tdininternaldlagts(blksiz, &work4, &work2, &work3, &work5, &iwork, &work1, &tol, &iinfo, _state);

                    /*
                     * Reorthogonalize by modified Gram-Schmidt if eigenvalues are
                     * close enough.
                     */
                    if( jblk!=1 )
                    {
                        if( ae_fp_greater(ae_fabs(xj-xjm, _state),ortol) )
                        {
                            /*
                             * Well-separated from the previous eigenvalue:
                             * start a new orthogonalization group at j.
                             */
                            gpind = j;
                        }
                        if( gpind!=j )
                        {
                            for(i=gpind; i<=j-1; i++)
                            {
                                /*
                                 * Subtract the projection of the iterate onto the
                                 * already-computed eigenvector in column i of Z.
                                 */
                                i1 = b1;
                                i2 = b1+blksiz-1;
                                ztr = ae_v_dotproduct(&work1.ptr.p_double[1], 1, &z->ptr.pp_double[i1][i], z->stride, ae_v_len(1,blksiz));
                                ae_v_subd(&work1.ptr.p_double[1], 1, &z->ptr.pp_double[i1][i], z->stride, ae_v_len(1,blksiz), ztr);
                                /*
                                 * NOTE(review): touchint() presumably just "uses" I2 to
                                 * suppress unused-variable warnings -- confirm.
                                 */
                                touchint(&i2, _state);
                            }
                        }
                    }

                    /*
                     * Check the infinity norm of the iterate.
                     */
                    jmax = vectoridxabsmax(&work1, 1, blksiz, _state);
                    nrm = ae_fabs(work1.ptr.p_double[jmax], _state);

                    /*
                     * Continue for additional iterations after norm reaches
                     * stopping criterion.
                     *
                     * tmpcriterion==true means "iterate again": either the norm
                     * is still below the threshold, or we have not yet done
                     * EXTRA confirming iterations past it.
                     */
                    tmpcriterion = ae_false;
                    if( ae_fp_less(nrm,dtpcrt) )
                    {
                        tmpcriterion = ae_true;
                    }
                    else
                    {
                        nrmchk = nrmchk+1;
                        if( nrmchk<extra+1 )
                        {
                            tmpcriterion = ae_true;
                        }
                    }
                }
                while(tmpcriterion);

                /*
                 * Accept iterate as jth eigenvector.
                 *
                 * Normalize to unit 2-norm and fix the sign so the largest
                 * component is non-negative.
                 */
                scl = 1/vectornorm2(&work1, 1, blksiz, _state);
                jmax = vectoridxabsmax(&work1, 1, blksiz, _state);
                if( ae_fp_less(work1.ptr.p_double[jmax],(double)(0)) )
                {
                    scl = -scl;
                }
                ae_v_muld(&work1.ptr.p_double[1], 1, ae_v_len(1,blksiz), scl);
            }
            /*
             * Scatter the block-local eigenvector into column j of Z,
             * zero-padding rows outside the block.
             */
            for(i=1; i<=n; i++)
            {
                z->ptr.pp_double[i][j] = (double)(0);
            }
            for(i=1; i<=blksiz; i++)
            {
                z->ptr.pp_double[b1+i-1][j] = work1.ptr.p_double[i];
            }

            /*
             * Save the shift to check eigenvalue spacing at next
             * iteration.
             */
            xjm = xj;
        }
    }
    ae_frame_leave(_state);
}
37407 
37408 
evd_tdininternaldlagtf(ae_int_t n,ae_vector * a,double lambdav,ae_vector * b,ae_vector * c,double tol,ae_vector * d,ae_vector * iin,ae_int_t * info,ae_state * _state)37409 static void evd_tdininternaldlagtf(ae_int_t n,
37410      /* Real    */ ae_vector* a,
37411      double lambdav,
37412      /* Real    */ ae_vector* b,
37413      /* Real    */ ae_vector* c,
37414      double tol,
37415      /* Real    */ ae_vector* d,
37416      /* Integer */ ae_vector* iin,
37417      ae_int_t* info,
37418      ae_state *_state)
37419 {
37420     ae_int_t k;
37421     double eps;
37422     double mult;
37423     double piv1;
37424     double piv2;
37425     double scale1;
37426     double scale2;
37427     double temp;
37428     double tl;
37429 
37430     *info = 0;
37431 
37432     *info = 0;
37433     if( n<0 )
37434     {
37435         *info = -1;
37436         return;
37437     }
37438     if( n==0 )
37439     {
37440         return;
37441     }
37442     a->ptr.p_double[1] = a->ptr.p_double[1]-lambdav;
37443     iin->ptr.p_int[n] = 0;
37444     if( n==1 )
37445     {
37446         if( ae_fp_eq(a->ptr.p_double[1],(double)(0)) )
37447         {
37448             iin->ptr.p_int[1] = 1;
37449         }
37450         return;
37451     }
37452     eps = ae_machineepsilon;
37453     tl = ae_maxreal(tol, eps, _state);
37454     scale1 = ae_fabs(a->ptr.p_double[1], _state)+ae_fabs(b->ptr.p_double[1], _state);
37455     for(k=1; k<=n-1; k++)
37456     {
37457         a->ptr.p_double[k+1] = a->ptr.p_double[k+1]-lambdav;
37458         scale2 = ae_fabs(c->ptr.p_double[k], _state)+ae_fabs(a->ptr.p_double[k+1], _state);
37459         if( k<n-1 )
37460         {
37461             scale2 = scale2+ae_fabs(b->ptr.p_double[k+1], _state);
37462         }
37463         if( ae_fp_eq(a->ptr.p_double[k],(double)(0)) )
37464         {
37465             piv1 = (double)(0);
37466         }
37467         else
37468         {
37469             piv1 = ae_fabs(a->ptr.p_double[k], _state)/scale1;
37470         }
37471         if( ae_fp_eq(c->ptr.p_double[k],(double)(0)) )
37472         {
37473             iin->ptr.p_int[k] = 0;
37474             piv2 = (double)(0);
37475             scale1 = scale2;
37476             if( k<n-1 )
37477             {
37478                 d->ptr.p_double[k] = (double)(0);
37479             }
37480         }
37481         else
37482         {
37483             piv2 = ae_fabs(c->ptr.p_double[k], _state)/scale2;
37484             if( ae_fp_less_eq(piv2,piv1) )
37485             {
37486                 iin->ptr.p_int[k] = 0;
37487                 scale1 = scale2;
37488                 c->ptr.p_double[k] = c->ptr.p_double[k]/a->ptr.p_double[k];
37489                 a->ptr.p_double[k+1] = a->ptr.p_double[k+1]-c->ptr.p_double[k]*b->ptr.p_double[k];
37490                 if( k<n-1 )
37491                 {
37492                     d->ptr.p_double[k] = (double)(0);
37493                 }
37494             }
37495             else
37496             {
37497                 iin->ptr.p_int[k] = 1;
37498                 mult = a->ptr.p_double[k]/c->ptr.p_double[k];
37499                 a->ptr.p_double[k] = c->ptr.p_double[k];
37500                 temp = a->ptr.p_double[k+1];
37501                 a->ptr.p_double[k+1] = b->ptr.p_double[k]-mult*temp;
37502                 if( k<n-1 )
37503                 {
37504                     d->ptr.p_double[k] = b->ptr.p_double[k+1];
37505                     b->ptr.p_double[k+1] = -mult*d->ptr.p_double[k];
37506                 }
37507                 b->ptr.p_double[k] = temp;
37508                 c->ptr.p_double[k] = mult;
37509             }
37510         }
37511         if( ae_fp_less_eq(ae_maxreal(piv1, piv2, _state),tl)&&iin->ptr.p_int[n]==0 )
37512         {
37513             iin->ptr.p_int[n] = k;
37514         }
37515     }
37516     if( ae_fp_less_eq(ae_fabs(a->ptr.p_double[n], _state),scale1*tl)&&iin->ptr.p_int[n]==0 )
37517     {
37518         iin->ptr.p_int[n] = n;
37519     }
37520 }
37521 
37522 
/*************************************************************************
Internal subroutine: solves a (possibly perturbed) shifted tridiagonal
system using a previously computed LU-type factorization.

Based on its name and structure this mirrors the LAPACK routine DLAGTS
(solve stage paired with a DLAGTF-style factorization) -- TODO confirm
against the LAPACK sources.

All arrays are indexed starting from 1.

Input parameters:
    N       -   order of the system; Info=-1 is returned for N<0,
                nothing is done for N=0.
    A       -   diagonal of the factorized matrix (pivot values).
    B, D    -   first and second superdiagonals of the factorization.
    C       -   multipliers produced during elimination.
    IIN     -   integer interchange flags from the factorization:
                IIN[k]=0 means no row swap was made at step k,
                nonzero means rows k and k+1 were interchanged.
    Y       -   right-hand side on entry; overwritten by the solution.
    Tol     -   perturbation tolerance. If Tol<=0 on entry, it is
                replaced by a default value derived from the array
                norms and machine epsilon (and Y is then computed for
                a slightly perturbed matrix to avoid overflow).

Output parameters:
    Y       -   solution of the system.
    Info    -   0 on success, -1 if N<0.
*************************************************************************/
static void evd_tdininternaldlagts(ae_int_t n,
     /* Real    */ ae_vector* a,
     /* Real    */ ae_vector* b,
     /* Real    */ ae_vector* c,
     /* Real    */ ae_vector* d,
     /* Integer */ ae_vector* iin,
     /* Real    */ ae_vector* y,
     double* tol,
     ae_int_t* info,
     ae_state *_state)
{
    ae_int_t k;
    double absak;
    double ak;
    double bignum;
    double eps;
    double pert;
    double sfmin;
    double temp;

    /* Output parameter is cleared twice: once by the generated
     * prologue, once by the translated algorithm body. Harmless. */
    *info = 0;

    *info = 0;
    if( n<0 )
    {
        *info = -1;
        return;
    }
    if( n==0 )
    {
        return;
    }

    /* Machine constants used to detect/avoid overflow in the back
     * substitution below. */
    eps = ae_machineepsilon;
    sfmin = ae_minrealnumber;
    bignum = 1/sfmin;

    /*
     * Non-positive Tol requests the default tolerance:
     * eps * max over all factorization entries (A, B, D), floored at eps.
     */
    if( ae_fp_less_eq(*tol,(double)(0)) )
    {
        *tol = ae_fabs(a->ptr.p_double[1], _state);
        if( n>1 )
        {
            *tol = ae_maxreal(*tol, ae_maxreal(ae_fabs(a->ptr.p_double[2], _state), ae_fabs(b->ptr.p_double[1], _state), _state), _state);
        }
        for(k=3; k<=n; k++)
        {
            *tol = ae_maxreal(*tol, ae_maxreal(ae_fabs(a->ptr.p_double[k], _state), ae_maxreal(ae_fabs(b->ptr.p_double[k-1], _state), ae_fabs(d->ptr.p_double[k-2], _state), _state), _state), _state);
        }
        *tol = *tol*eps;
        if( ae_fp_eq(*tol,(double)(0)) )
        {
            *tol = eps;
        }
    }

    /*
     * Forward elimination: apply the recorded multipliers C and the
     * row interchanges (IIN flags) to the right-hand side Y.
     */
    for(k=2; k<=n; k++)
    {
        if( iin->ptr.p_int[k-1]==0 )
        {
            y->ptr.p_double[k] = y->ptr.p_double[k]-c->ptr.p_double[k-1]*y->ptr.p_double[k-1];
        }
        else
        {
            /* rows k-1 and k were swapped during factorization */
            temp = y->ptr.p_double[k-1];
            y->ptr.p_double[k-1] = y->ptr.p_double[k];
            y->ptr.p_double[k] = temp-c->ptr.p_double[k-1]*y->ptr.p_double[k];
        }
    }

    /*
     * Back substitution with the upper triangular factor (diagonal A,
     * superdiagonals B and D). Pivots that are too small relative to
     * the right-hand side are iteratively perturbed by +-Tol (doubling
     * the perturbation each retry) so that the division cannot overflow.
     */
    for(k=n; k>=1; k--)
    {
        if( k<=n-2 )
        {
            temp = y->ptr.p_double[k]-b->ptr.p_double[k]*y->ptr.p_double[k+1]-d->ptr.p_double[k]*y->ptr.p_double[k+2];
        }
        else
        {
            if( k==n-1 )
            {
                temp = y->ptr.p_double[k]-b->ptr.p_double[k]*y->ptr.p_double[k+1];
            }
            else
            {
                temp = y->ptr.p_double[k];
            }
        }
        ak = a->ptr.p_double[k];

        /* perturbation carries the sign of the pivot */
        pert = ae_fabs(*tol, _state);
        if( ae_fp_less(ak,(double)(0)) )
        {
            pert = -pert;
        }
        for(;;)
        {
            absak = ae_fabs(ak, _state);
            if( ae_fp_less(absak,(double)(1)) )
            {
                if( ae_fp_less(absak,sfmin) )
                {
                    if( ae_fp_eq(absak,(double)(0))||ae_fp_greater(ae_fabs(temp, _state)*sfmin,absak) )
                    {
                        /* pivot is zero or Temp/AK would overflow:
                         * grow the perturbation and retry */
                        ak = ak+pert;
                        pert = 2*pert;
                        continue;
                    }
                    else
                    {
                        /* rescale both numerator and denominator
                         * instead of perturbing */
                        temp = temp*bignum;
                        ak = ak*bignum;
                    }
                }
                else
                {
                    if( ae_fp_greater(ae_fabs(temp, _state),absak*bignum) )
                    {
                        /* quotient would overflow: perturb and retry */
                        ak = ak+pert;
                        pert = 2*pert;
                        continue;
                    }
                }
            }
            break;
        }
        y->ptr.p_double[k] = temp/ak;
    }
}
37645 
37646 
/*************************************************************************
Internal subroutine: bisection driver for counting/locating eigenvalues
of a symmetric tridiagonal matrix via Sturm sequences.

Based on its name and structure this mirrors the LAPACK routine DLAEBZ
-- TODO confirm against the LAPACK sources.

The matrix is defined by its diagonal D[1..N] and the SQUARES of its
off-diagonal elements E2[1..N-1]. Parameter E is accepted for signature
compatibility but is not referenced in this routine; WORK and IWORK are
likewise unused in this serial version.

IJOB selects the operation:
    IJOB=1  -   for each of the MinP initial intervals AB[ji][1..2],
                compute the Sturm counts NAB[ji][1..2] (number of
                eigenvalues less than each endpoint); MOut returns the
                total number of eigenvalues inside the intervals.
    IJOB=2  -   refine all intervals by repeated bisection (midpoints
                are initialized here), splitting intervals whose halves
                both contain eigenvalues; converged intervals are moved
                to the front of the queue.
    IJOB=3  -   binary search: shrink each interval to the part
                containing the point w with N(w)=NVal[ji].

Other parameters:
    NItMax  -   maximum number of bisection iterations.
    MMax    -   capacity of the interval queue AB/NAB; if a split would
                exceed it, Info=MMax+1 is returned.
    AbsTol,
    RelTol  -   absolute/relative convergence thresholds on interval width.
    PivMin  -   minimum absolute pivot allowed in the Sturm recurrence
                (guards against division by ~0).
    C       -   interval midpoints (input for IJOB=3, workspace otherwise).

Output:
    MOut    -   IJOB=1: eigenvalue count; IJOB=2,3: number of intervals KL.
    Info    -   0 on success, -1 for invalid IJOB, MMax+1 on queue
                overflow, otherwise the number of unconverged intervals.
*************************************************************************/
static void evd_internaldlaebz(ae_int_t ijob,
     ae_int_t nitmax,
     ae_int_t n,
     ae_int_t mmax,
     ae_int_t minp,
     double abstol,
     double reltol,
     double pivmin,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     /* Real    */ ae_vector* e2,
     /* Integer */ ae_vector* nval,
     /* Real    */ ae_matrix* ab,
     /* Real    */ ae_vector* c,
     ae_int_t* mout,
     /* Integer */ ae_matrix* nab,
     /* Real    */ ae_vector* work,
     /* Integer */ ae_vector* iwork,
     ae_int_t* info,
     ae_state *_state)
{
    ae_int_t itmp1;
    ae_int_t itmp2;
    ae_int_t j;
    ae_int_t ji;
    ae_int_t jit;
    ae_int_t jp;
    ae_int_t kf;
    ae_int_t kfnew;
    ae_int_t kl;
    ae_int_t klnew;
    double tmp1;
    double tmp2;

    /* Outputs are cleared twice: once by the generated prologue, once
     * by the translated algorithm body. Harmless. */
    *mout = 0;
    *info = 0;

    *info = 0;
    if( ijob<1||ijob>3 )
    {
        *info = -1;
        return;
    }

    /*
     * Initialize NAB
     */
    if( ijob==1 )
    {

        /*
         * Compute the number of eigenvalues in the initial intervals.
         */
        *mout = 0;

        /*
         *DIR$ NOVECTOR
         */
        for(ji=1; ji<=minp; ji++)
        {
            for(jp=1; jp<=2; jp++)
            {
                /* Sturm sequence at endpoint AB[ji][jp]: count sign
                 * changes (non-positive pivots) of the recurrence */
                tmp1 = d->ptr.p_double[1]-ab->ptr.pp_double[ji][jp];
                if( ae_fp_less(ae_fabs(tmp1, _state),pivmin) )
                {
                    tmp1 = -pivmin;
                }
                nab->ptr.pp_int[ji][jp] = 0;
                if( ae_fp_less_eq(tmp1,(double)(0)) )
                {
                    nab->ptr.pp_int[ji][jp] = 1;
                }
                for(j=2; j<=n; j++)
                {
                    tmp1 = d->ptr.p_double[j]-e2->ptr.p_double[j-1]/tmp1-ab->ptr.pp_double[ji][jp];
                    if( ae_fp_less(ae_fabs(tmp1, _state),pivmin) )
                    {
                        tmp1 = -pivmin;
                    }
                    if( ae_fp_less_eq(tmp1,(double)(0)) )
                    {
                        nab->ptr.pp_int[ji][jp] = nab->ptr.pp_int[ji][jp]+1;
                    }
                }
            }
            *mout = *mout+nab->ptr.pp_int[ji][2]-nab->ptr.pp_int[ji][1];
        }
        return;
    }

    /*
     * Initialize for loop
     *
     * KF and KL have the following meaning:
     *   Intervals 1,...,KF-1 have converged.
     *   Intervals KF,...,KL  still need to be refined.
     */
    kf = 1;
    kl = minp;

    /*
     * If IJOB=2, initialize C.
     * If IJOB=3, use the user-supplied starting point.
     */
    if( ijob==2 )
    {
        for(ji=1; ji<=minp; ji++)
        {
            c->ptr.p_double[ji] = 0.5*(ab->ptr.pp_double[ji][1]+ab->ptr.pp_double[ji][2]);
        }
    }

    /*
     * Iteration loop
     */
    for(jit=1; jit<=nitmax; jit++)
    {

        /*
         * Loop over intervals
         *
         *
         * Serial Version of the loop
         */
        klnew = kl;
        for(ji=kf; ji<=kl; ji++)
        {

            /*
             * Compute N(w), the number of eigenvalues less than w
             */
            tmp1 = c->ptr.p_double[ji];
            tmp2 = d->ptr.p_double[1]-tmp1;
            itmp1 = 0;
            if( ae_fp_less_eq(tmp2,pivmin) )
            {
                itmp1 = 1;
                tmp2 = ae_minreal(tmp2, -pivmin, _state);
            }

            /*
             * A series of compiler directives to defeat vectorization
             * for the next loop
             *
             **$PL$ CMCHAR=' '
             *CDIR$          NEXTSCALAR
             *C$DIR          SCALAR
             *CDIR$          NEXT SCALAR
             *CVD$L          NOVECTOR
             *CDEC$          NOVECTOR
             *CVD$           NOVECTOR
             **VDIR          NOVECTOR
             **VOCL          LOOP,SCALAR
             *CIBM           PREFER SCALAR
             **$PL$ CMCHAR='*'
             */
            for(j=2; j<=n; j++)
            {
                tmp2 = d->ptr.p_double[j]-e2->ptr.p_double[j-1]/tmp2-tmp1;
                if( ae_fp_less_eq(tmp2,pivmin) )
                {
                    itmp1 = itmp1+1;
                    tmp2 = ae_minreal(tmp2, -pivmin, _state);
                }
            }
            if( ijob<=2 )
            {

                /*
                 * IJOB=2: Choose all intervals containing eigenvalues.
                 *
                 * Insure that N(w) is monotone
                 */
                itmp1 = ae_minint(nab->ptr.pp_int[ji][2], ae_maxint(nab->ptr.pp_int[ji][1], itmp1, _state), _state);

                /*
                 * Update the Queue -- add intervals if both halves
                 * contain eigenvalues.
                 */
                if( itmp1==nab->ptr.pp_int[ji][2] )
                {

                    /*
                     * No eigenvalue in the upper interval:
                     * just use the lower interval.
                     */
                    ab->ptr.pp_double[ji][2] = tmp1;
                }
                else
                {
                    if( itmp1==nab->ptr.pp_int[ji][1] )
                    {

                        /*
                         * No eigenvalue in the lower interval:
                         * just use the upper interval.
                         */
                        ab->ptr.pp_double[ji][1] = tmp1;
                    }
                    else
                    {
                        if( klnew<mmax )
                        {

                            /*
                             * Eigenvalue in both intervals -- add upper to queue.
                             */
                            klnew = klnew+1;
                            ab->ptr.pp_double[klnew][2] = ab->ptr.pp_double[ji][2];
                            nab->ptr.pp_int[klnew][2] = nab->ptr.pp_int[ji][2];
                            ab->ptr.pp_double[klnew][1] = tmp1;
                            nab->ptr.pp_int[klnew][1] = itmp1;
                            ab->ptr.pp_double[ji][2] = tmp1;
                            nab->ptr.pp_int[ji][2] = itmp1;
                        }
                        else
                        {
                            /* interval queue overflow */
                            *info = mmax+1;
                            return;
                        }
                    }
                }
            }
            else
            {

                /*
                 * IJOB=3: Binary search.  Keep only the interval
                 * containing  w  s.t. N(w) = NVAL
                 */
                if( itmp1<=nval->ptr.p_int[ji] )
                {
                    ab->ptr.pp_double[ji][1] = tmp1;
                    nab->ptr.pp_int[ji][1] = itmp1;
                }
                if( itmp1>=nval->ptr.p_int[ji] )
                {
                    ab->ptr.pp_double[ji][2] = tmp1;
                    nab->ptr.pp_int[ji][2] = itmp1;
                }
            }
        }
        kl = klnew;

        /*
         * Check for convergence
         */
        kfnew = kf;
        for(ji=kf; ji<=kl; ji++)
        {
            /* converged when interval width is below the (abs/rel/pivot)
             * threshold or the endpoint counts coincide */
            tmp1 = ae_fabs(ab->ptr.pp_double[ji][2]-ab->ptr.pp_double[ji][1], _state);
            tmp2 = ae_maxreal(ae_fabs(ab->ptr.pp_double[ji][2], _state), ae_fabs(ab->ptr.pp_double[ji][1], _state), _state);
            if( ae_fp_less(tmp1,ae_maxreal(abstol, ae_maxreal(pivmin, reltol*tmp2, _state), _state))||nab->ptr.pp_int[ji][1]>=nab->ptr.pp_int[ji][2] )
            {

                /*
                 * Converged -- Swap with position KFNEW,
                 * then increment KFNEW
                 */
                if( ji>kfnew )
                {
                    tmp1 = ab->ptr.pp_double[ji][1];
                    tmp2 = ab->ptr.pp_double[ji][2];
                    itmp1 = nab->ptr.pp_int[ji][1];
                    itmp2 = nab->ptr.pp_int[ji][2];
                    ab->ptr.pp_double[ji][1] = ab->ptr.pp_double[kfnew][1];
                    ab->ptr.pp_double[ji][2] = ab->ptr.pp_double[kfnew][2];
                    nab->ptr.pp_int[ji][1] = nab->ptr.pp_int[kfnew][1];
                    nab->ptr.pp_int[ji][2] = nab->ptr.pp_int[kfnew][2];
                    ab->ptr.pp_double[kfnew][1] = tmp1;
                    ab->ptr.pp_double[kfnew][2] = tmp2;
                    nab->ptr.pp_int[kfnew][1] = itmp1;
                    nab->ptr.pp_int[kfnew][2] = itmp2;
                    if( ijob==3 )
                    {
                        /* keep the requested counts aligned with their intervals */
                        itmp1 = nval->ptr.p_int[ji];
                        nval->ptr.p_int[ji] = nval->ptr.p_int[kfnew];
                        nval->ptr.p_int[kfnew] = itmp1;
                    }
                }
                kfnew = kfnew+1;
            }
        }
        kf = kfnew;

        /*
         * Choose Midpoints
         */
        for(ji=kf; ji<=kl; ji++)
        {
            c->ptr.p_double[ji] = 0.5*(ab->ptr.pp_double[ji][1]+ab->ptr.pp_double[ji][2]);
        }

        /*
         * If no more intervals to refine, quit.
         */
        if( kf>kl )
        {
            break;
        }
    }

    /*
     * Converged
     */
    *info = ae_maxint(kl+1-kf, 0, _state);
    *mout = kl;
}
37955 
37956 
37957 /*************************************************************************
37958 Internal subroutine
37959 
37960   -- LAPACK routine (version 3.0) --
37961      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
37962      Courant Institute, Argonne National Lab, and Rice University
37963      June 30, 1999
37964 *************************************************************************/
evd_rmatrixinternaltrevc(ae_matrix * t,ae_int_t n,ae_int_t side,ae_int_t howmny,ae_vector * vselect,ae_matrix * vl,ae_matrix * vr,ae_int_t * m,ae_int_t * info,ae_state * _state)37965 static void evd_rmatrixinternaltrevc(/* Real    */ ae_matrix* t,
37966      ae_int_t n,
37967      ae_int_t side,
37968      ae_int_t howmny,
37969      /* Boolean */ ae_vector* vselect,
37970      /* Real    */ ae_matrix* vl,
37971      /* Real    */ ae_matrix* vr,
37972      ae_int_t* m,
37973      ae_int_t* info,
37974      ae_state *_state)
37975 {
37976     ae_frame _frame_block;
37977     ae_vector _vselect;
37978     ae_int_t i;
37979     ae_int_t j;
37980     ae_matrix t1;
37981     ae_matrix vl1;
37982     ae_matrix vr1;
37983     ae_vector vselect1;
37984 
37985     ae_frame_make(_state, &_frame_block);
37986     memset(&_vselect, 0, sizeof(_vselect));
37987     memset(&t1, 0, sizeof(t1));
37988     memset(&vl1, 0, sizeof(vl1));
37989     memset(&vr1, 0, sizeof(vr1));
37990     memset(&vselect1, 0, sizeof(vselect1));
37991     ae_vector_init_copy(&_vselect, vselect, _state, ae_true);
37992     vselect = &_vselect;
37993     *m = 0;
37994     *info = 0;
37995     ae_matrix_init(&t1, 0, 0, DT_REAL, _state, ae_true);
37996     ae_matrix_init(&vl1, 0, 0, DT_REAL, _state, ae_true);
37997     ae_matrix_init(&vr1, 0, 0, DT_REAL, _state, ae_true);
37998     ae_vector_init(&vselect1, 0, DT_BOOL, _state, ae_true);
37999 
38000 
38001     /*
38002      * Allocate VL/VR, if needed
38003      */
38004     if( howmny==2||howmny==3 )
38005     {
38006         if( side==1||side==3 )
38007         {
38008             rmatrixsetlengthatleast(vr, n, n, _state);
38009         }
38010         if( side==2||side==3 )
38011         {
38012             rmatrixsetlengthatleast(vl, n, n, _state);
38013         }
38014     }
38015 
38016     /*
38017      * Try to use MKL kernel
38018      */
38019     if( rmatrixinternaltrevcmkl(t, n, side, howmny, vl, vr, m, info, _state) )
38020     {
38021         ae_frame_leave(_state);
38022         return;
38023     }
38024 
38025     /*
38026      * ALGLIB version
38027      */
38028     ae_matrix_set_length(&t1, n+1, n+1, _state);
38029     for(i=0; i<=n-1; i++)
38030     {
38031         for(j=0; j<=n-1; j++)
38032         {
38033             t1.ptr.pp_double[i+1][j+1] = t->ptr.pp_double[i][j];
38034         }
38035     }
38036     if( howmny==3 )
38037     {
38038         ae_vector_set_length(&vselect1, n+1, _state);
38039         for(i=0; i<=n-1; i++)
38040         {
38041             vselect1.ptr.p_bool[1+i] = vselect->ptr.p_bool[i];
38042         }
38043     }
38044     if( (side==2||side==3)&&howmny==1 )
38045     {
38046         ae_matrix_set_length(&vl1, n+1, n+1, _state);
38047         for(i=0; i<=n-1; i++)
38048         {
38049             for(j=0; j<=n-1; j++)
38050             {
38051                 vl1.ptr.pp_double[i+1][j+1] = vl->ptr.pp_double[i][j];
38052             }
38053         }
38054     }
38055     if( (side==1||side==3)&&howmny==1 )
38056     {
38057         ae_matrix_set_length(&vr1, n+1, n+1, _state);
38058         for(i=0; i<=n-1; i++)
38059         {
38060             for(j=0; j<=n-1; j++)
38061             {
38062                 vr1.ptr.pp_double[i+1][j+1] = vr->ptr.pp_double[i][j];
38063             }
38064         }
38065     }
38066     evd_internaltrevc(&t1, n, side, howmny, &vselect1, &vl1, &vr1, m, info, _state);
38067     if( side!=1 )
38068     {
38069         rmatrixsetlengthatleast(vl, n, n, _state);
38070         for(i=0; i<=n-1; i++)
38071         {
38072             for(j=0; j<=n-1; j++)
38073             {
38074                 vl->ptr.pp_double[i][j] = vl1.ptr.pp_double[i+1][j+1];
38075             }
38076         }
38077     }
38078     if( side!=2 )
38079     {
38080         rmatrixsetlengthatleast(vr, n, n, _state);
38081         for(i=0; i<=n-1; i++)
38082         {
38083             for(j=0; j<=n-1; j++)
38084             {
38085                 vr->ptr.pp_double[i][j] = vr1.ptr.pp_double[i+1][j+1];
38086             }
38087         }
38088     }
38089     ae_frame_leave(_state);
38090 }
38091 
38092 
38093 /*************************************************************************
38094 Internal subroutine
38095 
38096   -- LAPACK routine (version 3.0) --
38097      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
38098      Courant Institute, Argonne National Lab, and Rice University
38099      June 30, 1999
38100 *************************************************************************/
evd_internaltrevc(ae_matrix * t,ae_int_t n,ae_int_t side,ae_int_t howmny,ae_vector * vselect,ae_matrix * vl,ae_matrix * vr,ae_int_t * m,ae_int_t * info,ae_state * _state)38101 static void evd_internaltrevc(/* Real    */ ae_matrix* t,
38102      ae_int_t n,
38103      ae_int_t side,
38104      ae_int_t howmny,
38105      /* Boolean */ ae_vector* vselect,
38106      /* Real    */ ae_matrix* vl,
38107      /* Real    */ ae_matrix* vr,
38108      ae_int_t* m,
38109      ae_int_t* info,
38110      ae_state *_state)
38111 {
38112     ae_frame _frame_block;
38113     ae_vector _vselect;
38114     ae_bool allv;
38115     ae_bool bothv;
38116     ae_bool leftv;
38117     ae_bool over;
38118     ae_bool pair;
38119     ae_bool rightv;
38120     ae_bool somev;
38121     ae_int_t i;
38122     ae_int_t ierr;
38123     ae_int_t ii;
38124     ae_int_t ip;
38125     ae_int_t iis;
38126     ae_int_t j;
38127     ae_int_t j1;
38128     ae_int_t j2;
38129     ae_int_t jnxt;
38130     ae_int_t k;
38131     ae_int_t ki;
38132     ae_int_t n2;
38133     double beta;
38134     double bignum;
38135     double emax;
38136     double rec;
38137     double remax;
38138     double scl;
38139     double smin;
38140     double smlnum;
38141     double ulp;
38142     double unfl;
38143     double vcrit;
38144     double vmax;
38145     double wi;
38146     double wr;
38147     double xnorm;
38148     ae_matrix x;
38149     ae_vector work;
38150     ae_vector temp;
38151     ae_matrix temp11;
38152     ae_matrix temp22;
38153     ae_matrix temp11b;
38154     ae_matrix temp21b;
38155     ae_matrix temp12b;
38156     ae_matrix temp22b;
38157     ae_bool skipflag;
38158     ae_int_t k1;
38159     ae_int_t k2;
38160     ae_int_t k3;
38161     ae_int_t k4;
38162     double vt;
38163     ae_vector rswap4;
38164     ae_vector zswap4;
38165     ae_matrix ipivot44;
38166     ae_vector civ4;
38167     ae_vector crv4;
38168 
38169     ae_frame_make(_state, &_frame_block);
38170     memset(&_vselect, 0, sizeof(_vselect));
38171     memset(&x, 0, sizeof(x));
38172     memset(&work, 0, sizeof(work));
38173     memset(&temp, 0, sizeof(temp));
38174     memset(&temp11, 0, sizeof(temp11));
38175     memset(&temp22, 0, sizeof(temp22));
38176     memset(&temp11b, 0, sizeof(temp11b));
38177     memset(&temp21b, 0, sizeof(temp21b));
38178     memset(&temp12b, 0, sizeof(temp12b));
38179     memset(&temp22b, 0, sizeof(temp22b));
38180     memset(&rswap4, 0, sizeof(rswap4));
38181     memset(&zswap4, 0, sizeof(zswap4));
38182     memset(&ipivot44, 0, sizeof(ipivot44));
38183     memset(&civ4, 0, sizeof(civ4));
38184     memset(&crv4, 0, sizeof(crv4));
38185     ae_vector_init_copy(&_vselect, vselect, _state, ae_true);
38186     vselect = &_vselect;
38187     *m = 0;
38188     *info = 0;
38189     ae_matrix_init(&x, 0, 0, DT_REAL, _state, ae_true);
38190     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
38191     ae_vector_init(&temp, 0, DT_REAL, _state, ae_true);
38192     ae_matrix_init(&temp11, 0, 0, DT_REAL, _state, ae_true);
38193     ae_matrix_init(&temp22, 0, 0, DT_REAL, _state, ae_true);
38194     ae_matrix_init(&temp11b, 0, 0, DT_REAL, _state, ae_true);
38195     ae_matrix_init(&temp21b, 0, 0, DT_REAL, _state, ae_true);
38196     ae_matrix_init(&temp12b, 0, 0, DT_REAL, _state, ae_true);
38197     ae_matrix_init(&temp22b, 0, 0, DT_REAL, _state, ae_true);
38198     ae_vector_init(&rswap4, 0, DT_BOOL, _state, ae_true);
38199     ae_vector_init(&zswap4, 0, DT_BOOL, _state, ae_true);
38200     ae_matrix_init(&ipivot44, 0, 0, DT_INT, _state, ae_true);
38201     ae_vector_init(&civ4, 0, DT_REAL, _state, ae_true);
38202     ae_vector_init(&crv4, 0, DT_REAL, _state, ae_true);
38203 
38204     ae_matrix_set_length(&x, 2+1, 2+1, _state);
38205     ae_matrix_set_length(&temp11, 1+1, 1+1, _state);
38206     ae_matrix_set_length(&temp11b, 1+1, 1+1, _state);
38207     ae_matrix_set_length(&temp21b, 2+1, 1+1, _state);
38208     ae_matrix_set_length(&temp12b, 1+1, 2+1, _state);
38209     ae_matrix_set_length(&temp22b, 2+1, 2+1, _state);
38210     ae_matrix_set_length(&temp22, 2+1, 2+1, _state);
38211     ae_vector_set_length(&work, 3*n+1, _state);
38212     ae_vector_set_length(&temp, n+1, _state);
38213     ae_vector_set_length(&rswap4, 4+1, _state);
38214     ae_vector_set_length(&zswap4, 4+1, _state);
38215     ae_matrix_set_length(&ipivot44, 4+1, 4+1, _state);
38216     ae_vector_set_length(&civ4, 4+1, _state);
38217     ae_vector_set_length(&crv4, 4+1, _state);
38218     if( howmny!=1 )
38219     {
38220         if( side==1||side==3 )
38221         {
38222             ae_matrix_set_length(vr, n+1, n+1, _state);
38223         }
38224         if( side==2||side==3 )
38225         {
38226             ae_matrix_set_length(vl, n+1, n+1, _state);
38227         }
38228     }
38229 
38230     /*
38231      * Decode and test the input parameters
38232      */
38233     bothv = side==3;
38234     rightv = side==1||bothv;
38235     leftv = side==2||bothv;
38236     allv = howmny==2;
38237     over = howmny==1;
38238     somev = howmny==3;
38239     *info = 0;
38240     if( n<0 )
38241     {
38242         *info = -2;
38243         ae_frame_leave(_state);
38244         return;
38245     }
38246     if( !rightv&&!leftv )
38247     {
38248         *info = -3;
38249         ae_frame_leave(_state);
38250         return;
38251     }
38252     if( (!allv&&!over)&&!somev )
38253     {
38254         *info = -4;
38255         ae_frame_leave(_state);
38256         return;
38257     }
38258 
38259     /*
38260      * Set M to the number of columns required to store the selected
38261      * eigenvectors, standardize the array SELECT if necessary, and
38262      * test MM.
38263      */
38264     if( somev )
38265     {
38266         *m = 0;
38267         pair = ae_false;
38268         for(j=1; j<=n; j++)
38269         {
38270             if( pair )
38271             {
38272                 pair = ae_false;
38273                 vselect->ptr.p_bool[j] = ae_false;
38274             }
38275             else
38276             {
38277                 if( j<n )
38278                 {
38279                     if( ae_fp_eq(t->ptr.pp_double[j+1][j],(double)(0)) )
38280                     {
38281                         if( vselect->ptr.p_bool[j] )
38282                         {
38283                             *m = *m+1;
38284                         }
38285                     }
38286                     else
38287                     {
38288                         pair = ae_true;
38289                         if( vselect->ptr.p_bool[j]||vselect->ptr.p_bool[j+1] )
38290                         {
38291                             vselect->ptr.p_bool[j] = ae_true;
38292                             *m = *m+2;
38293                         }
38294                     }
38295                 }
38296                 else
38297                 {
38298                     if( vselect->ptr.p_bool[n] )
38299                     {
38300                         *m = *m+1;
38301                     }
38302                 }
38303             }
38304         }
38305     }
38306     else
38307     {
38308         *m = n;
38309     }
38310 
38311     /*
38312      * Quick return if possible.
38313      */
38314     if( n==0 )
38315     {
38316         ae_frame_leave(_state);
38317         return;
38318     }
38319 
38320     /*
38321      * Set the constants to control overflow.
38322      */
38323     unfl = ae_minrealnumber;
38324     ulp = ae_machineepsilon;
38325     smlnum = unfl*(n/ulp);
38326     bignum = (1-ulp)/smlnum;
38327 
38328     /*
38329      * Compute 1-norm of each column of strictly upper triangular
38330      * part of T to control overflow in triangular solver.
38331      */
38332     work.ptr.p_double[1] = (double)(0);
38333     for(j=2; j<=n; j++)
38334     {
38335         work.ptr.p_double[j] = (double)(0);
38336         for(i=1; i<=j-1; i++)
38337         {
38338             work.ptr.p_double[j] = work.ptr.p_double[j]+ae_fabs(t->ptr.pp_double[i][j], _state);
38339         }
38340     }
38341 
38342     /*
38343      * Index IP is used to specify the real or complex eigenvalue:
38344      * IP = 0, real eigenvalue,
38345      *      1, first of conjugate complex pair: (wr,wi)
38346      *     -1, second of conjugate complex pair: (wr,wi)
38347      */
38348     n2 = 2*n;
38349     if( rightv )
38350     {
38351 
38352         /*
38353          * Compute right eigenvectors.
38354          */
38355         ip = 0;
38356         iis = *m;
38357         for(ki=n; ki>=1; ki--)
38358         {
38359             skipflag = ae_false;
38360             if( ip==1 )
38361             {
38362                 skipflag = ae_true;
38363             }
38364             else
38365             {
38366                 if( ki!=1 )
38367                 {
38368                     if( ae_fp_neq(t->ptr.pp_double[ki][ki-1],(double)(0)) )
38369                     {
38370                         ip = -1;
38371                     }
38372                 }
38373                 if( somev )
38374                 {
38375                     if( ip==0 )
38376                     {
38377                         if( !vselect->ptr.p_bool[ki] )
38378                         {
38379                             skipflag = ae_true;
38380                         }
38381                     }
38382                     else
38383                     {
38384                         if( !vselect->ptr.p_bool[ki-1] )
38385                         {
38386                             skipflag = ae_true;
38387                         }
38388                     }
38389                 }
38390             }
38391             if( !skipflag )
38392             {
38393 
38394                 /*
38395                  * Compute the KI-th eigenvalue (WR,WI).
38396                  */
38397                 wr = t->ptr.pp_double[ki][ki];
38398                 wi = (double)(0);
38399                 if( ip!=0 )
38400                 {
38401                     wi = ae_sqrt(ae_fabs(t->ptr.pp_double[ki][ki-1], _state), _state)*ae_sqrt(ae_fabs(t->ptr.pp_double[ki-1][ki], _state), _state);
38402                 }
38403                 smin = ae_maxreal(ulp*(ae_fabs(wr, _state)+ae_fabs(wi, _state)), smlnum, _state);
38404                 if( ip==0 )
38405                 {
38406 
38407                     /*
38408                      * Real right eigenvector
38409                      */
38410                     work.ptr.p_double[ki+n] = (double)(1);
38411 
38412                     /*
38413                      * Form right-hand side
38414                      */
38415                     for(k=1; k<=ki-1; k++)
38416                     {
38417                         work.ptr.p_double[k+n] = -t->ptr.pp_double[k][ki];
38418                     }
38419 
38420                     /*
38421                      * Solve the upper quasi-triangular system:
38422                      *   (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
38423                      */
38424                     jnxt = ki-1;
38425                     for(j=ki-1; j>=1; j--)
38426                     {
38427                         if( j>jnxt )
38428                         {
38429                             continue;
38430                         }
38431                         j1 = j;
38432                         j2 = j;
38433                         jnxt = j-1;
38434                         if( j>1 )
38435                         {
38436                             if( ae_fp_neq(t->ptr.pp_double[j][j-1],(double)(0)) )
38437                             {
38438                                 j1 = j-1;
38439                                 jnxt = j-2;
38440                             }
38441                         }
38442                         if( j1==j2 )
38443                         {
38444 
38445                             /*
38446                              * 1-by-1 diagonal block
38447                              */
38448                             temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
38449                             temp11b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
38450                             evd_internalhsevdlaln2(ae_false, 1, 1, smin, (double)(1), &temp11, 1.0, 1.0, &temp11b, wr, 0.0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38451 
38452                             /*
38453                              * Scale X(1,1) to avoid overflow when updating
38454                              * the right-hand side.
38455                              */
38456                             if( ae_fp_greater(xnorm,(double)(1)) )
38457                             {
38458                                 if( ae_fp_greater(work.ptr.p_double[j],bignum/xnorm) )
38459                                 {
38460                                     x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm;
38461                                     scl = scl/xnorm;
38462                                 }
38463                             }
38464 
38465                             /*
38466                              * Scale if necessary
38467                              */
38468                             if( ae_fp_neq(scl,(double)(1)) )
38469                             {
38470                                 k1 = n+1;
38471                                 k2 = n+ki;
38472                                 ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
38473                             }
38474                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
38475 
38476                             /*
38477                              * Update right-hand side
38478                              */
38479                             k1 = 1+n;
38480                             k2 = j-1+n;
38481                             k3 = j-1;
38482                             vt = -x.ptr.pp_double[1][1];
38483                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(k1,k2), vt);
38484                         }
38485                         else
38486                         {
38487 
38488                             /*
38489                              * 2-by-2 diagonal block
38490                              */
38491                             temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j-1][j-1];
38492                             temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j-1][j];
38493                             temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j][j-1];
38494                             temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j][j];
38495                             temp21b.ptr.pp_double[1][1] = work.ptr.p_double[j-1+n];
38496                             temp21b.ptr.pp_double[2][1] = work.ptr.p_double[j+n];
38497                             evd_internalhsevdlaln2(ae_false, 2, 1, smin, 1.0, &temp22, 1.0, 1.0, &temp21b, wr, (double)(0), &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38498 
38499                             /*
38500                              * Scale X(1,1) and X(2,1) to avoid overflow when
38501                              * updating the right-hand side.
38502                              */
38503                             if( ae_fp_greater(xnorm,(double)(1)) )
38504                             {
38505                                 beta = ae_maxreal(work.ptr.p_double[j-1], work.ptr.p_double[j], _state);
38506                                 if( ae_fp_greater(beta,bignum/xnorm) )
38507                                 {
38508                                     x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm;
38509                                     x.ptr.pp_double[2][1] = x.ptr.pp_double[2][1]/xnorm;
38510                                     scl = scl/xnorm;
38511                                 }
38512                             }
38513 
38514                             /*
38515                              * Scale if necessary
38516                              */
38517                             if( ae_fp_neq(scl,(double)(1)) )
38518                             {
38519                                 k1 = 1+n;
38520                                 k2 = ki+n;
38521                                 ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
38522                             }
38523                             work.ptr.p_double[j-1+n] = x.ptr.pp_double[1][1];
38524                             work.ptr.p_double[j+n] = x.ptr.pp_double[2][1];
38525 
38526                             /*
38527                              * Update right-hand side
38528                              */
38529                             k1 = 1+n;
38530                             k2 = j-2+n;
38531                             k3 = j-2;
38532                             k4 = j-1;
38533                             vt = -x.ptr.pp_double[1][1];
38534                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][k4], t->stride, ae_v_len(k1,k2), vt);
38535                             vt = -x.ptr.pp_double[2][1];
38536                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(k1,k2), vt);
38537                         }
38538                     }
38539 
38540                     /*
38541                      * Copy the vector x or Q*x to VR and normalize.
38542                      */
38543                     if( !over )
38544                     {
38545                         k1 = 1+n;
38546                         k2 = ki+n;
38547                         ae_v_move(&vr->ptr.pp_double[1][iis], vr->stride, &work.ptr.p_double[k1], 1, ae_v_len(1,ki));
38548                         ii = columnidxabsmax(vr, 1, ki, iis, _state);
38549                         remax = 1/ae_fabs(vr->ptr.pp_double[ii][iis], _state);
38550                         ae_v_muld(&vr->ptr.pp_double[1][iis], vr->stride, ae_v_len(1,ki), remax);
38551                         for(k=ki+1; k<=n; k++)
38552                         {
38553                             vr->ptr.pp_double[k][iis] = (double)(0);
38554                         }
38555                     }
38556                     else
38557                     {
38558                         if( ki>1 )
38559                         {
38560                             ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n));
38561                             matrixvectormultiply(vr, 1, n, 1, ki-1, ae_false, &work, 1+n, ki-1+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state);
38562                             ae_v_move(&vr->ptr.pp_double[1][ki], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
38563                         }
38564                         ii = columnidxabsmax(vr, 1, n, ki, _state);
38565                         remax = 1/ae_fabs(vr->ptr.pp_double[ii][ki], _state);
38566                         ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), remax);
38567                     }
38568                 }
38569                 else
38570                 {
38571 
38572                     /*
38573                      * Complex right eigenvector.
38574                      *
38575                      * Initial solve
38576                      *     [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
38577                      *     [ (T(KI,KI-1)   T(KI,KI)   )               ]
38578                      */
38579                     if( ae_fp_greater_eq(ae_fabs(t->ptr.pp_double[ki-1][ki], _state),ae_fabs(t->ptr.pp_double[ki][ki-1], _state)) )
38580                     {
38581                         work.ptr.p_double[ki-1+n] = (double)(1);
38582                         work.ptr.p_double[ki+n2] = wi/t->ptr.pp_double[ki-1][ki];
38583                     }
38584                     else
38585                     {
38586                         work.ptr.p_double[ki-1+n] = -wi/t->ptr.pp_double[ki][ki-1];
38587                         work.ptr.p_double[ki+n2] = (double)(1);
38588                     }
38589                     work.ptr.p_double[ki+n] = (double)(0);
38590                     work.ptr.p_double[ki-1+n2] = (double)(0);
38591 
38592                     /*
38593                      * Form right-hand side
38594                      */
38595                     for(k=1; k<=ki-2; k++)
38596                     {
38597                         work.ptr.p_double[k+n] = -work.ptr.p_double[ki-1+n]*t->ptr.pp_double[k][ki-1];
38598                         work.ptr.p_double[k+n2] = -work.ptr.p_double[ki+n2]*t->ptr.pp_double[k][ki];
38599                     }
38600 
38601                     /*
38602                      * Solve upper quasi-triangular system:
38603                      * (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
38604                      */
38605                     jnxt = ki-2;
38606                     for(j=ki-2; j>=1; j--)
38607                     {
38608                         if( j>jnxt )
38609                         {
38610                             continue;
38611                         }
38612                         j1 = j;
38613                         j2 = j;
38614                         jnxt = j-1;
38615                         if( j>1 )
38616                         {
38617                             if( ae_fp_neq(t->ptr.pp_double[j][j-1],(double)(0)) )
38618                             {
38619                                 j1 = j-1;
38620                                 jnxt = j-2;
38621                             }
38622                         }
38623                         if( j1==j2 )
38624                         {
38625 
38626                             /*
38627                              * 1-by-1 diagonal block
38628                              */
38629                             temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
38630                             temp12b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
38631                             temp12b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n];
38632                             evd_internalhsevdlaln2(ae_false, 1, 2, smin, 1.0, &temp11, 1.0, 1.0, &temp12b, wr, wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38633 
38634                             /*
38635                              * Scale X(1,1) and X(1,2) to avoid overflow when
38636                              * updating the right-hand side.
38637                              */
38638                             if( ae_fp_greater(xnorm,(double)(1)) )
38639                             {
38640                                 if( ae_fp_greater(work.ptr.p_double[j],bignum/xnorm) )
38641                                 {
38642                                     x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm;
38643                                     x.ptr.pp_double[1][2] = x.ptr.pp_double[1][2]/xnorm;
38644                                     scl = scl/xnorm;
38645                                 }
38646                             }
38647 
38648                             /*
38649                              * Scale if necessary
38650                              */
38651                             if( ae_fp_neq(scl,(double)(1)) )
38652                             {
38653                                 k1 = 1+n;
38654                                 k2 = ki+n;
38655                                 ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
38656                                 k1 = 1+n2;
38657                                 k2 = ki+n2;
38658                                 ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
38659                             }
38660                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
38661                             work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2];
38662 
38663                             /*
38664                              * Update the right-hand side
38665                              */
38666                             k1 = 1+n;
38667                             k2 = j-1+n;
38668                             k3 = 1;
38669                             k4 = j-1;
38670                             vt = -x.ptr.pp_double[1][1];
38671                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[k3][j], t->stride, ae_v_len(k1,k2), vt);
38672                             k1 = 1+n2;
38673                             k2 = j-1+n2;
38674                             k3 = 1;
38675                             k4 = j-1;
38676                             vt = -x.ptr.pp_double[1][2];
38677                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[k3][j], t->stride, ae_v_len(k1,k2), vt);
38678                         }
38679                         else
38680                         {
38681 
38682                             /*
38683                              * 2-by-2 diagonal block
38684                              */
38685                             temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j-1][j-1];
38686                             temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j-1][j];
38687                             temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j][j-1];
38688                             temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j][j];
38689                             temp22b.ptr.pp_double[1][1] = work.ptr.p_double[j-1+n];
38690                             temp22b.ptr.pp_double[1][2] = work.ptr.p_double[j-1+n+n];
38691                             temp22b.ptr.pp_double[2][1] = work.ptr.p_double[j+n];
38692                             temp22b.ptr.pp_double[2][2] = work.ptr.p_double[j+n+n];
38693                             evd_internalhsevdlaln2(ae_false, 2, 2, smin, 1.0, &temp22, 1.0, 1.0, &temp22b, wr, wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38694 
38695                             /*
38696                              * Scale X to avoid overflow when updating
38697                              * the right-hand side.
38698                              */
38699                             if( ae_fp_greater(xnorm,(double)(1)) )
38700                             {
38701                                 beta = ae_maxreal(work.ptr.p_double[j-1], work.ptr.p_double[j], _state);
38702                                 if( ae_fp_greater(beta,bignum/xnorm) )
38703                                 {
38704                                     rec = 1/xnorm;
38705                                     x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]*rec;
38706                                     x.ptr.pp_double[1][2] = x.ptr.pp_double[1][2]*rec;
38707                                     x.ptr.pp_double[2][1] = x.ptr.pp_double[2][1]*rec;
38708                                     x.ptr.pp_double[2][2] = x.ptr.pp_double[2][2]*rec;
38709                                     scl = scl*rec;
38710                                 }
38711                             }
38712 
38713                             /*
38714                              * Scale if necessary
38715                              */
38716                             if( ae_fp_neq(scl,(double)(1)) )
38717                             {
38718                                 ae_v_muld(&work.ptr.p_double[1+n], 1, ae_v_len(1+n,ki+n), scl);
38719                                 ae_v_muld(&work.ptr.p_double[1+n2], 1, ae_v_len(1+n2,ki+n2), scl);
38720                             }
38721                             work.ptr.p_double[j-1+n] = x.ptr.pp_double[1][1];
38722                             work.ptr.p_double[j+n] = x.ptr.pp_double[2][1];
38723                             work.ptr.p_double[j-1+n2] = x.ptr.pp_double[1][2];
38724                             work.ptr.p_double[j+n2] = x.ptr.pp_double[2][2];
38725 
38726                             /*
38727                              * Update the right-hand side
38728                              */
38729                             vt = -x.ptr.pp_double[1][1];
38730                             ae_v_addd(&work.ptr.p_double[n+1], 1, &t->ptr.pp_double[1][j-1], t->stride, ae_v_len(n+1,n+j-2), vt);
38731                             vt = -x.ptr.pp_double[2][1];
38732                             ae_v_addd(&work.ptr.p_double[n+1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(n+1,n+j-2), vt);
38733                             vt = -x.ptr.pp_double[1][2];
38734                             ae_v_addd(&work.ptr.p_double[n2+1], 1, &t->ptr.pp_double[1][j-1], t->stride, ae_v_len(n2+1,n2+j-2), vt);
38735                             vt = -x.ptr.pp_double[2][2];
38736                             ae_v_addd(&work.ptr.p_double[n2+1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(n2+1,n2+j-2), vt);
38737                         }
38738                     }
38739 
38740                     /*
38741                      * Copy the vector x or Q*x to VR and normalize.
38742                      */
38743                     if( !over )
38744                     {
38745                         ae_v_move(&vr->ptr.pp_double[1][iis-1], vr->stride, &work.ptr.p_double[n+1], 1, ae_v_len(1,ki));
38746                         ae_v_move(&vr->ptr.pp_double[1][iis], vr->stride, &work.ptr.p_double[n2+1], 1, ae_v_len(1,ki));
38747                         emax = (double)(0);
38748                         for(k=1; k<=ki; k++)
38749                         {
38750                             emax = ae_maxreal(emax, ae_fabs(vr->ptr.pp_double[k][iis-1], _state)+ae_fabs(vr->ptr.pp_double[k][iis], _state), _state);
38751                         }
38752                         remax = 1/emax;
38753                         ae_v_muld(&vr->ptr.pp_double[1][iis-1], vr->stride, ae_v_len(1,ki), remax);
38754                         ae_v_muld(&vr->ptr.pp_double[1][iis], vr->stride, ae_v_len(1,ki), remax);
38755                         for(k=ki+1; k<=n; k++)
38756                         {
38757                             vr->ptr.pp_double[k][iis-1] = (double)(0);
38758                             vr->ptr.pp_double[k][iis] = (double)(0);
38759                         }
38760                     }
38761                     else
38762                     {
38763                         if( ki>2 )
38764                         {
38765                             ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n));
38766                             matrixvectormultiply(vr, 1, n, 1, ki-2, ae_false, &work, 1+n, ki-2+n, 1.0, &temp, 1, n, work.ptr.p_double[ki-1+n], _state);
38767                             ae_v_move(&vr->ptr.pp_double[1][ki-1], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
38768                             ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n));
38769                             matrixvectormultiply(vr, 1, n, 1, ki-2, ae_false, &work, 1+n2, ki-2+n2, 1.0, &temp, 1, n, work.ptr.p_double[ki+n2], _state);
38770                             ae_v_move(&vr->ptr.pp_double[1][ki], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
38771                         }
38772                         else
38773                         {
38774                             vt = work.ptr.p_double[ki-1+n];
38775                             ae_v_muld(&vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n), vt);
38776                             vt = work.ptr.p_double[ki+n2];
38777                             ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), vt);
38778                         }
38779                         emax = (double)(0);
38780                         for(k=1; k<=n; k++)
38781                         {
38782                             emax = ae_maxreal(emax, ae_fabs(vr->ptr.pp_double[k][ki-1], _state)+ae_fabs(vr->ptr.pp_double[k][ki], _state), _state);
38783                         }
38784                         remax = 1/emax;
38785                         ae_v_muld(&vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n), remax);
38786                         ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), remax);
38787                     }
38788                 }
38789                 iis = iis-1;
38790                 if( ip!=0 )
38791                 {
38792                     iis = iis-1;
38793                 }
38794             }
38795             if( ip==1 )
38796             {
38797                 ip = 0;
38798             }
38799             if( ip==-1 )
38800             {
38801                 ip = 1;
38802             }
38803         }
38804     }
38805     if( leftv )
38806     {
38807 
38808         /*
38809          * Compute left eigenvectors.
38810          */
38811         ip = 0;
38812         iis = 1;
38813         for(ki=1; ki<=n; ki++)
38814         {
38815             skipflag = ae_false;
38816             if( ip==-1 )
38817             {
38818                 skipflag = ae_true;
38819             }
38820             else
38821             {
38822                 if( ki!=n )
38823                 {
38824                     if( ae_fp_neq(t->ptr.pp_double[ki+1][ki],(double)(0)) )
38825                     {
38826                         ip = 1;
38827                     }
38828                 }
38829                 if( somev )
38830                 {
38831                     if( !vselect->ptr.p_bool[ki] )
38832                     {
38833                         skipflag = ae_true;
38834                     }
38835                 }
38836             }
38837             if( !skipflag )
38838             {
38839 
38840                 /*
38841                  * Compute the KI-th eigenvalue (WR,WI).
38842                  */
38843                 wr = t->ptr.pp_double[ki][ki];
38844                 wi = (double)(0);
38845                 if( ip!=0 )
38846                 {
38847                     wi = ae_sqrt(ae_fabs(t->ptr.pp_double[ki][ki+1], _state), _state)*ae_sqrt(ae_fabs(t->ptr.pp_double[ki+1][ki], _state), _state);
38848                 }
38849                 smin = ae_maxreal(ulp*(ae_fabs(wr, _state)+ae_fabs(wi, _state)), smlnum, _state);
38850                 if( ip==0 )
38851                 {
38852 
38853                     /*
38854                      * Real left eigenvector.
38855                      */
38856                     work.ptr.p_double[ki+n] = (double)(1);
38857 
38858                     /*
38859                      * Form right-hand side
38860                      */
38861                     for(k=ki+1; k<=n; k++)
38862                     {
38863                         work.ptr.p_double[k+n] = -t->ptr.pp_double[ki][k];
38864                     }
38865 
38866                     /*
38867                      * Solve the quasi-triangular system:
38868                      * (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK
38869                      */
38870                     vmax = (double)(1);
38871                     vcrit = bignum;
38872                     jnxt = ki+1;
38873                     for(j=ki+1; j<=n; j++)
38874                     {
38875                         if( j<jnxt )
38876                         {
38877                             continue;
38878                         }
38879                         j1 = j;
38880                         j2 = j;
38881                         jnxt = j+1;
38882                         if( j<n )
38883                         {
38884                             if( ae_fp_neq(t->ptr.pp_double[j+1][j],(double)(0)) )
38885                             {
38886                                 j2 = j+1;
38887                                 jnxt = j+2;
38888                             }
38889                         }
38890                         if( j1==j2 )
38891                         {
38892 
38893                             /*
38894                              * 1-by-1 diagonal block
38895                              *
38896                              * Scale if necessary to avoid overflow when forming
38897                              * the right-hand side.
38898                              */
38899                             if( ae_fp_greater(work.ptr.p_double[j],vcrit) )
38900                             {
38901                                 rec = 1/vmax;
38902                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
38903                                 vmax = (double)(1);
38904                                 vcrit = bignum;
38905                             }
38906                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1));
38907                             work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
38908 
38909                             /*
38910                              * Solve (T(J,J)-WR)'*X = WORK
38911                              */
38912                             temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
38913                             temp11b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
38914                             evd_internalhsevdlaln2(ae_false, 1, 1, smin, 1.0, &temp11, 1.0, 1.0, &temp11b, wr, (double)(0), &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38915 
38916                             /*
38917                              * Scale if necessary
38918                              */
38919                             if( ae_fp_neq(scl,(double)(1)) )
38920                             {
38921                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
38922                             }
38923                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
38924                             vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), vmax, _state);
38925                             vcrit = bignum/vmax;
38926                         }
38927                         else
38928                         {
38929 
38930                             /*
38931                              * 2-by-2 diagonal block
38932                              *
38933                              * Scale if necessary to avoid overflow when forming
38934                              * the right-hand side.
38935                              */
38936                             beta = ae_maxreal(work.ptr.p_double[j], work.ptr.p_double[j+1], _state);
38937                             if( ae_fp_greater(beta,vcrit) )
38938                             {
38939                                 rec = 1/vmax;
38940                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
38941                                 vmax = (double)(1);
38942                                 vcrit = bignum;
38943                             }
38944                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1));
38945                             work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
38946                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j+1], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1));
38947                             work.ptr.p_double[j+1+n] = work.ptr.p_double[j+1+n]-vt;
38948 
38949                             /*
38950                              * Solve
38951                              *    [T(J,J)-WR   T(J,J+1)     ]'* X = SCALE*( WORK1 )
38952                              *    [T(J+1,J)    T(J+1,J+1)-WR]             ( WORK2 )
38953                              */
38954                             temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
38955                             temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j][j+1];
38956                             temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j+1][j];
38957                             temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j+1][j+1];
38958                             temp21b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
38959                             temp21b.ptr.pp_double[2][1] = work.ptr.p_double[j+1+n];
38960                             evd_internalhsevdlaln2(ae_true, 2, 1, smin, 1.0, &temp22, 1.0, 1.0, &temp21b, wr, (double)(0), &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38961 
38962                             /*
38963                              * Scale if necessary
38964                              */
38965                             if( ae_fp_neq(scl,(double)(1)) )
38966                             {
38967                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
38968                             }
38969                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
38970                             work.ptr.p_double[j+1+n] = x.ptr.pp_double[2][1];
38971                             vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), ae_maxreal(ae_fabs(work.ptr.p_double[j+1+n], _state), vmax, _state), _state);
38972                             vcrit = bignum/vmax;
38973                         }
38974                     }
38975 
38976                     /*
38977                      * Copy the vector x or Q*x to VL and normalize.
38978                      */
38979                     if( !over )
38980                     {
38981                         ae_v_move(&vl->ptr.pp_double[ki][iis], vl->stride, &work.ptr.p_double[ki+n], 1, ae_v_len(ki,n));
38982                         ii = columnidxabsmax(vl, ki, n, iis, _state);
38983                         remax = 1/ae_fabs(vl->ptr.pp_double[ii][iis], _state);
38984                         ae_v_muld(&vl->ptr.pp_double[ki][iis], vl->stride, ae_v_len(ki,n), remax);
38985                         for(k=1; k<=ki-1; k++)
38986                         {
38987                             vl->ptr.pp_double[k][iis] = (double)(0);
38988                         }
38989                     }
38990                     else
38991                     {
38992                         if( ki<n )
38993                         {
38994                             ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n));
38995                             matrixvectormultiply(vl, 1, n, ki+1, n, ae_false, &work, ki+1+n, n+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state);
38996                             ae_v_move(&vl->ptr.pp_double[1][ki], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
38997                         }
38998                         ii = columnidxabsmax(vl, 1, n, ki, _state);
38999                         remax = 1/ae_fabs(vl->ptr.pp_double[ii][ki], _state);
39000                         ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), remax);
39001                     }
39002                 }
39003                 else
39004                 {
39005 
39006                     /*
39007                      * Complex left eigenvector.
39008                      *
39009                      * Initial solve:
39010                      *   ((T(KI,KI)    T(KI,KI+1) )' - (WR - I* WI))*X = 0.
39011                      *   ((T(KI+1,KI) T(KI+1,KI+1))                )
39012                      */
39013                     if( ae_fp_greater_eq(ae_fabs(t->ptr.pp_double[ki][ki+1], _state),ae_fabs(t->ptr.pp_double[ki+1][ki], _state)) )
39014                     {
39015                         work.ptr.p_double[ki+n] = wi/t->ptr.pp_double[ki][ki+1];
39016                         work.ptr.p_double[ki+1+n2] = (double)(1);
39017                     }
39018                     else
39019                     {
39020                         work.ptr.p_double[ki+n] = (double)(1);
39021                         work.ptr.p_double[ki+1+n2] = -wi/t->ptr.pp_double[ki+1][ki];
39022                     }
39023                     work.ptr.p_double[ki+1+n] = (double)(0);
39024                     work.ptr.p_double[ki+n2] = (double)(0);
39025 
39026                     /*
39027                      * Form right-hand side
39028                      */
39029                     for(k=ki+2; k<=n; k++)
39030                     {
39031                         work.ptr.p_double[k+n] = -work.ptr.p_double[ki+n]*t->ptr.pp_double[ki][k];
39032                         work.ptr.p_double[k+n2] = -work.ptr.p_double[ki+1+n2]*t->ptr.pp_double[ki+1][k];
39033                     }
39034 
39035                     /*
39036                      * Solve complex quasi-triangular system:
39037                      * ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
39038                      */
39039                     vmax = (double)(1);
39040                     vcrit = bignum;
39041                     jnxt = ki+2;
39042                     for(j=ki+2; j<=n; j++)
39043                     {
39044                         if( j<jnxt )
39045                         {
39046                             continue;
39047                         }
39048                         j1 = j;
39049                         j2 = j;
39050                         jnxt = j+1;
39051                         if( j<n )
39052                         {
39053                             if( ae_fp_neq(t->ptr.pp_double[j+1][j],(double)(0)) )
39054                             {
39055                                 j2 = j+1;
39056                                 jnxt = j+2;
39057                             }
39058                         }
39059                         if( j1==j2 )
39060                         {
39061 
39062                             /*
39063                              * 1-by-1 diagonal block
39064                              *
39065                              * Scale if necessary to avoid overflow when
39066                              * forming the right-hand side elements.
39067                              */
39068                             if( ae_fp_greater(work.ptr.p_double[j],vcrit) )
39069                             {
39070                                 rec = 1/vmax;
39071                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
39072                                 ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), rec);
39073                                 vmax = (double)(1);
39074                                 vcrit = bignum;
39075                             }
39076                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1));
39077                             work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
39078                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1));
39079                             work.ptr.p_double[j+n2] = work.ptr.p_double[j+n2]-vt;
39080 
39081                             /*
39082                              * Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2
39083                              */
39084                             temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
39085                             temp12b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
39086                             temp12b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n];
39087                             evd_internalhsevdlaln2(ae_false, 1, 2, smin, 1.0, &temp11, 1.0, 1.0, &temp12b, wr, -wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
39088 
39089                             /*
39090                              * Scale if necessary
39091                              */
39092                             if( ae_fp_neq(scl,(double)(1)) )
39093                             {
39094                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
39095                                 ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), scl);
39096                             }
39097                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
39098                             work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2];
39099                             vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), ae_maxreal(ae_fabs(work.ptr.p_double[j+n2], _state), vmax, _state), _state);
39100                             vcrit = bignum/vmax;
39101                         }
39102                         else
39103                         {
39104 
39105                             /*
39106                              * 2-by-2 diagonal block
39107                              *
39108                              * Scale if necessary to avoid overflow when forming
39109                              * the right-hand side elements.
39110                              */
39111                             beta = ae_maxreal(work.ptr.p_double[j], work.ptr.p_double[j+1], _state);
39112                             if( ae_fp_greater(beta,vcrit) )
39113                             {
39114                                 rec = 1/vmax;
39115                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
39116                                 ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), rec);
39117                                 vmax = (double)(1);
39118                                 vcrit = bignum;
39119                             }
39120                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1));
39121                             work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
39122                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1));
39123                             work.ptr.p_double[j+n2] = work.ptr.p_double[j+n2]-vt;
39124                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j+1], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1));
39125                             work.ptr.p_double[j+1+n] = work.ptr.p_double[j+1+n]-vt;
39126                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j+1], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1));
39127                             work.ptr.p_double[j+1+n2] = work.ptr.p_double[j+1+n2]-vt;
39128 
39129                             /*
39130                              * Solve 2-by-2 complex linear equation
39131                              *   ([T(j,j)   T(j,j+1)  ]'-(wr-i*wi)*I)*X = SCALE*B
39132                              *   ([T(j+1,j) T(j+1,j+1)]             )
39133                              */
39134                             temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
39135                             temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j][j+1];
39136                             temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j+1][j];
39137                             temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j+1][j+1];
39138                             temp22b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
39139                             temp22b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n];
39140                             temp22b.ptr.pp_double[2][1] = work.ptr.p_double[j+1+n];
39141                             temp22b.ptr.pp_double[2][2] = work.ptr.p_double[j+1+n+n];
39142                             evd_internalhsevdlaln2(ae_true, 2, 2, smin, 1.0, &temp22, 1.0, 1.0, &temp22b, wr, -wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
39143 
39144                             /*
39145                              * Scale if necessary
39146                              */
39147                             if( ae_fp_neq(scl,(double)(1)) )
39148                             {
39149                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
39150                                 ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), scl);
39151                             }
39152                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
39153                             work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2];
39154                             work.ptr.p_double[j+1+n] = x.ptr.pp_double[2][1];
39155                             work.ptr.p_double[j+1+n2] = x.ptr.pp_double[2][2];
39156                             vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[1][1], _state), vmax, _state);
39157                             vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[1][2], _state), vmax, _state);
39158                             vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[2][1], _state), vmax, _state);
39159                             vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[2][2], _state), vmax, _state);
39160                             vcrit = bignum/vmax;
39161                         }
39162                     }
39163 
39164                     /*
39165                      * Copy the vector x or Q*x to VL and normalize.
39166                      */
39167                     if( !over )
39168                     {
39169                         ae_v_move(&vl->ptr.pp_double[ki][iis], vl->stride, &work.ptr.p_double[ki+n], 1, ae_v_len(ki,n));
39170                         ae_v_move(&vl->ptr.pp_double[ki][iis+1], vl->stride, &work.ptr.p_double[ki+n2], 1, ae_v_len(ki,n));
39171                         emax = (double)(0);
39172                         for(k=ki; k<=n; k++)
39173                         {
39174                             emax = ae_maxreal(emax, ae_fabs(vl->ptr.pp_double[k][iis], _state)+ae_fabs(vl->ptr.pp_double[k][iis+1], _state), _state);
39175                         }
39176                         remax = 1/emax;
39177                         ae_v_muld(&vl->ptr.pp_double[ki][iis], vl->stride, ae_v_len(ki,n), remax);
39178                         ae_v_muld(&vl->ptr.pp_double[ki][iis+1], vl->stride, ae_v_len(ki,n), remax);
39179                         for(k=1; k<=ki-1; k++)
39180                         {
39181                             vl->ptr.pp_double[k][iis] = (double)(0);
39182                             vl->ptr.pp_double[k][iis+1] = (double)(0);
39183                         }
39184                     }
39185                     else
39186                     {
39187                         if( ki<n-1 )
39188                         {
39189                             ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n));
39190                             matrixvectormultiply(vl, 1, n, ki+2, n, ae_false, &work, ki+2+n, n+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state);
39191                             ae_v_move(&vl->ptr.pp_double[1][ki], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
39192                             ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n));
39193                             matrixvectormultiply(vl, 1, n, ki+2, n, ae_false, &work, ki+2+n2, n+n2, 1.0, &temp, 1, n, work.ptr.p_double[ki+1+n2], _state);
39194                             ae_v_move(&vl->ptr.pp_double[1][ki+1], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
39195                         }
39196                         else
39197                         {
39198                             vt = work.ptr.p_double[ki+n];
39199                             ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), vt);
39200                             vt = work.ptr.p_double[ki+1+n2];
39201                             ae_v_muld(&vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n), vt);
39202                         }
39203                         emax = (double)(0);
39204                         for(k=1; k<=n; k++)
39205                         {
39206                             emax = ae_maxreal(emax, ae_fabs(vl->ptr.pp_double[k][ki], _state)+ae_fabs(vl->ptr.pp_double[k][ki+1], _state), _state);
39207                         }
39208                         remax = 1/emax;
39209                         ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), remax);
39210                         ae_v_muld(&vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n), remax);
39211                     }
39212                 }
39213                 iis = iis+1;
39214                 if( ip!=0 )
39215                 {
39216                     iis = iis+1;
39217                 }
39218             }
39219             if( ip==-1 )
39220             {
39221                 ip = 0;
39222             }
39223             if( ip==1 )
39224             {
39225                 ip = -1;
39226             }
39227         }
39228     }
39229     ae_frame_leave(_state);
39230 }
39231 
39232 
39233 /*************************************************************************
39234 DLALN2 solves a system of the form  (ca A - w D ) X = s B
39235 or (ca A' - w D) X = s B   with possible scaling ("s") and
39236 perturbation of A.  (A' means A-transpose.)
39237 
39238 A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
39239 real diagonal matrix, w is a real or complex value, and X and B are
39240 NA x 1 matrices -- real if w is real, complex if w is complex.  NA
39241 may be 1 or 2.
39242 
39243 If w is complex, X and B are represented as NA x 2 matrices,
39244 the first column of each being the real part and the second
39245 being the imaginary part.
39246 
39247 "s" is a scaling factor (.LE. 1), computed by DLALN2, which is
39248 so chosen that X can be computed without overflow.  X is further
39249 scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
39250 than overflow.
39251 
39252 If both singular values of (ca A - w D) are less than SMIN,
39253 SMIN*identity will be used instead of (ca A - w D).  If only one
39254 singular value is less than SMIN, one element of (ca A - w D) will be
39255 perturbed enough to make the smallest singular value roughly SMIN.
39256 If both singular values are at least SMIN, (ca A - w D) will not be
39257 perturbed.  In any case, the perturbation will be at most some small
39258 multiple of max( SMIN, ulp*norm(ca A - w D) ).  The singular values
39259 are computed by infinity-norm approximations, and thus will only be
39260 correct to a factor of 2 or so.
39261 
39262 Note: all input quantities are assumed to be smaller than overflow
39263 by a reasonable factor.  (See BIGNUM.)
39264 
39265   -- LAPACK auxiliary routine (version 3.0) --
39266      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
39267      Courant Institute, Argonne National Lab, and Rice University
39268      October 31, 1992
39269 *************************************************************************/
static void evd_internalhsevdlaln2(ae_bool ltrans,
     ae_int_t na,
     ae_int_t nw,
     double smin,
     double ca,
     /* Real    */ ae_matrix* a,
     double d1,
     double d2,
     /* Real    */ ae_matrix* b,
     double wr,
     double wi,
     /* Boolean */ ae_vector* rswap4,
     /* Boolean */ ae_vector* zswap4,
     /* Integer */ ae_matrix* ipivot44,
     /* Real    */ ae_vector* civ4,
     /* Real    */ ae_vector* crv4,
     /* Real    */ ae_matrix* x,
     double* scl,
     double* xnorm,
     ae_int_t* info,
     ae_state *_state)
{
    ae_int_t icmax;
    ae_int_t j;
    double bbnd;
    double bi1;
    double bi2;
    double bignum;
    double bnorm;
    double br1;
    double br2;
    double ci21;
    double ci22;
    double cmax;
    double cnorm;
    double cr21;
    double cr22;
    double csi;
    double csr;
    double li21;
    double lr21;
    double smini;
    double smlnum;
    double temp;
    double u22abs;
    double ui11;
    double ui11r;
    double ui12;
    double ui12s;
    double ui22;
    double ur11;
    double ur11r;
    double ur12;
    double ur12s;
    double ur22;
    double xi1;
    double xi2;
    double xr1;
    double xr2;
    double tmp1;
    double tmp2;

    /*
     * Pre-initialize output parameters
     */
    *scl = 0;
    *xnorm = 0;
    *info = 0;

    /*
     * Lookup tables for the 2x2 Gaussian elimination with complete
     * pivoting (same data as LAPACK DLALN2's ZSWAP/RSWAP/IPIVOT).
     * For each possible pivot position ICMAX in 1..4 (column-major
     * index into the 2x2 matrix C):
     *   ZSWAP4[ICMAX]  - whether the two solution components must be
     *                    swapped back after the solve,
     *   RSWAP4[ICMAX]  - whether the two right-hand side rows must be
     *                    swapped before the solve,
     *   column ICMAX of IPIVOT44 - order in which the four elements of
     *                    C are consumed: pivot, element in the pivot's
     *                    column, element in the pivot's row, remaining
     *                    element.
     */
    zswap4->ptr.p_bool[1] = ae_false;
    zswap4->ptr.p_bool[2] = ae_false;
    zswap4->ptr.p_bool[3] = ae_true;
    zswap4->ptr.p_bool[4] = ae_true;
    rswap4->ptr.p_bool[1] = ae_false;
    rswap4->ptr.p_bool[2] = ae_true;
    rswap4->ptr.p_bool[3] = ae_false;
    rswap4->ptr.p_bool[4] = ae_true;
    ipivot44->ptr.pp_int[1][1] = 1;
    ipivot44->ptr.pp_int[2][1] = 2;
    ipivot44->ptr.pp_int[3][1] = 3;
    ipivot44->ptr.pp_int[4][1] = 4;
    ipivot44->ptr.pp_int[1][2] = 2;
    ipivot44->ptr.pp_int[2][2] = 1;
    ipivot44->ptr.pp_int[3][2] = 4;
    ipivot44->ptr.pp_int[4][2] = 3;
    ipivot44->ptr.pp_int[1][3] = 3;
    ipivot44->ptr.pp_int[2][3] = 4;
    ipivot44->ptr.pp_int[3][3] = 1;
    ipivot44->ptr.pp_int[4][3] = 2;
    ipivot44->ptr.pp_int[1][4] = 4;
    ipivot44->ptr.pp_int[2][4] = 3;
    ipivot44->ptr.pp_int[3][4] = 2;
    ipivot44->ptr.pp_int[4][4] = 1;

    /*
     * SMLNUM - smallest magnitude that can safely be inverted,
     * BIGNUM - its reciprocal (overflow threshold),
     * SMINI  - effective lower bound for pivots/diagonals.
     */
    smlnum = 2*ae_minrealnumber;
    bignum = 1/smlnum;
    smini = ae_maxreal(smin, smlnum, _state);

    /*
     * Don't check for input errors
     */
    *info = 0;

    /*
     * Standard Initializations
     */
    *scl = (double)(1);
    if( na==1 )
    {

        /*
         * 1 x 1  (i.e., scalar) system   C X = B
         */
        if( nw==1 )
        {

            /*
             * Real 1x1 system.
             *
             * C = ca A - w D
             */
            csr = ca*a->ptr.pp_double[1][1]-wr*d1;
            cnorm = ae_fabs(csr, _state);

            /*
             * If | C | < SMINI, use C = SMINI
             */
            if( ae_fp_less(cnorm,smini) )
            {
                csr = smini;
                cnorm = smini;
                *info = 1;
            }

            /*
             * Check scaling for  X = B / C
             * (downscale B if the quotient would overflow)
             */
            bnorm = ae_fabs(b->ptr.pp_double[1][1], _state);
            if( ae_fp_less(cnorm,(double)(1))&&ae_fp_greater(bnorm,(double)(1)) )
            {
                if( ae_fp_greater(bnorm,bignum*cnorm) )
                {
                    *scl = 1/bnorm;
                }
            }

            /*
             * Compute X
             */
            x->ptr.pp_double[1][1] = b->ptr.pp_double[1][1]*(*scl)/csr;
            *xnorm = ae_fabs(x->ptr.pp_double[1][1], _state);
        }
        else
        {

            /*
             * Complex 1x1 system (w is complex)
             *
             * C = ca A - w D
             */
            csr = ca*a->ptr.pp_double[1][1]-wr*d1;
            csi = -wi*d1;
            cnorm = ae_fabs(csr, _state)+ae_fabs(csi, _state);

            /*
             * If | C | < SMINI, use C = SMINI
             */
            if( ae_fp_less(cnorm,smini) )
            {
                csr = smini;
                csi = (double)(0);
                cnorm = smini;
                *info = 1;
            }

            /*
             * Check scaling for  X = B / C
             * (downscale B if the quotient would overflow)
             */
            bnorm = ae_fabs(b->ptr.pp_double[1][1], _state)+ae_fabs(b->ptr.pp_double[1][2], _state);
            if( ae_fp_less(cnorm,(double)(1))&&ae_fp_greater(bnorm,(double)(1)) )
            {
                if( ae_fp_greater(bnorm,bignum*cnorm) )
                {
                    *scl = 1/bnorm;
                }
            }

            /*
             * Compute X, using robust complex division of scl*B by C
             */
            evd_internalhsevdladiv(*scl*b->ptr.pp_double[1][1], *scl*b->ptr.pp_double[1][2], csr, csi, &tmp1, &tmp2, _state);
            x->ptr.pp_double[1][1] = tmp1;
            x->ptr.pp_double[1][2] = tmp2;
            *xnorm = ae_fabs(x->ptr.pp_double[1][1], _state)+ae_fabs(x->ptr.pp_double[1][2], _state);
        }
    }
    else
    {

        /*
         * 2x2 System
         *
         * Compute the real part of  C = ca A - w D  (or  ca A' - w D )
         *
         * CRV4/CIV4 hold the real/imaginary parts of C packed in
         * column-major order: index 1 -> C(1,1), 2 -> C(2,1),
         * 3 -> C(1,2), 4 -> C(2,2) (hence 1+0, 2+0, 1+2, 2+2 below).
         */
        crv4->ptr.p_double[1+0] = ca*a->ptr.pp_double[1][1]-wr*d1;
        crv4->ptr.p_double[2+2] = ca*a->ptr.pp_double[2][2]-wr*d2;
        if( ltrans )
        {
            crv4->ptr.p_double[1+2] = ca*a->ptr.pp_double[2][1];
            crv4->ptr.p_double[2+0] = ca*a->ptr.pp_double[1][2];
        }
        else
        {
            crv4->ptr.p_double[2+0] = ca*a->ptr.pp_double[2][1];
            crv4->ptr.p_double[1+2] = ca*a->ptr.pp_double[1][2];
        }
        if( nw==1 )
        {

            /*
             * Real 2x2 system  (w is real)
             *
             * Find the largest element in C
             */
            cmax = (double)(0);
            icmax = 0;
            for(j=1; j<=4; j++)
            {
                if( ae_fp_greater(ae_fabs(crv4->ptr.p_double[j], _state),cmax) )
                {
                    cmax = ae_fabs(crv4->ptr.p_double[j], _state);
                    icmax = j;
                }
            }

            /*
             * If norm(C) < SMINI, use SMINI*identity.
             */
            if( ae_fp_less(cmax,smini) )
            {
                bnorm = ae_maxreal(ae_fabs(b->ptr.pp_double[1][1], _state), ae_fabs(b->ptr.pp_double[2][1], _state), _state);
                if( ae_fp_less(smini,(double)(1))&&ae_fp_greater(bnorm,(double)(1)) )
                {
                    if( ae_fp_greater(bnorm,bignum*smini) )
                    {
                        *scl = 1/bnorm;
                    }
                }
                temp = *scl/smini;
                x->ptr.pp_double[1][1] = temp*b->ptr.pp_double[1][1];
                x->ptr.pp_double[2][1] = temp*b->ptr.pp_double[2][1];
                *xnorm = temp*bnorm;
                *info = 1;
                return;
            }

            /*
             * Gaussian elimination with complete pivoting.
             * UR11/CR21/UR12/CR22 are the elements of C read off in
             * pivot order (column ICMAX of IPIVOT44); LR21 is the
             * elimination multiplier, UR22 the Schur complement.
             */
            ur11 = crv4->ptr.p_double[icmax];
            cr21 = crv4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]];
            ur12 = crv4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]];
            cr22 = crv4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]];
            ur11r = 1/ur11;
            lr21 = ur11r*cr21;
            ur22 = cr22-ur12*lr21;

            /*
             * If smaller pivot < SMINI, use SMINI
             */
            if( ae_fp_less(ae_fabs(ur22, _state),smini) )
            {
                ur22 = smini;
                *info = 1;
            }

            /*
             * Permute the right-hand side to match the row pivoting
             */
            if( rswap4->ptr.p_bool[icmax] )
            {
                br1 = b->ptr.pp_double[2][1];
                br2 = b->ptr.pp_double[1][1];
            }
            else
            {
                br1 = b->ptr.pp_double[1][1];
                br2 = b->ptr.pp_double[2][1];
            }
            br2 = br2-lr21*br1;

            /*
             * Downscale the RHS if back-substitution could overflow
             */
            bbnd = ae_maxreal(ae_fabs(br1*(ur22*ur11r), _state), ae_fabs(br2, _state), _state);
            if( ae_fp_greater(bbnd,(double)(1))&&ae_fp_less(ae_fabs(ur22, _state),(double)(1)) )
            {
                if( ae_fp_greater_eq(bbnd,bignum*ae_fabs(ur22, _state)) )
                {
                    *scl = 1/bbnd;
                }
            }

            /*
             * Back-substitute, then undo the column pivoting
             */
            xr2 = br2*(*scl)/ur22;
            xr1 = *scl*br1*ur11r-xr2*(ur11r*ur12);
            if( zswap4->ptr.p_bool[icmax] )
            {
                x->ptr.pp_double[1][1] = xr2;
                x->ptr.pp_double[2][1] = xr1;
            }
            else
            {
                x->ptr.pp_double[1][1] = xr1;
                x->ptr.pp_double[2][1] = xr2;
            }
            *xnorm = ae_maxreal(ae_fabs(xr1, _state), ae_fabs(xr2, _state), _state);

            /*
             * Further scaling if  norm(A) norm(X) > overflow
             */
            if( ae_fp_greater(*xnorm,(double)(1))&&ae_fp_greater(cmax,(double)(1)) )
            {
                if( ae_fp_greater(*xnorm,bignum/cmax) )
                {
                    temp = cmax/bignum;
                    x->ptr.pp_double[1][1] = temp*x->ptr.pp_double[1][1];
                    x->ptr.pp_double[2][1] = temp*x->ptr.pp_double[2][1];
                    *xnorm = temp*(*xnorm);
                    *scl = temp*(*scl);
                }
            }
        }
        else
        {

            /*
             * Complex 2x2 system  (w is complex)
             *
             * Find the largest element in C
             * (imaginary part of C is diagonal: -wi*D; same packed
             * layout as CRV4)
             */
            civ4->ptr.p_double[1+0] = -wi*d1;
            civ4->ptr.p_double[2+0] = (double)(0);
            civ4->ptr.p_double[1+2] = (double)(0);
            civ4->ptr.p_double[2+2] = -wi*d2;
            cmax = (double)(0);
            icmax = 0;
            for(j=1; j<=4; j++)
            {
                if( ae_fp_greater(ae_fabs(crv4->ptr.p_double[j], _state)+ae_fabs(civ4->ptr.p_double[j], _state),cmax) )
                {
                    cmax = ae_fabs(crv4->ptr.p_double[j], _state)+ae_fabs(civ4->ptr.p_double[j], _state);
                    icmax = j;
                }
            }

            /*
             * If norm(C) < SMINI, use SMINI*identity.
             */
            if( ae_fp_less(cmax,smini) )
            {
                bnorm = ae_maxreal(ae_fabs(b->ptr.pp_double[1][1], _state)+ae_fabs(b->ptr.pp_double[1][2], _state), ae_fabs(b->ptr.pp_double[2][1], _state)+ae_fabs(b->ptr.pp_double[2][2], _state), _state);
                if( ae_fp_less(smini,(double)(1))&&ae_fp_greater(bnorm,(double)(1)) )
                {
                    if( ae_fp_greater(bnorm,bignum*smini) )
                    {
                        *scl = 1/bnorm;
                    }
                }
                temp = *scl/smini;
                x->ptr.pp_double[1][1] = temp*b->ptr.pp_double[1][1];
                x->ptr.pp_double[2][1] = temp*b->ptr.pp_double[2][1];
                x->ptr.pp_double[1][2] = temp*b->ptr.pp_double[1][2];
                x->ptr.pp_double[2][2] = temp*b->ptr.pp_double[2][2];
                *xnorm = temp*bnorm;
                *info = 1;
                return;
            }

            /*
             * Gaussian elimination with complete pivoting.
             * (UR*/UI* = real/imaginary parts of the pivoted elements)
             */
            ur11 = crv4->ptr.p_double[icmax];
            ui11 = civ4->ptr.p_double[icmax];
            cr21 = crv4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]];
            ci21 = civ4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]];
            ur12 = crv4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]];
            ui12 = civ4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]];
            cr22 = crv4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]];
            ci22 = civ4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]];
            if( icmax==1||icmax==4 )
            {

                /*
                 * Code when off-diagonals of pivoted C are real
                 * (diagonal pivot: invert complex UR11+i*UI11 robustly,
                 * scaling by whichever component dominates)
                 */
                if( ae_fp_greater(ae_fabs(ur11, _state),ae_fabs(ui11, _state)) )
                {
                    temp = ui11/ur11;
                    ur11r = 1/(ur11*(1+ae_sqr(temp, _state)));
                    ui11r = -temp*ur11r;
                }
                else
                {
                    temp = ur11/ui11;
                    ui11r = -1/(ui11*(1+ae_sqr(temp, _state)));
                    ur11r = -temp*ui11r;
                }
                lr21 = cr21*ur11r;
                li21 = cr21*ui11r;
                ur12s = ur12*ur11r;
                ui12s = ur12*ui11r;
                ur22 = cr22-ur12*lr21;
                ui22 = ci22-ur12*li21;
            }
            else
            {

                /*
                 * Code when diagonals of pivoted C are real
                 * (off-diagonal pivot: UR11 is real, so its inverse is
                 * trivially real)
                 */
                ur11r = 1/ur11;
                ui11r = (double)(0);
                lr21 = cr21*ur11r;
                li21 = ci21*ur11r;
                ur12s = ur12*ur11r;
                ui12s = ui12*ur11r;
                ur22 = cr22-ur12*lr21+ui12*li21;
                ui22 = -ur12*li21-ui12*lr21;
            }
            u22abs = ae_fabs(ur22, _state)+ae_fabs(ui22, _state);

            /*
             * If smaller pivot < SMINI, use SMINI
             */
            if( ae_fp_less(u22abs,smini) )
            {
                ur22 = smini;
                ui22 = (double)(0);
                *info = 1;
            }

            /*
             * Permute the right-hand side to match the row pivoting
             */
            if( rswap4->ptr.p_bool[icmax] )
            {
                br2 = b->ptr.pp_double[1][1];
                br1 = b->ptr.pp_double[2][1];
                bi2 = b->ptr.pp_double[1][2];
                bi1 = b->ptr.pp_double[2][2];
            }
            else
            {
                br1 = b->ptr.pp_double[1][1];
                br2 = b->ptr.pp_double[2][1];
                bi1 = b->ptr.pp_double[1][2];
                bi2 = b->ptr.pp_double[2][2];
            }
            br2 = br2-lr21*br1+li21*bi1;
            bi2 = bi2-li21*br1-lr21*bi1;

            /*
             * Downscale the RHS if back-substitution could overflow
             */
            bbnd = ae_maxreal((ae_fabs(br1, _state)+ae_fabs(bi1, _state))*(u22abs*(ae_fabs(ur11r, _state)+ae_fabs(ui11r, _state))), ae_fabs(br2, _state)+ae_fabs(bi2, _state), _state);
            if( ae_fp_greater(bbnd,(double)(1))&&ae_fp_less(u22abs,(double)(1)) )
            {
                if( ae_fp_greater_eq(bbnd,bignum*u22abs) )
                {
                    *scl = 1/bbnd;
                    br1 = *scl*br1;
                    bi1 = *scl*bi1;
                    br2 = *scl*br2;
                    bi2 = *scl*bi2;
                }
            }

            /*
             * Back-substitute (robust complex division for the second
             * unknown), then undo the column pivoting
             */
            evd_internalhsevdladiv(br2, bi2, ur22, ui22, &xr2, &xi2, _state);
            xr1 = ur11r*br1-ui11r*bi1-ur12s*xr2+ui12s*xi2;
            xi1 = ui11r*br1+ur11r*bi1-ui12s*xr2-ur12s*xi2;
            if( zswap4->ptr.p_bool[icmax] )
            {
                x->ptr.pp_double[1][1] = xr2;
                x->ptr.pp_double[2][1] = xr1;
                x->ptr.pp_double[1][2] = xi2;
                x->ptr.pp_double[2][2] = xi1;
            }
            else
            {
                x->ptr.pp_double[1][1] = xr1;
                x->ptr.pp_double[2][1] = xr2;
                x->ptr.pp_double[1][2] = xi1;
                x->ptr.pp_double[2][2] = xi2;
            }
            *xnorm = ae_maxreal(ae_fabs(xr1, _state)+ae_fabs(xi1, _state), ae_fabs(xr2, _state)+ae_fabs(xi2, _state), _state);

            /*
             * Further scaling if  norm(A) norm(X) > overflow
             */
            if( ae_fp_greater(*xnorm,(double)(1))&&ae_fp_greater(cmax,(double)(1)) )
            {
                if( ae_fp_greater(*xnorm,bignum/cmax) )
                {
                    temp = cmax/bignum;
                    x->ptr.pp_double[1][1] = temp*x->ptr.pp_double[1][1];
                    x->ptr.pp_double[2][1] = temp*x->ptr.pp_double[2][1];
                    x->ptr.pp_double[1][2] = temp*x->ptr.pp_double[1][2];
                    x->ptr.pp_double[2][2] = temp*x->ptr.pp_double[2][2];
                    *xnorm = temp*(*xnorm);
                    *scl = temp*(*scl);
                }
            }
        }
    }
}
39763 
39764 
39765 /*************************************************************************
39766 performs complex division in  real arithmetic
39767 
39768                         a + i*b
39769              p + i*q = ---------
39770                         c + i*d
39771 
39772 The algorithm is due to Robert L. Smith and can be found
39773 in D. Knuth, The art of Computer Programming, Vol.2, p.195
39774 
39775   -- LAPACK auxiliary routine (version 3.0) --
39776      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
39777      Courant Institute, Argonne National Lab, and Rice University
39778      October 31, 1992
39779 *************************************************************************/
evd_internalhsevdladiv(double a,double b,double c,double d,double * p,double * q,ae_state * _state)39780 static void evd_internalhsevdladiv(double a,
39781      double b,
39782      double c,
39783      double d,
39784      double* p,
39785      double* q,
39786      ae_state *_state)
39787 {
39788     double e;
39789     double f;
39790 
39791     *p = 0;
39792     *q = 0;
39793 
39794     if( ae_fp_less(ae_fabs(d, _state),ae_fabs(c, _state)) )
39795     {
39796         e = d/c;
39797         f = c+d*e;
39798         *p = (a+b*e)/f;
39799         *q = (b-a*e)/f;
39800     }
39801     else
39802     {
39803         e = c/d;
39804         f = d+c*e;
39805         *p = (b+a*e)/f;
39806         *q = (-a+b*e)/f;
39807     }
39808 }
39809 
39810 
/*************************************************************************
Constructor for eigsubspacestate: puts every dynamically-allocated member
(RNG state, work vectors/matrices, rcomm state) into a valid empty state.
*************************************************************************/
void _eigsubspacestate_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    eigsubspacestate *state = (eigsubspacestate*)_p;
    ae_touch_ptr((void*)state);
    _hqrndstate_init(&state->rs, _state, make_automatic);
    ae_vector_init(&state->tau, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->q0, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->qcur, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->qnew, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->znew, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->r, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->rz, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->tz, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->rq, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->dummy, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->rw, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->tw, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->wcur, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->wprev, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->wrank, 0, DT_REAL, _state, make_automatic);
    _apbuffers_init(&state->buf, _state, make_automatic);
    ae_matrix_init(&state->x, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->ax, 0, 0, DT_REAL, _state, make_automatic);
    _rcommstate_init(&state->rstate, _state, make_automatic);
}
39836 
39837 
/*************************************************************************
Copy-constructor for eigsubspacestate: deep-copies every member of *_src
into *_dst (scalars by assignment, containers via their init_copy).
*************************************************************************/
void _eigsubspacestate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    eigsubspacestate *to = (eigsubspacestate*)_dst;
    eigsubspacestate *from = (eigsubspacestate*)_src;
    to->n = from->n;
    to->k = from->k;
    to->nwork = from->nwork;
    to->maxits = from->maxits;
    to->eps = from->eps;
    to->eigenvectorsneeded = from->eigenvectorsneeded;
    to->matrixtype = from->matrixtype;
    to->usewarmstart = from->usewarmstart;
    to->firstcall = from->firstcall;
    _hqrndstate_init_copy(&to->rs, &from->rs, _state, make_automatic);
    to->running = from->running;
    ae_vector_init_copy(&to->tau, &from->tau, _state, make_automatic);
    ae_matrix_init_copy(&to->q0, &from->q0, _state, make_automatic);
    ae_matrix_init_copy(&to->qcur, &from->qcur, _state, make_automatic);
    ae_matrix_init_copy(&to->qnew, &from->qnew, _state, make_automatic);
    ae_matrix_init_copy(&to->znew, &from->znew, _state, make_automatic);
    ae_matrix_init_copy(&to->r, &from->r, _state, make_automatic);
    ae_matrix_init_copy(&to->rz, &from->rz, _state, make_automatic);
    ae_matrix_init_copy(&to->tz, &from->tz, _state, make_automatic);
    ae_matrix_init_copy(&to->rq, &from->rq, _state, make_automatic);
    ae_matrix_init_copy(&to->dummy, &from->dummy, _state, make_automatic);
    ae_vector_init_copy(&to->rw, &from->rw, _state, make_automatic);
    ae_vector_init_copy(&to->tw, &from->tw, _state, make_automatic);
    ae_vector_init_copy(&to->wcur, &from->wcur, _state, make_automatic);
    ae_vector_init_copy(&to->wprev, &from->wprev, _state, make_automatic);
    ae_vector_init_copy(&to->wrank, &from->wrank, _state, make_automatic);
    _apbuffers_init_copy(&to->buf, &from->buf, _state, make_automatic);
    ae_matrix_init_copy(&to->x, &from->x, _state, make_automatic);
    ae_matrix_init_copy(&to->ax, &from->ax, _state, make_automatic);
    to->requesttype = from->requesttype;
    to->requestsize = from->requestsize;
    to->repiterationscount = from->repiterationscount;
    _rcommstate_init_copy(&to->rstate, &from->rstate, _state, make_automatic);
}
39876 
39877 
_eigsubspacestate_clear(void * _p)39878 void _eigsubspacestate_clear(void* _p)
39879 {
39880     eigsubspacestate *p = (eigsubspacestate*)_p;
39881     ae_touch_ptr((void*)p);
39882     _hqrndstate_clear(&p->rs);
39883     ae_vector_clear(&p->tau);
39884     ae_matrix_clear(&p->q0);
39885     ae_matrix_clear(&p->qcur);
39886     ae_matrix_clear(&p->qnew);
39887     ae_matrix_clear(&p->znew);
39888     ae_matrix_clear(&p->r);
39889     ae_matrix_clear(&p->rz);
39890     ae_matrix_clear(&p->tz);
39891     ae_matrix_clear(&p->rq);
39892     ae_matrix_clear(&p->dummy);
39893     ae_vector_clear(&p->rw);
39894     ae_vector_clear(&p->tw);
39895     ae_vector_clear(&p->wcur);
39896     ae_vector_clear(&p->wprev);
39897     ae_vector_clear(&p->wrank);
39898     _apbuffers_clear(&p->buf);
39899     ae_matrix_clear(&p->x);
39900     ae_matrix_clear(&p->ax);
39901     _rcommstate_clear(&p->rstate);
39902 }
39903 
39904 
_eigsubspacestate_destroy(void * _p)39905 void _eigsubspacestate_destroy(void* _p)
39906 {
39907     eigsubspacestate *p = (eigsubspacestate*)_p;
39908     ae_touch_ptr((void*)p);
39909     _hqrndstate_destroy(&p->rs);
39910     ae_vector_destroy(&p->tau);
39911     ae_matrix_destroy(&p->q0);
39912     ae_matrix_destroy(&p->qcur);
39913     ae_matrix_destroy(&p->qnew);
39914     ae_matrix_destroy(&p->znew);
39915     ae_matrix_destroy(&p->r);
39916     ae_matrix_destroy(&p->rz);
39917     ae_matrix_destroy(&p->tz);
39918     ae_matrix_destroy(&p->rq);
39919     ae_matrix_destroy(&p->dummy);
39920     ae_vector_destroy(&p->rw);
39921     ae_vector_destroy(&p->tw);
39922     ae_vector_destroy(&p->wcur);
39923     ae_vector_destroy(&p->wprev);
39924     ae_vector_destroy(&p->wrank);
39925     _apbuffers_destroy(&p->buf);
39926     ae_matrix_destroy(&p->x);
39927     ae_matrix_destroy(&p->ax);
39928     _rcommstate_destroy(&p->rstate);
39929 }
39930 
39931 
/*************************************************************************
Constructor for eigsubspacereport (scalar-only struct: nothing to allocate).
*************************************************************************/
void _eigsubspacereport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    eigsubspacereport *rep = (eigsubspacereport*)_p;
    ae_touch_ptr((void*)rep);
}
39937 
39938 
/*************************************************************************
Copy-constructor for eigsubspacereport: plain member-wise copy.
*************************************************************************/
void _eigsubspacereport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    eigsubspacereport *to = (eigsubspacereport*)_dst;
    eigsubspacereport *from = (eigsubspacereport*)_src;
    to->iterationscount = from->iterationscount;
}
39945 
39946 
_eigsubspacereport_clear(void * _p)39947 void _eigsubspacereport_clear(void* _p)
39948 {
39949     eigsubspacereport *p = (eigsubspacereport*)_p;
39950     ae_touch_ptr((void*)p);
39951 }
39952 
39953 
_eigsubspacereport_destroy(void * _p)39954 void _eigsubspacereport_destroy(void* _p)
39955 {
39956     eigsubspacereport *p = (eigsubspacereport*)_p;
39957     ae_touch_ptr((void*)p);
39958 }
39959 
39960 
39961 #endif
39962 #if defined(AE_COMPILE_DLU) || !defined(AE_PARTIAL_BUILD)
39963 
39964 
39965 /*************************************************************************
39966 Recurrent complex LU subroutine.
39967 Never call it directly.
39968 
39969   -- ALGLIB routine --
39970      04.01.2010
39971      Bochkanov Sergey
39972 *************************************************************************/
cmatrixluprec(ae_matrix * a,ae_int_t offs,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_vector * tmp,ae_state * _state)39973 void cmatrixluprec(/* Complex */ ae_matrix* a,
39974      ae_int_t offs,
39975      ae_int_t m,
39976      ae_int_t n,
39977      /* Integer */ ae_vector* pivots,
39978      /* Complex */ ae_vector* tmp,
39979      ae_state *_state)
39980 {
39981     ae_int_t i;
39982     ae_int_t m1;
39983     ae_int_t m2;
39984 
39985 
39986     if( ae_minint(m, n, _state)<=ablascomplexblocksize(a, _state) )
39987     {
39988         dlu_cmatrixlup2(a, offs, m, n, pivots, tmp, _state);
39989         return;
39990     }
39991     if( m>n )
39992     {
39993         cmatrixluprec(a, offs, n, n, pivots, tmp, _state);
39994         for(i=0; i<=n-1; i++)
39995         {
39996             ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+n][offs+i], a->stride, "N", ae_v_len(0,m-n-1));
39997             ae_v_cmove(&a->ptr.pp_complex[offs+n][offs+i], a->stride, &a->ptr.pp_complex[offs+n][pivots->ptr.p_int[offs+i]], a->stride, "N", ae_v_len(offs+n,offs+m-1));
39998             ae_v_cmove(&a->ptr.pp_complex[offs+n][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+n,offs+m-1));
39999         }
40000         cmatrixrighttrsm(m-n, n, a, offs, offs, ae_true, ae_true, 0, a, offs+n, offs, _state);
40001         return;
40002     }
40003     ablascomplexsplitlength(a, m, &m1, &m2, _state);
40004     cmatrixluprec(a, offs, m1, n, pivots, tmp, _state);
40005     if( m2>0 )
40006     {
40007         for(i=0; i<=m1-1; i++)
40008         {
40009             if( offs+i!=pivots->ptr.p_int[offs+i] )
40010             {
40011                 ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+m1][offs+i], a->stride, "N", ae_v_len(0,m2-1));
40012                 ae_v_cmove(&a->ptr.pp_complex[offs+m1][offs+i], a->stride, &a->ptr.pp_complex[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, "N", ae_v_len(offs+m1,offs+m-1));
40013                 ae_v_cmove(&a->ptr.pp_complex[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+m1,offs+m-1));
40014             }
40015         }
40016         cmatrixrighttrsm(m2, m1, a, offs, offs, ae_true, ae_true, 0, a, offs+m1, offs, _state);
40017         cmatrixgemm(m-m1, n-m1, m1, ae_complex_from_d(-1.0), a, offs+m1, offs, 0, a, offs, offs+m1, 0, ae_complex_from_d(1.0), a, offs+m1, offs+m1, _state);
40018         cmatrixluprec(a, offs+m1, m-m1, n-m1, pivots, tmp, _state);
40019         for(i=0; i<=m2-1; i++)
40020         {
40021             if( offs+m1+i!=pivots->ptr.p_int[offs+m1+i] )
40022             {
40023                 ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+m1+i], a->stride, "N", ae_v_len(0,m1-1));
40024                 ae_v_cmove(&a->ptr.pp_complex[offs][offs+m1+i], a->stride, &a->ptr.pp_complex[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, "N", ae_v_len(offs,offs+m1-1));
40025                 ae_v_cmove(&a->ptr.pp_complex[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+m1-1));
40026             }
40027         }
40028     }
40029 }
40030 
40031 
/*************************************************************************
Recurrent real LU subroutine (LU with column pivoting).
Never call it directly.

A      -   array whose [offs..offs+m-1, offs..offs+n-1] block is factored
           in place
pivots -   pivot column indices, filled for [offs..offs+min(m,n)-1]
tmp    -   scratch vector (holds one matrix column during swaps)

  -- ALGLIB routine --
     04.01.2010
     Bochkanov Sergey
*************************************************************************/
void rmatrixluprec(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t m1;
    ae_int_t m2;


    /*
     * Small problems are handled by the non-recursive level-2 kernel.
     */
    if( ae_minint(m, n, _state)<=ablasblocksize(a, _state) )
    {
        dlu_rmatrixlup2(a, offs, m, n, pivots, tmp, _state);
        return;
    }
    if( m>n )
    {
        /*
         * Tall matrix: factor the top NxN square first.
         */
        rmatrixluprec(a, offs, n, n, pivots, tmp, _state);
        for(i=0; i<=n-1; i++)
        {
            /*
             * Replay the column interchanges on rows offs+n..offs+m-1
             * (three-step swap through tmp, skipping trivial pivots).
             */
            if( offs+i!=pivots->ptr.p_int[offs+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+n][offs+i], a->stride, ae_v_len(0,m-n-1));
                ae_v_move(&a->ptr.pp_double[offs+n][offs+i], a->stride, &a->ptr.pp_double[offs+n][pivots->ptr.p_int[offs+i]], a->stride, ae_v_len(offs+n,offs+m-1));
                ae_v_move(&a->ptr.pp_double[offs+n][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs+n,offs+m-1));
            }
        }
        /*
         * Compute the remaining rows of L via a right triangular solve.
         */
        rmatrixrighttrsm(m-n, n, a, offs, offs, ae_true, ae_true, 0, a, offs+n, offs, _state);
        return;
    }
    /*
     * General case: split rows M = M1+M2 and factor the top panel.
     */
    ablassplitlength(a, m, &m1, &m2, _state);
    rmatrixluprec(a, offs, m1, n, pivots, tmp, _state);
    if( m2>0 )
    {
        /*
         * Apply the top panel's column interchanges to the bottom panel.
         */
        for(i=0; i<=m1-1; i++)
        {
            if( offs+i!=pivots->ptr.p_int[offs+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+m1][offs+i], a->stride, ae_v_len(0,m2-1));
                ae_v_move(&a->ptr.pp_double[offs+m1][offs+i], a->stride, &a->ptr.pp_double[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, ae_v_len(offs+m1,offs+m-1));
                ae_v_move(&a->ptr.pp_double[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs+m1,offs+m-1));
            }
        }
        /*
         * TRSM + GEMM form the Schur complement, which is then factored
         * recursively; finally the second factorization's interchanges
         * are replayed on the top-left M1-row strip.
         */
        rmatrixrighttrsm(m2, m1, a, offs, offs, ae_true, ae_true, 0, a, offs+m1, offs, _state);
        rmatrixgemm(m-m1, n-m1, m1, -1.0, a, offs+m1, offs, 0, a, offs, offs+m1, 0, 1.0, a, offs+m1, offs+m1, _state);
        rmatrixluprec(a, offs+m1, m-m1, n-m1, pivots, tmp, _state);
        for(i=0; i<=m2-1; i++)
        {
            if( offs+m1+i!=pivots->ptr.p_int[offs+m1+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+m1+i], a->stride, ae_v_len(0,m1-1));
                ae_v_move(&a->ptr.pp_double[offs][offs+m1+i], a->stride, &a->ptr.pp_double[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, ae_v_len(offs,offs+m1-1));
                ae_v_move(&a->ptr.pp_double[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+m1-1));
            }
        }
    }
}
40100 
40101 
/*************************************************************************
Recurrent complex LU subroutine (LU with row pivoting).
Never call it directly.

A      -   array whose [offs..offs+m-1, offs..offs+n-1] block is factored
           in place
pivots -   pivot row indices, filled for [offs..offs+min(m,n)-1]
tmp    -   scratch vector (holds one matrix row during swaps)

  -- ALGLIB routine --
     04.01.2010
     Bochkanov Sergey
*************************************************************************/
void cmatrixplurec(/* Complex */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Complex */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t tsa;
    ae_int_t tsb;


    /*
     * Tile sizes: tsa - small (L1-friendly; halved because complex
     * elements are twice as large as reals), tsb - large block size.
     */
    tsa = matrixtilesizea(_state)/2;
    tsb = matrixtilesizeb(_state);
    if( n<=tsa )
    {
        /*
         * Narrow panel: use the non-recursive level-2 kernel.
         */
        dlu_cmatrixplu2(a, offs, m, n, pivots, tmp, _state);
        return;
    }
    if( n>m )
    {
        /*
         * Wide matrix: factor the left MxM square, replay its row
         * interchanges on the right (N-M)-column part, then compute the
         * remaining columns of U via a left triangular solve.
         */
        cmatrixplurec(a, offs, m, m, pivots, tmp, _state);
        for(i=0; i<=m-1; i++)
        {
            /* NOTE(review): this swap runs even for trivial pivots
               (offs+i==pivots[offs+i]); harmless (self-swap is a no-op)
               but differs from the guarded loops below. */
            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs+m], 1, "N", ae_v_len(0,n-m-1));
            ae_v_cmove(&a->ptr.pp_complex[offs+i][offs+m], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+m], 1, "N", ae_v_len(offs+m,offs+n-1));
            ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+m], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+m,offs+n-1));
        }
        cmatrixlefttrsm(m, n-m, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+m, _state);
        return;
    }
    /*
     * Split columns N = N1+N2: a full large tile if possible, otherwise
     * a tile-aligned split.
     */
    if( n>tsb )
    {
        n1 = tsb;
        n2 = n-n1;
    }
    else
    {
        tiledsplit(n, tsa, &n1, &n2, _state);
    }
    /*
     * Factor the left panel, then update and factor the trailing part.
     */
    cmatrixplurec(a, offs, m, n1, pivots, tmp, _state);
    if( n2>0 )
    {
        /*
         * Apply the left panel's row interchanges to the right panel.
         */
        for(i=0; i<=n1-1; i++)
        {
            if( offs+i!=pivots->ptr.p_int[offs+i] )
            {
                ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs+n1], 1, "N", ae_v_len(0,n2-1));
                ae_v_cmove(&a->ptr.pp_complex[offs+i][offs+n1], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+n1], 1, "N", ae_v_len(offs+n1,offs+n-1));
                ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+n1], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+n1,offs+n-1));
            }
        }
        /*
         * TRSM + GEMM form the Schur complement, which is factored
         * recursively; afterwards the second factorization's row
         * interchanges are replayed on the left N1-column strip.
         */
        cmatrixlefttrsm(n1, n2, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+n1, _state);
        cmatrixgemm(m-n1, n-n1, n1, ae_complex_from_d(-1.0), a, offs+n1, offs, 0, a, offs, offs+n1, 0, ae_complex_from_d(1.0), a, offs+n1, offs+n1, _state);
        cmatrixplurec(a, offs+n1, m-n1, n-n1, pivots, tmp, _state);
        for(i=0; i<=n2-1; i++)
        {
            if( offs+n1+i!=pivots->ptr.p_int[offs+n1+i] )
            {
                ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+n1+i][offs], 1, "N", ae_v_len(0,n1-1));
                ae_v_cmove(&a->ptr.pp_complex[offs+n1+i][offs], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+n1+i]][offs], 1, "N", ae_v_len(offs,offs+n1-1));
                ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+n1+i]][offs], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+n1-1));
            }
        }
    }
}
40179 
40180 
/*************************************************************************
Recurrent real LU subroutine (LU with row pivoting).
Never call it directly.

A      -   array whose [offs..offs+m-1, offs..offs+n-1] block is factored
           in place
pivots -   pivot row indices, filled for [offs..offs+min(m,n)-1]
tmp    -   scratch vector (holds one matrix row during swaps)

  -- ALGLIB routine --
     04.01.2010
     Bochkanov Sergey
*************************************************************************/
void rmatrixplurec(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t tsa;
    ae_int_t tsb;


    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    /*
     * Try the optional MKL fast path for small-to-medium panels; if the
     * MKL kernel handled the problem, we are done.
     */
    if( n<=tsb )
    {
        if( rmatrixplumkl(a, offs, m, n, pivots, _state) )
        {
            return;
        }
    }
    if( n<=tsa )
    {
        /*
         * Narrow panel: use the non-recursive level-2 kernel.
         */
        dlu_rmatrixplu2(a, offs, m, n, pivots, tmp, _state);
        return;
    }
    if( n>m )
    {
        /*
         * Wide matrix: factor the left MxM square, replay its row
         * interchanges on the right (N-M)-column part, then compute the
         * remaining columns of U via a left triangular solve.
         */
        rmatrixplurec(a, offs, m, m, pivots, tmp, _state);
        for(i=0; i<=m-1; i++)
        {
            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs+m], 1, ae_v_len(0,n-m-1));
            ae_v_move(&a->ptr.pp_double[offs+i][offs+m], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+m], 1, ae_v_len(offs+m,offs+n-1));
            ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+m], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs+m,offs+n-1));
        }
        rmatrixlefttrsm(m, n-m, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+m, _state);
        return;
    }
    /*
     * Split columns N = N1+N2: a full large tile if possible, otherwise
     * a tile-aligned split.
     */
    if( n>tsb )
    {
        n1 = tsb;
        n2 = n-n1;
    }
    else
    {
        tiledsplit(n, tsa, &n1, &n2, _state);
    }
    /*
     * Factor the left panel, then update and factor the trailing part.
     */
    rmatrixplurec(a, offs, m, n1, pivots, tmp, _state);
    if( n2>0 )
    {
        /*
         * Apply the left panel's row interchanges to the right panel
         * (three-step swap through tmp, skipping trivial pivots).
         */
        for(i=0; i<=n1-1; i++)
        {
            if( offs+i!=pivots->ptr.p_int[offs+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(0,n2-1));
                ae_v_move(&a->ptr.pp_double[offs+i][offs+n1], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+n1], 1, ae_v_len(offs+n1,offs+n-1));
                ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+n1], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs+n1,offs+n-1));
            }
        }
        /*
         * TRSM + GEMM form the Schur complement, which is factored
         * recursively; afterwards the second factorization's row
         * interchanges are replayed on the left N1-column strip.
         */
        rmatrixlefttrsm(n1, n2, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+n1, _state);
        rmatrixgemm(m-n1, n-n1, n1, -1.0, a, offs+n1, offs, 0, a, offs, offs+n1, 0, 1.0, a, offs+n1, offs+n1, _state);
        rmatrixplurec(a, offs+n1, m-n1, n-n1, pivots, tmp, _state);
        for(i=0; i<=n2-1; i++)
        {
            if( offs+n1+i!=pivots->ptr.p_int[offs+n1+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(0,n1-1));
                ae_v_move(&a->ptr.pp_double[offs+n1+i][offs], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+n1+i]][offs], 1, ae_v_len(offs,offs+n1-1));
                ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+n1+i]][offs], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+n1-1));
            }
        }
    }
}
40265 
40266 
/*************************************************************************
Complex LUP kernel: non-recursive level-2 LU with column pivoting of the
[offs..offs+m-1, offs..offs+n-1] block; tmp needs at least m+n elements.

  -- ALGLIB routine --
     10.01.2010
     Bochkanov Sergey
*************************************************************************/
static void dlu_cmatrixlup2(/* Complex */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Complex */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    ae_complex s;


    /* Degenerate sizes: nothing to factor. */
    if( m==0||n==0 )
    {
        return;
    }
    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
    {
        /*
         * Find the column JP with the largest |A[J,*]| entry in
         * row J, among columns J..N-1.
         */
        jp = j;
        for(i=j+1; i<=n-1; i++)
        {
            if( ae_fp_greater(ae_c_abs(a->ptr.pp_complex[offs+j][offs+i], _state),ae_c_abs(a->ptr.pp_complex[offs+j][offs+jp], _state)) )
            {
                jp = i;
            }
        }
        pivots->ptr.p_int[offs+j] = offs+jp;
        if( jp!=j )
        {
            /*
             * Swap columns J and JP (three-step exchange through tmp).
             */
            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+j], a->stride, "N", ae_v_len(0,m-1));
            ae_v_cmove(&a->ptr.pp_complex[offs][offs+j], a->stride, &a->ptr.pp_complex[offs][offs+jp], a->stride, "N", ae_v_len(offs,offs+m-1));
            ae_v_cmove(&a->ptr.pp_complex[offs][offs+jp], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+m-1));
        }
        /*
         * Scale the rest of row J by 1/A[J,J] (skipped for a zero pivot).
         */
        if( ae_c_neq_d(a->ptr.pp_complex[offs+j][offs+j],(double)(0))&&j+1<=n-1 )
        {
            s = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
            ae_v_cmulc(&a->ptr.pp_complex[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), s);
        }
        /*
         * Rank-1 update of the trailing submatrix:
         * A[j+1.., j+1..] -= A[j+1..,j] * A[j,j+1..]
         * (column below the pivot in tmp[0..], negated row tail in tmp[m..]).
         */
        if( j<ae_minint(m-1, n-1, _state) )
        {
            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(0,m-j-2));
            ae_v_cmoveneg(&tmp->ptr.p_complex[m], 1, &a->ptr.pp_complex[offs+j][offs+j+1], 1, "N", ae_v_len(m,m+n-j-2));
            cmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
        }
    }
}
40322 
40323 
/*************************************************************************
Real LUP kernel: non-recursive level-2 LU with column pivoting of the
[offs..offs+m-1, offs..offs+n-1] block; tmp needs at least m+n elements.

  -- ALGLIB routine --
     10.01.2010
     Bochkanov Sergey
*************************************************************************/
static void dlu_rmatrixlup2(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    double s;


    /* Degenerate sizes: nothing to factor. */
    if( m==0||n==0 )
    {
        return;
    }
    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
    {
        /*
         * Find the column JP with the largest |A[J,*]| entry in
         * row J, among columns J..N-1.
         */
        jp = j;
        for(i=j+1; i<=n-1; i++)
        {
            if( ae_fp_greater(ae_fabs(a->ptr.pp_double[offs+j][offs+i], _state),ae_fabs(a->ptr.pp_double[offs+j][offs+jp], _state)) )
            {
                jp = i;
            }
        }
        pivots->ptr.p_int[offs+j] = offs+jp;
        if( jp!=j )
        {
            /*
             * Swap columns J and JP (three-step exchange through tmp).
             */
            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+j], a->stride, ae_v_len(0,m-1));
            ae_v_move(&a->ptr.pp_double[offs][offs+j], a->stride, &a->ptr.pp_double[offs][offs+jp], a->stride, ae_v_len(offs,offs+m-1));
            ae_v_move(&a->ptr.pp_double[offs][offs+jp], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+m-1));
        }
        /*
         * Scale the rest of row J by 1/A[J,J] (skipped for a zero pivot).
         */
        if( ae_fp_neq(a->ptr.pp_double[offs+j][offs+j],(double)(0))&&j+1<=n-1 )
        {
            s = 1/a->ptr.pp_double[offs+j][offs+j];
            ae_v_muld(&a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), s);
        }
        /*
         * Rank-1 update of the trailing submatrix:
         * A[j+1.., j+1..] -= A[j+1..,j] * A[j,j+1..]
         * (column below the pivot in tmp[0..], negated row tail in tmp[m..]).
         */
        if( j<ae_minint(m-1, n-1, _state) )
        {
            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(0,m-j-2));
            ae_v_moveneg(&tmp->ptr.p_double[m], 1, &a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(m,m+n-j-2));
            rmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
        }
    }
}
40379 
40380 
/*************************************************************************
Complex PLU kernel: non-recursive level-2 LU with row (partial) pivoting
of the [offs..offs+m-1, offs..offs+n-1] block; tmp needs >= m+n elements.

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     June 30, 1992
*************************************************************************/
static void dlu_cmatrixplu2(/* Complex */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Complex */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    ae_complex s;


    /* Degenerate sizes: nothing to factor. */
    if( m==0||n==0 )
    {
        return;
    }
    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
    {
        /*
         * Find the row JP with the largest |A[*,J]| entry in
         * column J, among rows J..M-1.
         */
        jp = j;
        for(i=j+1; i<=m-1; i++)
        {
            if( ae_fp_greater(ae_c_abs(a->ptr.pp_complex[offs+i][offs+j], _state),ae_c_abs(a->ptr.pp_complex[offs+jp][offs+j], _state)) )
            {
                jp = i;
            }
        }
        pivots->ptr.p_int[offs+j] = offs+jp;
        /*
         * A zero pivot column is left untouched (singular case); the
         * rank-1 update below still runs with zero multipliers.
         */
        if( ae_c_neq_d(a->ptr.pp_complex[offs+jp][offs+j],(double)(0)) )
        {
            /*
             * Swap rows J and JP element by element.
             */
            if( jp!=j )
            {
                for(i=0; i<=n-1; i++)
                {
                    s = a->ptr.pp_complex[offs+j][offs+i];
                    a->ptr.pp_complex[offs+j][offs+i] = a->ptr.pp_complex[offs+jp][offs+i];
                    a->ptr.pp_complex[offs+jp][offs+i] = s;
                }
            }
            /*
             * Scale column J below the diagonal by 1/A[J,J]
             * (the multipliers of L).
             */
            if( j+1<=m-1 )
            {
                s = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
                ae_v_cmulc(&a->ptr.pp_complex[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+m-1), s);
            }
        }
        /*
         * Rank-1 update of the trailing submatrix:
         * A[j+1.., j+1..] -= A[j+1..,j] * A[j,j+1..]
         * (multiplier column in tmp[0..], negated row tail in tmp[m..]).
         */
        if( j<ae_minint(m, n, _state)-1 )
        {
            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(0,m-j-2));
            ae_v_cmoveneg(&tmp->ptr.p_complex[m], 1, &a->ptr.pp_complex[offs+j][offs+j+1], 1, "N", ae_v_len(m,m+n-j-2));
            cmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
        }
    }
}
40443 
40444 
/*************************************************************************
Real PLU kernel

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     June 30, 1992
*************************************************************************/
static void dlu_rmatrixplu2(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    double s;


    /*
     * Unblocked real LU with partial (row) pivoting, applied in-place to
     * the MxN submatrix of A starting at [Offs,Offs].  Pivot indices are
     * stored into Pivots[Offs..Offs+min(M,N)-1].  Tmp is scratch:
     * positions [0,M-1) receive the current L-column and positions
     * [M,M+N-1) the negated U-row, so Tmp needs at least M+N elements -
     * presumably guaranteed by the caller; confirm there.
     */
    if( m==0||n==0 )
    {
        return;
    }
    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
    {

        /*
         * Select pivot row JP: the row with largest |A[i][j]| at or below
         * the diagonal of column J.
         */
        jp = j;
        for(i=j+1; i<=m-1; i++)
        {
            if( ae_fp_greater(ae_fabs(a->ptr.pp_double[offs+i][offs+j], _state),ae_fabs(a->ptr.pp_double[offs+jp][offs+j], _state)) )
            {
                jp = i;
            }
        }
        pivots->ptr.p_int[offs+j] = offs+jp;
        if( ae_fp_neq(a->ptr.pp_double[offs+jp][offs+j],(double)(0)) )
        {

            /*
             * Non-zero pivot: swap rows J and JP across all N columns of
             * the submatrix, then scale the subdiagonal part of column J
             * by 1/pivot to form the L-column.
             */
            if( jp!=j )
            {
                for(i=0; i<=n-1; i++)
                {
                    s = a->ptr.pp_double[offs+j][offs+i];
                    a->ptr.pp_double[offs+j][offs+i] = a->ptr.pp_double[offs+jp][offs+i];
                    a->ptr.pp_double[offs+jp][offs+i] = s;
                }
            }
            if( j+1<=m-1 )
            {
                s = 1/a->ptr.pp_double[offs+j][offs+j];
                ae_v_muld(&a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+m-1), s);
            }
        }

        /*
         * Rank-1 update of the trailing (M-J-1)x(N-J-1) submatrix:
         * A[j+1:,j+1:] += L-column * (-U-row).  NOTE: when the pivot is
         * exactly zero the whole column below the diagonal is zero as
         * well (JP maximizes the absolute value), so this update is a
         * no-op in that case.
         */
        if( j<ae_minint(m, n, _state)-1 )
        {
            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(0,m-j-2));
            ae_v_moveneg(&tmp->ptr.p_double[m], 1, &a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(m,m+n-j-2));
            rmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
        }
    }
}
40507 
40508 
40509 #endif
40510 #if defined(AE_COMPILE_SPTRF) || !defined(AE_PARTIAL_BUILD)
40511 
40512 
/*************************************************************************
Sparse LU for square NxN CRS matrix with both row and column permutations.

Represents A as Pr*L*U*Pc, where:
* Pr is a product of row permutations Pr=Pr(0)*Pr(1)*...*Pr(n-2)*Pr(n-1)
* Pc is a product of col permutations Pc=Pc(n-1)*Pc(n-2)*...*Pc(1)*Pc(0)
* L is lower unitriangular
* U is upper triangular

INPUT PARAMETERS:
    A           -   sparse square matrix in CRS format
    PivotType   -   pivot type:
                    * 0 - for best pivoting available
                    * 1 - row-only pivoting
                    * 2 - row and column greedy pivoting algorithm (most
                          sparse pivot column is selected from the trailing
                          matrix at each step)
    Buf         -   temporary buffer, previously allocated memory is
                    reused as much as possible

OUTPUT PARAMETERS:
    A           -   LU decomposition of A
    PR          -   array[N], row pivots
    PC          -   array[N], column pivots
    Buf         -   following fields of Buf are set:
                    * Buf.RowPermRawIdx[] - contains row permutation, with
                      RawIdx[I]=J meaning that J-th row  of  the  original
                      input matrix was moved to Ith position of the output
                      factorization

This function always succeeds  i.e. it ALWAYS returns valid factorization,
but for your convenience it also  returns boolean  value  which  helps  to
detect symbolically degenerate matrix:
* function returns TRUE if the matrix was factorized AND symbolically
  non-degenerate
* function returns FALSE if the matrix was factorized but U has strictly
  zero elements at the diagonal (the factorization is returned anyway).

  -- ALGLIB routine --
     15.01.2019
     Bochkanov Sergey
*************************************************************************/
ae_bool sptrflu(sparsematrix* a,
     ae_int_t pivottype,
     /* Integer */ ae_vector* pr,
     /* Integer */ ae_vector* pc,
     sluv2buffer* buf,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t k;
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    ae_int_t i0;
    ae_int_t i1;
    ae_int_t ibest;
    ae_int_t jbest;
    double v;
    double v0;
    ae_int_t nz0;
    ae_int_t nz1;
    double uu;
    ae_int_t offs;
    ae_int_t tmpndense;
    ae_bool densificationsupported;
    ae_int_t densifyabove;
    ae_bool result;


    ae_assert(sparseiscrs(a, _state), "SparseLU: A is not stored in CRS format", _state);
    ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseLU: non-square A", _state);
    ae_assert((pivottype==0||pivottype==1)||pivottype==2, "SparseLU: unexpected pivot type", _state);
    result = ae_true;
    n = sparsegetnrows(a, _state);
    if( pivottype==0 )
    {
        pivottype = 2;
    }
    densificationsupported = pivottype==2;

    /*
     * Initialize buffer size and raw row permutation: RowPermRawIdx[]
     * starts as the identity and is updated on every row swap performed
     * below, so that in the end RowPermRawIdx[I]=J means that J-th input
     * row ended up at position I of the factorization.
     */
    buf->n = n;
    ivectorsetlengthatleast(&buf->rowpermrawidx, n, _state);
    for(i=0; i<=n-1; i++)
    {
        buf->rowpermrawidx.ptr.p_int[i] = i;
    }

    /*
     * Allocate storage for sparse L and U factors
     *
     * NOTE: SparseMatrix structure for these factors is only
     *       partially initialized; we use it just as a temporary
     *       storage and do not intend to use facilities of the
     *       'sparse' subpackage to work with these objects.
     */
    buf->sparsel.matrixtype = 1;
    buf->sparsel.m = n;
    buf->sparsel.n = n;
    ivectorsetlengthatleast(&buf->sparsel.ridx, n+1, _state);
    buf->sparsel.ridx.ptr.p_int[0] = 0;
    buf->sparseut.matrixtype = 1;
    buf->sparseut.m = n;
    buf->sparseut.n = n;
    ivectorsetlengthatleast(&buf->sparseut.ridx, n+1, _state);
    buf->sparseut.ridx.ptr.p_int[0] = 0;

    /*
     * Allocate unprocessed yet part of the matrix,
     * two submatrices:
     * * BU, upper J rows of columns [J,N), upper submatrix
     * * BL, left J  cols of rows [J,N), left submatrix
     * * B1, (N-J)*(N-J) square submatrix
     */
    sptrf_sluv2list1init(n, &buf->bleft, _state);
    sptrf_sluv2list1init(n, &buf->bupper, _state);
    ivectorsetlengthatleast(pr, n, _state);
    ivectorsetlengthatleast(pc, n, _state);
    ivectorsetlengthatleast(&buf->v0i, n, _state);
    ivectorsetlengthatleast(&buf->v1i, n, _state);
    rvectorsetlengthatleast(&buf->v0r, n, _state);
    rvectorsetlengthatleast(&buf->v1r, n, _state);
    sptrf_sparsetrailinit(a, &buf->strail, _state);

    /*
     * Prepare dense trail, initial densification: columns of the input
     * which are already denser than DensifyAbove are moved to dense
     * storage up-front (only when pivoting mode supports densification).
     */
    sptrf_densetrailinit(&buf->dtrail, n, _state);
    densifyabove = ae_round(sptrf_densebnd*n, _state)+1;
    if( densificationsupported )
    {
        for(i=0; i<=n-1; i++)
        {
            if( buf->strail.nzc.ptr.p_int[i]>densifyabove )
            {
                sptrf_sparsetraildensify(&buf->strail, i, &buf->bupper, &buf->dtrail, _state);
            }
        }
    }

    /*
     * Process sparse part
     */
    for(k=0; k<=n-1; k++)
    {

        /*
         * Find pivot column and pivot row
         */
        if( !sptrf_sparsetrailfindpivot(&buf->strail, pivottype, &ibest, &jbest, _state) )
        {

            /*
             * Only densified columns are left, break sparse iteration
             */
            ae_assert(buf->dtrail.ndense+k==n, "SPTRF: integrity check failed (35741)", _state);
            break;
        }
        pc->ptr.p_int[k] = jbest;
        pr->ptr.p_int[k] = ibest;
        j = buf->rowpermrawidx.ptr.p_int[k];
        buf->rowpermrawidx.ptr.p_int[k] = buf->rowpermrawidx.ptr.p_int[ibest];
        buf->rowpermrawidx.ptr.p_int[ibest] = j;

        /*
         * Apply pivoting to BL and BU
         */
        sptrf_sluv2list1swap(&buf->bleft, k, ibest, _state);
        sptrf_sluv2list1swap(&buf->bupper, k, jbest, _state);

        /*
         * Apply pivoting to sparse trail, pivot out: UU receives the
         * pivot value, (V0I,V0R,NZ0) the scaled L-column and
         * (V1I,V1R,NZ1) the U-row.
         */
        sptrf_sparsetrailpivotout(&buf->strail, ibest, jbest, &uu, &buf->v0i, &buf->v0r, &nz0, &buf->v1i, &buf->v1r, &nz1, _state);
        result = result&&uu!=0;

        /*
         * Pivot dense trail (swap rows K and IBest of the dense columns)
         */
        tmpndense = buf->dtrail.ndense;
        for(i=0; i<=tmpndense-1; i++)
        {
            v = buf->dtrail.d.ptr.pp_double[k][i];
            buf->dtrail.d.ptr.pp_double[k][i] = buf->dtrail.d.ptr.pp_double[ibest][i];
            buf->dtrail.d.ptr.pp_double[ibest][i] = v;
        }

        /*
         * Output to LU matrix: K-th row of U^T (with diagonal UU) and
         * K-th row of strictly-lower L.
         */
        sptrf_sluv2list1appendsequencetomatrix(&buf->bupper, k, ae_true, uu, n, &buf->sparseut, k, _state);
        sptrf_sluv2list1appendsequencetomatrix(&buf->bleft, k, ae_false, 0.0, n, &buf->sparsel, k, _state);

        /*
         * Extract K-th col/row of B1, generate K-th col/row of BL/BU, update NZC
         */
        sptrf_sluv2list1pushsparsevector(&buf->bleft, &buf->v0i, &buf->v0r, nz0, _state);
        sptrf_sluv2list1pushsparsevector(&buf->bupper, &buf->v1i, &buf->v1r, nz1, _state);

        /*
         * Update the rest of the matrix
         */
        if( nz0*(nz1+buf->dtrail.ndense)>0 )
        {

            /*
             * Update dense trail
             *
             * NOTE: this update MUST be performed before we update sparse trail,
             *       because sparse update may move columns to dense storage after
             *       update is performed on them. Thus, we have to avoid applying
             *       same update twice.
             */
            if( buf->dtrail.ndense>0 )
            {
                tmpndense = buf->dtrail.ndense;
                for(i=0; i<=nz0-1; i++)
                {
                    i0 = buf->v0i.ptr.p_int[i];
                    v0 = buf->v0r.ptr.p_double[i];
                    for(j=0; j<=tmpndense-1; j++)
                    {
                        buf->dtrail.d.ptr.pp_double[i0][j] = buf->dtrail.d.ptr.pp_double[i0][j]-v0*buf->dtrail.d.ptr.pp_double[k][j];
                    }
                }
            }

            /*
             * Update sparse trail
             */
            sptrf_sparsetrailupdate(&buf->strail, &buf->v0i, &buf->v0r, nz0, &buf->v1i, &buf->v1r, nz1, &buf->bupper, &buf->dtrail, densificationsupported, _state);
        }
    }

    /*
     * Process densified trail
     */
    if( buf->dtrail.ndense>0 )
    {
        tmpndense = buf->dtrail.ndense;

        /*
         * Generate column pivots to bring actual order of columns in the
         * working part of the matrix to one used for dense storage
         */
        for(i=n-tmpndense; i<=n-1; i++)
        {
            k = buf->dtrail.did.ptr.p_int[i-(n-tmpndense)];
            jp = -1;
            for(j=i; j<=n-1; j++)
            {
                if( buf->strail.colid.ptr.p_int[j]==k )
                {
                    jp = j;
                    break;
                }
            }
            ae_assert(jp>=0, "SPTRF: integrity check failed during reordering", _state);
            k = buf->strail.colid.ptr.p_int[i];
            buf->strail.colid.ptr.p_int[i] = buf->strail.colid.ptr.p_int[jp];
            buf->strail.colid.ptr.p_int[jp] = k;
            pc->ptr.p_int[i] = jp;
        }

        /*
         * Perform dense LU decomposition on dense trail
         */
        rmatrixsetlengthatleast(&buf->dbuf, buf->dtrail.ndense, buf->dtrail.ndense, _state);
        for(i=0; i<=tmpndense-1; i++)
        {
            for(j=0; j<=tmpndense-1; j++)
            {
                buf->dbuf.ptr.pp_double[i][j] = buf->dtrail.d.ptr.pp_double[i+(n-tmpndense)][j];
            }
        }
        rvectorsetlengthatleast(&buf->tmp0, 2*n, _state);
        ivectorsetlengthatleast(&buf->tmpp, n, _state);
        rmatrixplurec(&buf->dbuf, 0, tmpndense, tmpndense, &buf->tmpp, &buf->tmp0, _state);

        /*
         * Convert indexes of rows pivots, swap elements of BLeft
         */
        for(i=0; i<=tmpndense-1; i++)
        {
            pr->ptr.p_int[i+(n-tmpndense)] = buf->tmpp.ptr.p_int[i]+(n-tmpndense);
            sptrf_sluv2list1swap(&buf->bleft, i+(n-tmpndense), pr->ptr.p_int[i+(n-tmpndense)], _state);
            j = buf->rowpermrawidx.ptr.p_int[i+(n-tmpndense)];
            buf->rowpermrawidx.ptr.p_int[i+(n-tmpndense)] = buf->rowpermrawidx.ptr.p_int[pr->ptr.p_int[i+(n-tmpndense)]];
            buf->rowpermrawidx.ptr.p_int[pr->ptr.p_int[i+(n-tmpndense)]] = j;
        }

        /*
         * Convert U-factor: each dense column J becomes a row of the
         * (transposed) SparseUT storage - leading sparse-stage entries
         * first, then strictly-upper dense entries, then the diagonal
         * element (ALWAYS stored, even when zero).
         */
        ivectorgrowto(&buf->sparseut.idx, buf->sparseut.ridx.ptr.p_int[n-tmpndense]+n*tmpndense, _state);
        rvectorgrowto(&buf->sparseut.vals, buf->sparseut.ridx.ptr.p_int[n-tmpndense]+n*tmpndense, _state);
        for(j=0; j<=tmpndense-1; j++)
        {
            offs = buf->sparseut.ridx.ptr.p_int[j+(n-tmpndense)];
            k = n-tmpndense;

            /*
             * Convert leading N-NDense columns
             */
            for(i=0; i<=k-1; i++)
            {
                v = buf->dtrail.d.ptr.pp_double[i][j];
                if( v!=0 )
                {
                    buf->sparseut.idx.ptr.p_int[offs] = i;
                    buf->sparseut.vals.ptr.p_double[offs] = v;
                    offs = offs+1;
                }
            }

            /*
             * Convert upper diagonal elements
             */
            for(i=0; i<=j-1; i++)
            {
                v = buf->dbuf.ptr.pp_double[i][j];
                if( v!=0 )
                {
                    buf->sparseut.idx.ptr.p_int[offs] = i+(n-tmpndense);
                    buf->sparseut.vals.ptr.p_double[offs] = v;
                    offs = offs+1;
                }
            }

            /*
             * Convert diagonal element (always stored); a zero diagonal
             * marks the factorization as symbolically degenerate.
             */
            v = buf->dbuf.ptr.pp_double[j][j];
            buf->sparseut.idx.ptr.p_int[offs] = j+(n-tmpndense);
            buf->sparseut.vals.ptr.p_double[offs] = v;
            offs = offs+1;
            result = result&&v!=0;

            /*
             * Column is done
             */
            buf->sparseut.ridx.ptr.p_int[j+(n-tmpndense)+1] = offs;
        }

        /*
         * Convert L-factor: sparse-stage entries of each row first
         * (via BLeft), then strictly-lower entries of the dense block.
         */
        ivectorgrowto(&buf->sparsel.idx, buf->sparsel.ridx.ptr.p_int[n-tmpndense]+n*tmpndense, _state);
        rvectorgrowto(&buf->sparsel.vals, buf->sparsel.ridx.ptr.p_int[n-tmpndense]+n*tmpndense, _state);
        for(i=0; i<=tmpndense-1; i++)
        {
            sptrf_sluv2list1appendsequencetomatrix(&buf->bleft, i+(n-tmpndense), ae_false, 0.0, n, &buf->sparsel, i+(n-tmpndense), _state);
            offs = buf->sparsel.ridx.ptr.p_int[i+(n-tmpndense)+1];
            for(j=0; j<=i-1; j++)
            {
                v = buf->dbuf.ptr.pp_double[i][j];
                if( v!=0 )
                {
                    buf->sparsel.idx.ptr.p_int[offs] = j+(n-tmpndense);
                    buf->sparsel.vals.ptr.p_double[offs] = v;
                    offs = offs+1;
                }
            }
            buf->sparsel.ridx.ptr.p_int[i+(n-tmpndense)+1] = offs;
        }
    }

    /*
     * Allocate output: merge strictly-lower L (stored by rows) and
     * upper U (stored by columns in SparseUT) into a single CRS matrix.
     * TmpI[] first counts per-row L entries, then U contributions are
     * added by scanning SparseUT.
     */
    ivectorsetlengthatleast(&buf->tmpi, n, _state);
    for(i=0; i<=n-1; i++)
    {
        buf->tmpi.ptr.p_int[i] = buf->sparsel.ridx.ptr.p_int[i+1]-buf->sparsel.ridx.ptr.p_int[i];
    }
    for(i=0; i<=n-1; i++)
    {
        i0 = buf->sparseut.ridx.ptr.p_int[i];
        i1 = buf->sparseut.ridx.ptr.p_int[i+1]-1;
        for(j=i0; j<=i1; j++)
        {
            k = buf->sparseut.idx.ptr.p_int[j];
            buf->tmpi.ptr.p_int[k] = buf->tmpi.ptr.p_int[k]+1;
        }
    }
    a->matrixtype = 1;
    a->ninitialized = buf->sparsel.ridx.ptr.p_int[n]+buf->sparseut.ridx.ptr.p_int[n];
    a->m = n;
    a->n = n;
    ivectorsetlengthatleast(&a->ridx, n+1, _state);
    ivectorsetlengthatleast(&a->idx, a->ninitialized, _state);
    rvectorsetlengthatleast(&a->vals, a->ninitialized, _state);
    a->ridx.ptr.p_int[0] = 0;
    for(i=0; i<=n-1; i++)
    {
        a->ridx.ptr.p_int[i+1] = a->ridx.ptr.p_int[i]+buf->tmpi.ptr.p_int[i];
    }

    /*
     * Copy L entries to the head of each output row; TmpI[] is reset to
     * per-row L counts for the DIdx/UIdx computation below.
     */
    for(i=0; i<=n-1; i++)
    {
        i0 = buf->sparsel.ridx.ptr.p_int[i];
        i1 = buf->sparsel.ridx.ptr.p_int[i+1]-1;
        jp = a->ridx.ptr.p_int[i];
        for(j=i0; j<=i1; j++)
        {
            a->idx.ptr.p_int[jp+(j-i0)] = buf->sparsel.idx.ptr.p_int[j];
            a->vals.ptr.p_double[jp+(j-i0)] = buf->sparsel.vals.ptr.p_double[j];
        }
        buf->tmpi.ptr.p_int[i] = buf->sparsel.ridx.ptr.p_int[i+1]-buf->sparsel.ridx.ptr.p_int[i];
    }

    /*
     * DIdx/UIdx point right after the L-part of each row; TmpI[] now
     * serves as per-row write cursor for the U scatter below.
     */
    ivectorsetlengthatleast(&a->didx, n, _state);
    ivectorsetlengthatleast(&a->uidx, n, _state);
    for(i=0; i<=n-1; i++)
    {
        a->didx.ptr.p_int[i] = a->ridx.ptr.p_int[i]+buf->tmpi.ptr.p_int[i];
        a->uidx.ptr.p_int[i] = a->didx.ptr.p_int[i]+1;
        buf->tmpi.ptr.p_int[i] = a->didx.ptr.p_int[i];
    }

    /*
     * Scatter U: SparseUT row I holds column I of U, so entry K of that
     * row goes to output row K (ascending I guarantees ascending column
     * order within each output row's U-part).
     */
    for(i=0; i<=n-1; i++)
    {
        i0 = buf->sparseut.ridx.ptr.p_int[i];
        i1 = buf->sparseut.ridx.ptr.p_int[i+1]-1;
        for(j=i0; j<=i1; j++)
        {
            k = buf->sparseut.idx.ptr.p_int[j];
            offs = buf->tmpi.ptr.p_int[k];
            a->idx.ptr.p_int[offs] = i;
            a->vals.ptr.p_double[offs] = buf->sparseut.vals.ptr.p_double[j];
            buf->tmpi.ptr.p_int[k] = offs+1;
        }
    }
    return result;
}
40948 
40949 
40950 /*************************************************************************
40951 This function initialized rectangular submatrix structure.
40952 
40953 After initialization this structure stores  matrix[N,0],  which contains N
40954 rows (sequences), stored as single-linked lists.
40955 
40956   -- ALGLIB routine --
40957      15.01.2019
40958      Bochkanov Sergey
40959 *************************************************************************/
sptrf_sluv2list1init(ae_int_t n,sluv2list1matrix * a,ae_state * _state)40960 static void sptrf_sluv2list1init(ae_int_t n,
40961      sluv2list1matrix* a,
40962      ae_state *_state)
40963 {
40964     ae_int_t i;
40965 
40966 
40967     ae_assert(n>=1, "SLUV2List1Init: N<1", _state);
40968     a->nfixed = n;
40969     a->ndynamic = 0;
40970     a->nallocated = n;
40971     a->nused = 0;
40972     ivectorgrowto(&a->idxfirst, n, _state);
40973     ivectorgrowto(&a->strgidx, 2*a->nallocated, _state);
40974     rvectorgrowto(&a->strgval, a->nallocated, _state);
40975     for(i=0; i<=n-1; i++)
40976     {
40977         a->idxfirst.ptr.p_int[i] = -1;
40978     }
40979 }
40980 
40981 
40982 /*************************************************************************
40983 This function swaps sequences #I and #J stored by the structure
40984 
40985   -- ALGLIB routine --
40986      15.01.2019
40987      Bochkanov Sergey
40988 *************************************************************************/
sptrf_sluv2list1swap(sluv2list1matrix * a,ae_int_t i,ae_int_t j,ae_state * _state)40989 static void sptrf_sluv2list1swap(sluv2list1matrix* a,
40990      ae_int_t i,
40991      ae_int_t j,
40992      ae_state *_state)
40993 {
40994     ae_int_t k;
40995 
40996 
40997     k = a->idxfirst.ptr.p_int[i];
40998     a->idxfirst.ptr.p_int[i] = a->idxfirst.ptr.p_int[j];
40999     a->idxfirst.ptr.p_int[j] = k;
41000 }
41001 
41002 
41003 /*************************************************************************
41004 This function drops sequence #I from the structure
41005 
41006   -- ALGLIB routine --
41007      15.01.2019
41008      Bochkanov Sergey
41009 *************************************************************************/
sptrf_sluv2list1dropsequence(sluv2list1matrix * a,ae_int_t i,ae_state * _state)41010 static void sptrf_sluv2list1dropsequence(sluv2list1matrix* a,
41011      ae_int_t i,
41012      ae_state *_state)
41013 {
41014 
41015 
41016     a->idxfirst.ptr.p_int[i] = -1;
41017 }
41018 
41019 
/*************************************************************************
This function appends sequence from the structure to the sparse matrix.

It is assumed that S is a lower triangular  matrix,  and A stores strictly
lower triangular elements (no diagonal ones!). You can explicitly  control
whether you want to add diagonal elements or not.

Output matrix is assumed to be stored in CRS format and  to  be  partially
initialized (up to, but not including, Dst-th row). DIdx and UIdx are  NOT
updated by this function as well as NInitialized.

INPUT PARAMETERS:
    A           -   rectangular matrix structure
    Src         -   sequence (row or column) index in the structure
    HasDiagonal -   whether we want to add diagonal element
    D           -   diagonal element, if HasDiagonal=True
    NZMAX       -   maximum estimated number of non-zeros in the row,
                    this function will preallocate storage in the output
                    matrix.
    S           -   destination matrix in CRS format, partially initialized
    Dst         -   destination row index


  -- ALGLIB routine --
     15.01.2019
     Bochkanov Sergey
*************************************************************************/
static void sptrf_sluv2list1appendsequencetomatrix(sluv2list1matrix* a,
     ae_int_t src,
     ae_bool hasdiagonal,
     double d,
     ae_int_t nzmax,
     sparsematrix* s,
     ae_int_t dst,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t i0;
    ae_int_t i1;
    ae_int_t jp;
    ae_int_t nnz;


    /*
     * Reserve NZMax entries starting at I0 (beginning of row Dst);
     * the actual element count NNZ may be smaller.
     */
    i0 = s->ridx.ptr.p_int[dst];
    ivectorgrowto(&s->idx, i0+nzmax, _state);
    rvectorgrowto(&s->vals, i0+nzmax, _state);

    /*
     * Fill the reserved region BACKWARDS from its end, so that elements
     * come out in the order they were originally pushed (the linked list
     * is stored most-recent-first).  The diagonal element, if requested,
     * is written at the very end of the row.
     */
    if( hasdiagonal )
    {
        i1 = i0+nzmax-1;
        s->idx.ptr.p_int[i1] = dst;
        s->vals.ptr.p_double[i1] = d;
        nnz = 1;
    }
    else
    {
        i1 = i0+nzmax;
        nnz = 0;
    }
    jp = a->idxfirst.ptr.p_int[src];
    while(jp>=0)
    {

        /*
         * Cell layout: StrgIdx[2*jp+0] = next cell, StrgIdx[2*jp+1] =
         * column index, StrgVal[jp] = value.
         */
        i1 = i1-1;
        s->idx.ptr.p_int[i1] = a->strgidx.ptr.p_int[2*jp+1];
        s->vals.ptr.p_double[i1] = a->strgval.ptr.p_double[jp];
        nnz = nnz+1;
        jp = a->strgidx.ptr.p_int[2*jp+0];
    }

    /*
     * Compact: shift the NNZ elements (currently at the tail of the
     * reserved region, starting at I1) down to the row start I0.
     */
    for(i=0; i<=nnz-1; i++)
    {
        s->idx.ptr.p_int[i0+i] = s->idx.ptr.p_int[i1+i];
        s->vals.ptr.p_double[i0+i] = s->vals.ptr.p_double[i1+i];
    }
    s->ridx.ptr.p_int[dst+1] = s->ridx.ptr.p_int[dst]+nnz;
}
41094 
41095 
41096 /*************************************************************************
41097 This function appends sparse column to the  matrix,  increasing  its  size
41098 from [N,K] to [N,K+1]
41099 
41100   -- ALGLIB routine --
41101      15.01.2019
41102      Bochkanov Sergey
41103 *************************************************************************/
sptrf_sluv2list1pushsparsevector(sluv2list1matrix * a,ae_vector * si,ae_vector * sv,ae_int_t nz,ae_state * _state)41104 static void sptrf_sluv2list1pushsparsevector(sluv2list1matrix* a,
41105      /* Integer */ ae_vector* si,
41106      /* Real    */ ae_vector* sv,
41107      ae_int_t nz,
41108      ae_state *_state)
41109 {
41110     ae_int_t idx;
41111     ae_int_t i;
41112     ae_int_t k;
41113     ae_int_t nused;
41114     double v;
41115 
41116 
41117 
41118     /*
41119      * Fetch matrix size, increase
41120      */
41121     k = a->ndynamic;
41122     ae_assert(k<a->nfixed, "Assertion failed", _state);
41123     a->ndynamic = k+1;
41124 
41125     /*
41126      * Allocate new storage if needed
41127      */
41128     nused = a->nused;
41129     a->nallocated = ae_maxint(a->nallocated, nused+nz, _state);
41130     ivectorgrowto(&a->strgidx, 2*a->nallocated, _state);
41131     rvectorgrowto(&a->strgval, a->nallocated, _state);
41132 
41133     /*
41134      * Append to list
41135      */
41136     for(idx=0; idx<=nz-1; idx++)
41137     {
41138         i = si->ptr.p_int[idx];
41139         v = sv->ptr.p_double[idx];
41140         a->strgidx.ptr.p_int[2*nused+0] = a->idxfirst.ptr.p_int[i];
41141         a->strgidx.ptr.p_int[2*nused+1] = k;
41142         a->strgval.ptr.p_double[nused] = v;
41143         a->idxfirst.ptr.p_int[i] = nused;
41144         nused = nused+1;
41145     }
41146     a->nused = nused;
41147 }
41148 
41149 
41150 /*************************************************************************
41151 This function initializes dense trail, by default it is matrix[N,0]
41152 
41153   -- ALGLIB routine --
41154      15.01.2019
41155      Bochkanov Sergey
41156 *************************************************************************/
sptrf_densetrailinit(sluv2densetrail * d,ae_int_t n,ae_state * _state)41157 static void sptrf_densetrailinit(sluv2densetrail* d,
41158      ae_int_t n,
41159      ae_state *_state)
41160 {
41161     ae_int_t excessivesize;
41162 
41163 
41164 
41165     /*
41166      * Note: excessive rows are allocated to accomodate for situation when
41167      *       this buffer is used to solve successive problems with increasing
41168      *       sizes.
41169      */
41170     excessivesize = ae_maxint(ae_round(1.333*n, _state), n, _state);
41171     d->n = n;
41172     d->ndense = 0;
41173     ivectorsetlengthatleast(&d->did, n, _state);
41174     if( d->d.rows<=excessivesize )
41175     {
41176         rmatrixsetlengthatleast(&d->d, n, 1, _state);
41177     }
41178     else
41179     {
41180         ae_matrix_set_length(&d->d, excessivesize, 1, _state);
41181     }
41182 }
41183 
41184 
41185 /*************************************************************************
41186 This function appends column with id=ID to the dense trail (column IDs are
41187 integer numbers in [0,N) which can be used to track column permutations).
41188 
41189   -- ALGLIB routine --
41190      15.01.2019
41191      Bochkanov Sergey
41192 *************************************************************************/
sptrf_densetrailappendcolumn(sluv2densetrail * d,ae_vector * x,ae_int_t id,ae_state * _state)41193 static void sptrf_densetrailappendcolumn(sluv2densetrail* d,
41194      /* Real    */ ae_vector* x,
41195      ae_int_t id,
41196      ae_state *_state)
41197 {
41198     ae_int_t n;
41199     ae_int_t i;
41200     ae_int_t targetidx;
41201 
41202 
41203     n = d->n;
41204 
41205     /*
41206      * Reallocate storage
41207      */
41208     rmatrixgrowcolsto(&d->d, d->ndense+1, n, _state);
41209 
41210     /*
41211      * Copy to dense storage:
41212      * * BUpper
41213      * * BTrail
41214      * Remove from sparse storage
41215      */
41216     targetidx = d->ndense;
41217     for(i=0; i<=n-1; i++)
41218     {
41219         d->d.ptr.pp_double[i][targetidx] = x->ptr.p_double[i];
41220     }
41221     d->did.ptr.p_int[targetidx] = id;
41222     d->ndense = targetidx+1;
41223 }
41224 
41225 
41226 /*************************************************************************
41227 This function initializes sparse trail from the sparse matrix. By default,
41228 sparse trail spans columns and rows in [0,N)  range.  Subsequent  pivoting
41229 out of rows/columns changes its range to [K,N), [K+1,N) and so on.
41230 
41231   -- ALGLIB routine --
41232      15.01.2019
41233      Bochkanov Sergey
41234 *************************************************************************/
sptrf_sparsetrailinit(sparsematrix * s,sluv2sparsetrail * a,ae_state * _state)41235 static void sptrf_sparsetrailinit(sparsematrix* s,
41236      sluv2sparsetrail* a,
41237      ae_state *_state)
41238 {
41239     ae_int_t i;
41240     ae_int_t j;
41241     ae_int_t n;
41242     ae_int_t j0;
41243     ae_int_t j1;
41244     ae_int_t jj;
41245     ae_int_t p;
41246     ae_int_t slsused;
41247 
41248 
41249     ae_assert(s->m==s->n, "SparseTrailInit: M<>N", _state);
41250     ae_assert(s->matrixtype==1, "SparseTrailInit: non-CRS input", _state);
41251     n = s->n;
41252     a->n = s->n;
41253     a->k = 0;
41254     ivectorsetlengthatleast(&a->nzc, n, _state);
41255     ivectorsetlengthatleast(&a->colid, n, _state);
41256     rvectorsetlengthatleast(&a->tmp0, n, _state);
41257     for(i=0; i<=n-1; i++)
41258     {
41259         a->colid.ptr.p_int[i] = i;
41260     }
41261     bvectorsetlengthatleast(&a->isdensified, n, _state);
41262     for(i=0; i<=n-1; i++)
41263     {
41264         a->isdensified.ptr.p_bool[i] = ae_false;
41265     }
41266 
41267     /*
41268      * Working set of columns
41269      */
41270     a->maxwrkcnt = iboundval(ae_round(1+(double)n/(double)3, _state), 1, ae_minint(n, 50, _state), _state);
41271     a->wrkcnt = 0;
41272     ivectorsetlengthatleast(&a->wrkset, a->maxwrkcnt, _state);
41273 
41274     /*
41275      * Sparse linked storage (SLS). Store CRS matrix to SLS format,
41276      * row by row, starting from the last one.
41277      */
41278     ivectorsetlengthatleast(&a->slscolptr, n, _state);
41279     ivectorsetlengthatleast(&a->slsrowptr, n, _state);
41280     ivectorsetlengthatleast(&a->slsidx, s->ridx.ptr.p_int[n]*sptrf_slswidth, _state);
41281     rvectorsetlengthatleast(&a->slsval, s->ridx.ptr.p_int[n], _state);
41282     for(i=0; i<=n-1; i++)
41283     {
41284         a->nzc.ptr.p_int[i] = 0;
41285     }
41286     for(i=0; i<=n-1; i++)
41287     {
41288         a->slscolptr.ptr.p_int[i] = -1;
41289         a->slsrowptr.ptr.p_int[i] = -1;
41290     }
41291     slsused = 0;
41292     for(i=n-1; i>=0; i--)
41293     {
41294         j0 = s->ridx.ptr.p_int[i];
41295         j1 = s->ridx.ptr.p_int[i+1]-1;
41296         for(jj=j1; jj>=j0; jj--)
41297         {
41298             j = s->idx.ptr.p_int[jj];
41299 
41300             /*
41301              * Update non-zero counts for columns
41302              */
41303             a->nzc.ptr.p_int[j] = a->nzc.ptr.p_int[j]+1;
41304 
41305             /*
41306              * Insert into column list
41307              */
41308             p = a->slscolptr.ptr.p_int[j];
41309             if( p>=0 )
41310             {
41311                 a->slsidx.ptr.p_int[p*sptrf_slswidth+0] = slsused;
41312             }
41313             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+0] = -1;
41314             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+1] = p;
41315             a->slscolptr.ptr.p_int[j] = slsused;
41316 
41317             /*
41318              * Insert into row list
41319              */
41320             p = a->slsrowptr.ptr.p_int[i];
41321             if( p>=0 )
41322             {
41323                 a->slsidx.ptr.p_int[p*sptrf_slswidth+2] = slsused;
41324             }
41325             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+2] = -1;
41326             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+3] = p;
41327             a->slsrowptr.ptr.p_int[i] = slsused;
41328 
41329             /*
41330              * Store index and value
41331              */
41332             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+4] = i;
41333             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+5] = j;
41334             a->slsval.ptr.p_double[slsused] = s->vals.ptr.p_double[jj];
41335             slsused = slsused+1;
41336         }
41337     }
41338     a->slsused = slsused;
41339 }
41340 
41341 
41342 /*************************************************************************
This function searches for an appropriate pivot column/row.
41344 
41345 If there exists non-densified column, it returns indexes of  pivot  column
41346 and row, with most sparse column selected for column pivoting, and largest
41347 element selected for row pivoting. Function result is True.
41348 
41349 PivotType=1 means that no column pivoting is performed
41350 PivotType=2 means that both column and row pivoting are supported
41351 
41352 If all columns were densified, False is returned.
41353 
41354   -- ALGLIB routine --
41355      15.01.2019
41356      Bochkanov Sergey
41357 *************************************************************************/
sptrf_sparsetrailfindpivot(sluv2sparsetrail * a,ae_int_t pivottype,ae_int_t * ipiv,ae_int_t * jpiv,ae_state * _state)41358 static ae_bool sptrf_sparsetrailfindpivot(sluv2sparsetrail* a,
41359      ae_int_t pivottype,
41360      ae_int_t* ipiv,
41361      ae_int_t* jpiv,
41362      ae_state *_state)
41363 {
41364     ae_int_t n;
41365     ae_int_t k;
41366     ae_int_t j;
41367     ae_int_t jp;
41368     ae_int_t entry;
41369     ae_int_t nz;
41370     ae_int_t maxwrknz;
41371     ae_int_t nnzbest;
41372     double s;
41373     double bbest;
41374     ae_int_t wrk0;
41375     ae_int_t wrk1;
41376     ae_bool result;
41377 
41378     *ipiv = 0;
41379     *jpiv = 0;
41380 
41381     n = a->n;
41382     k = a->k;
41383     nnzbest = n+1;
41384     *jpiv = -1;
41385     *ipiv = -1;
41386     result = ae_true;
41387 
41388     /*
41389      * Select pivot column
41390      */
41391     if( pivottype==1 )
41392     {
41393 
41394         /*
41395          * No column pivoting
41396          */
41397         ae_assert(!a->isdensified.ptr.p_bool[k], "SparseTrailFindPivot: integrity check failed", _state);
41398         *jpiv = k;
41399     }
41400     else
41401     {
41402 
41403         /*
41404          * Find pivot column
41405          */
41406         for(;;)
41407         {
41408 
41409             /*
41410              * Scan working set (if non-empty) for good columns
41411              */
41412             maxwrknz = a->maxwrknz;
41413             for(j=0; j<=a->wrkcnt-1; j++)
41414             {
41415                 jp = a->wrkset.ptr.p_int[j];
41416                 if( jp<k )
41417                 {
41418                     continue;
41419                 }
41420                 if( a->isdensified.ptr.p_bool[jp] )
41421                 {
41422                     continue;
41423                 }
41424                 nz = a->nzc.ptr.p_int[jp];
41425                 if( nz>maxwrknz )
41426                 {
41427                     continue;
41428                 }
41429                 if( *jpiv<0||nz<nnzbest )
41430                 {
41431                     nnzbest = nz;
41432                     *jpiv = jp;
41433                 }
41434             }
41435             if( *jpiv>=0 )
41436             {
41437                 break;
41438             }
41439 
41440             /*
41441              * Well, nothing found. Recompute working set:
41442              * * determine most sparse unprocessed yet column
41443              * * gather all columns with density in [Wrk0,Wrk1) range,
41444              *   increase range, repeat, until working set is full
41445              */
41446             a->wrkcnt = 0;
41447             a->maxwrknz = 0;
41448             wrk0 = n+1;
41449             for(jp=k; jp<=n-1; jp++)
41450             {
41451                 if( !a->isdensified.ptr.p_bool[jp]&&a->nzc.ptr.p_int[jp]<wrk0 )
41452                 {
41453                     wrk0 = a->nzc.ptr.p_int[jp];
41454                 }
41455             }
41456             if( wrk0>n )
41457             {
41458 
41459                 /*
41460                  * Only densified columns are present, exit.
41461                  */
41462                 result = ae_false;
41463                 return result;
41464             }
41465             wrk1 = wrk0+1;
41466             while(a->wrkcnt<a->maxwrkcnt&&wrk0<=n)
41467             {
41468 
41469                 /*
41470                  * Find columns with non-zero count in [Wrk0,Wrk1) range
41471                  */
41472                 for(jp=k; jp<=n-1; jp++)
41473                 {
41474                     if( a->wrkcnt==a->maxwrkcnt )
41475                     {
41476                         break;
41477                     }
41478                     if( a->isdensified.ptr.p_bool[jp] )
41479                     {
41480                         continue;
41481                     }
41482                     if( a->nzc.ptr.p_int[jp]>=wrk0&&a->nzc.ptr.p_int[jp]<wrk1 )
41483                     {
41484                         a->wrkset.ptr.p_int[a->wrkcnt] = jp;
41485                         a->wrkcnt = a->wrkcnt+1;
41486                         a->maxwrknz = ae_maxint(a->maxwrknz, a->nzc.ptr.p_int[jp], _state);
41487                     }
41488                 }
41489 
41490                 /*
41491                  * Advance scan range
41492                  */
41493                 jp = ae_round(1.41*(wrk1-wrk0), _state)+1;
41494                 wrk0 = wrk1;
41495                 wrk1 = wrk0+jp;
41496             }
41497         }
41498     }
41499 
41500     /*
41501      * Select pivot row
41502      */
41503     bbest = (double)(0);
41504     entry = a->slscolptr.ptr.p_int[*jpiv];
41505     while(entry>=0)
41506     {
41507         s = ae_fabs(a->slsval.ptr.p_double[entry], _state);
41508         if( *ipiv<0||ae_fp_greater(s,bbest) )
41509         {
41510             bbest = s;
41511             *ipiv = a->slsidx.ptr.p_int[entry*sptrf_slswidth+4];
41512         }
41513         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41514     }
41515     if( *ipiv<0 )
41516     {
41517         *ipiv = k;
41518     }
41519     return result;
41520 }
41521 
41522 
41523 /*************************************************************************
41524 This function pivots out specified row and column.
41525 
41526 Sparse trail range changes from [K,N) to [K+1,N).
41527 
41528 V0I, V0R, V1I, V1R must be preallocated arrays[N].
41529 
41530 Following data are returned:
41531 * UU - diagonal element (pivoted out), can be zero
41532 * V0I, V0R, NZ0 - sparse column pivoted out to the left (after permutation
41533   is applied to its elements) and divided by UU.
41534   V0I is array[NZ0] which stores row indexes in [K+1,N) range, V0R  stores
41535   values.
41536 * V1I, V1R, NZ1 - sparse row pivoted out to the top.
41537 
41538   -- ALGLIB routine --
41539      15.01.2019
41540      Bochkanov Sergey
41541 *************************************************************************/
sptrf_sparsetrailpivotout(sluv2sparsetrail * a,ae_int_t ipiv,ae_int_t jpiv,double * uu,ae_vector * v0i,ae_vector * v0r,ae_int_t * nz0,ae_vector * v1i,ae_vector * v1r,ae_int_t * nz1,ae_state * _state)41542 static void sptrf_sparsetrailpivotout(sluv2sparsetrail* a,
41543      ae_int_t ipiv,
41544      ae_int_t jpiv,
41545      double* uu,
41546      /* Integer */ ae_vector* v0i,
41547      /* Real    */ ae_vector* v0r,
41548      ae_int_t* nz0,
41549      /* Integer */ ae_vector* v1i,
41550      /* Real    */ ae_vector* v1r,
41551      ae_int_t* nz1,
41552      ae_state *_state)
41553 {
41554     ae_int_t n;
41555     ae_int_t k;
41556     ae_int_t i;
41557     ae_int_t j;
41558     ae_int_t entry;
41559     double v;
41560     double s;
41561     ae_bool vb;
41562     ae_int_t pos0k;
41563     ae_int_t pos0piv;
41564     ae_int_t pprev;
41565     ae_int_t pnext;
41566     ae_int_t pnextnext;
41567 
41568     *uu = 0;
41569     *nz0 = 0;
41570     *nz1 = 0;
41571 
41572     n = a->n;
41573     k = a->k;
41574     ae_assert(k<n, "SparseTrailPivotOut: integrity check failed", _state);
41575 
41576     /*
41577      * Pivot out column JPiv from the sparse linked storage:
41578      * * remove column JPiv from the matrix
41579      * * update column K:
41580      *   * change element indexes after it is permuted to JPiv
41581      *   * resort rows affected by move K->JPiv
41582      *
41583      * NOTE: this code leaves V0I/V0R/NZ0 in the unfinalized state,
41584      *       i.e. these arrays do not account for pivoting performed
41585      *       on rows. They will be post-processed later.
41586      */
41587     *nz0 = 0;
41588     pos0k = -1;
41589     pos0piv = -1;
41590     entry = a->slscolptr.ptr.p_int[jpiv];
41591     while(entry>=0)
41592     {
41593 
41594         /*
41595          * Offload element
41596          */
41597         i = a->slsidx.ptr.p_int[entry*sptrf_slswidth+4];
41598         v0i->ptr.p_int[*nz0] = i;
41599         v0r->ptr.p_double[*nz0] = a->slsval.ptr.p_double[entry];
41600         if( i==k )
41601         {
41602             pos0k = *nz0;
41603         }
41604         if( i==ipiv )
41605         {
41606             pos0piv = *nz0;
41607         }
41608         *nz0 = *nz0+1;
41609 
41610         /*
41611          * Remove element from the row list
41612          */
41613         pprev = a->slsidx.ptr.p_int[entry*sptrf_slswidth+2];
41614         pnext = a->slsidx.ptr.p_int[entry*sptrf_slswidth+3];
41615         if( pprev>=0 )
41616         {
41617             a->slsidx.ptr.p_int[pprev*sptrf_slswidth+3] = pnext;
41618         }
41619         else
41620         {
41621             a->slsrowptr.ptr.p_int[i] = pnext;
41622         }
41623         if( pnext>=0 )
41624         {
41625             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+2] = pprev;
41626         }
41627 
41628         /*
41629          * Select next entry
41630          */
41631         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41632     }
41633     entry = a->slscolptr.ptr.p_int[k];
41634     a->slscolptr.ptr.p_int[jpiv] = entry;
41635     while(entry>=0)
41636     {
41637 
41638         /*
41639          * Change column index
41640          */
41641         a->slsidx.ptr.p_int[entry*sptrf_slswidth+5] = jpiv;
41642 
41643         /*
41644          * Next entry
41645          */
41646         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41647     }
41648 
41649     /*
41650      * Post-process V0, account for pivoting.
41651      * Compute diagonal element UU.
41652      */
41653     *uu = (double)(0);
41654     if( pos0k>=0||pos0piv>=0 )
41655     {
41656 
41657         /*
41658          * Apply permutation to rows of pivoted out column, specific
41659          * implementation depends on the sparsity at locations #Pos0K
41660          * and #Pos0Piv of the V0 array.
41661          */
41662         if( pos0k>=0&&pos0piv>=0 )
41663         {
41664 
41665             /*
41666              * Obtain diagonal element
41667              */
41668             *uu = v0r->ptr.p_double[pos0piv];
41669             if( *uu!=0 )
41670             {
41671                 s = 1/(*uu);
41672             }
41673             else
41674             {
41675                 s = (double)(1);
41676             }
41677 
41678             /*
41679              * Move pivoted out element, shift array by one in order
41680              * to remove heading diagonal element (not needed here
41681              * anymore).
41682              */
41683             v0r->ptr.p_double[pos0piv] = v0r->ptr.p_double[pos0k];
41684             for(i=0; i<=*nz0-2; i++)
41685             {
41686                 v0i->ptr.p_int[i] = v0i->ptr.p_int[i+1];
41687                 v0r->ptr.p_double[i] = v0r->ptr.p_double[i+1]*s;
41688             }
41689             *nz0 = *nz0-1;
41690         }
41691         if( pos0k>=0&&pos0piv<0 )
41692         {
41693 
41694             /*
41695              * Diagonal element is zero
41696              */
41697             *uu = (double)(0);
41698 
41699             /*
41700              * Pivot out element, reorder array
41701              */
41702             v0i->ptr.p_int[pos0k] = ipiv;
41703             for(i=pos0k; i<=*nz0-2; i++)
41704             {
41705                 if( v0i->ptr.p_int[i]<v0i->ptr.p_int[i+1] )
41706                 {
41707                     break;
41708                 }
41709                 j = v0i->ptr.p_int[i];
41710                 v0i->ptr.p_int[i] = v0i->ptr.p_int[i+1];
41711                 v0i->ptr.p_int[i+1] = j;
41712                 v = v0r->ptr.p_double[i];
41713                 v0r->ptr.p_double[i] = v0r->ptr.p_double[i+1];
41714                 v0r->ptr.p_double[i+1] = v;
41715             }
41716         }
41717         if( pos0k<0&&pos0piv>=0 )
41718         {
41719 
41720             /*
41721              * Get diagonal element
41722              */
41723             *uu = v0r->ptr.p_double[pos0piv];
41724             if( *uu!=0 )
41725             {
41726                 s = 1/(*uu);
41727             }
41728             else
41729             {
41730                 s = (double)(1);
41731             }
41732 
41733             /*
41734              * Shift array past the pivoted in element by one
41735              * in order to remove pivot
41736              */
41737             for(i=0; i<=pos0piv-1; i++)
41738             {
41739                 v0r->ptr.p_double[i] = v0r->ptr.p_double[i]*s;
41740             }
41741             for(i=pos0piv; i<=*nz0-2; i++)
41742             {
41743                 v0i->ptr.p_int[i] = v0i->ptr.p_int[i+1];
41744                 v0r->ptr.p_double[i] = v0r->ptr.p_double[i+1]*s;
41745             }
41746             *nz0 = *nz0-1;
41747         }
41748     }
41749 
41750     /*
41751      * Pivot out row IPiv from the sparse linked storage:
41752      * * remove row IPiv from the matrix
41753      * * reindex elements of row K after it is permuted to IPiv
41754      * * apply permutation to the cols of the pivoted out row,
41755      *   resort columns
41756      */
41757     *nz1 = 0;
41758     entry = a->slsrowptr.ptr.p_int[ipiv];
41759     while(entry>=0)
41760     {
41761 
41762         /*
41763          * Offload element
41764          */
41765         j = a->slsidx.ptr.p_int[entry*sptrf_slswidth+5];
41766         v1i->ptr.p_int[*nz1] = j;
41767         v1r->ptr.p_double[*nz1] = a->slsval.ptr.p_double[entry];
41768         *nz1 = *nz1+1;
41769 
41770         /*
41771          * Remove element from the column list
41772          */
41773         pprev = a->slsidx.ptr.p_int[entry*sptrf_slswidth+0];
41774         pnext = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41775         if( pprev>=0 )
41776         {
41777             a->slsidx.ptr.p_int[pprev*sptrf_slswidth+1] = pnext;
41778         }
41779         else
41780         {
41781             a->slscolptr.ptr.p_int[j] = pnext;
41782         }
41783         if( pnext>=0 )
41784         {
41785             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+0] = pprev;
41786         }
41787 
41788         /*
41789          * Select next entry
41790          */
41791         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+3];
41792     }
41793     a->slsrowptr.ptr.p_int[ipiv] = a->slsrowptr.ptr.p_int[k];
41794     entry = a->slsrowptr.ptr.p_int[ipiv];
41795     while(entry>=0)
41796     {
41797 
41798         /*
41799          * Change row index
41800          */
41801         a->slsidx.ptr.p_int[entry*sptrf_slswidth+4] = ipiv;
41802 
41803         /*
41804          * Resort column affected by row pivoting
41805          */
41806         j = a->slsidx.ptr.p_int[entry*sptrf_slswidth+5];
41807         pprev = a->slsidx.ptr.p_int[entry*sptrf_slswidth+0];
41808         pnext = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41809         while(pnext>=0&&a->slsidx.ptr.p_int[pnext*sptrf_slswidth+4]<ipiv)
41810         {
41811             pnextnext = a->slsidx.ptr.p_int[pnext*sptrf_slswidth+1];
41812 
41813             /*
41814              * prev->next
41815              */
41816             if( pprev>=0 )
41817             {
41818                 a->slsidx.ptr.p_int[pprev*sptrf_slswidth+1] = pnext;
41819             }
41820             else
41821             {
41822                 a->slscolptr.ptr.p_int[j] = pnext;
41823             }
41824 
41825             /*
41826              * entry->prev, entry->next
41827              */
41828             a->slsidx.ptr.p_int[entry*sptrf_slswidth+0] = pnext;
41829             a->slsidx.ptr.p_int[entry*sptrf_slswidth+1] = pnextnext;
41830 
41831             /*
41832              * next->prev, next->next
41833              */
41834             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+0] = pprev;
41835             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+1] = entry;
41836 
41837             /*
41838              * nextnext->prev
41839              */
41840             if( pnextnext>=0 )
41841             {
41842                 a->slsidx.ptr.p_int[pnextnext*sptrf_slswidth+0] = entry;
41843             }
41844 
41845             /*
41846              * PPrev, Item, PNext
41847              */
41848             pprev = pnext;
41849             pnext = pnextnext;
41850         }
41851 
41852         /*
41853          * Next entry
41854          */
41855         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+3];
41856     }
41857 
41858     /*
41859      * Reorder other structures
41860      */
41861     i = a->nzc.ptr.p_int[k];
41862     a->nzc.ptr.p_int[k] = a->nzc.ptr.p_int[jpiv];
41863     a->nzc.ptr.p_int[jpiv] = i;
41864     i = a->colid.ptr.p_int[k];
41865     a->colid.ptr.p_int[k] = a->colid.ptr.p_int[jpiv];
41866     a->colid.ptr.p_int[jpiv] = i;
41867     vb = a->isdensified.ptr.p_bool[k];
41868     a->isdensified.ptr.p_bool[k] = a->isdensified.ptr.p_bool[jpiv];
41869     a->isdensified.ptr.p_bool[jpiv] = vb;
41870 
41871     /*
41872      * Handle removal of col/row #K
41873      */
41874     for(i=0; i<=*nz1-1; i++)
41875     {
41876         j = v1i->ptr.p_int[i];
41877         a->nzc.ptr.p_int[j] = a->nzc.ptr.p_int[j]-1;
41878     }
41879     a->k = a->k+1;
41880 }
41881 
41882 
41883 /*************************************************************************
41884 This function densifies I1-th column of the sparse trail.
41885 
41886 PARAMETERS:
41887     A           -   sparse trail
41888     I1          -   column index
41889     BUpper      -   upper rectangular submatrix, updated during densification
41890                     of the columns (densified columns are removed)
41891     DTrail      -   dense trail, receives densified columns from sparse
41892                     trail and BUpper
41893 
41894   -- ALGLIB routine --
41895      15.01.2019
41896      Bochkanov Sergey
41897 *************************************************************************/
sptrf_sparsetraildensify(sluv2sparsetrail * a,ae_int_t i1,sluv2list1matrix * bupper,sluv2densetrail * dtrail,ae_state * _state)41898 static void sptrf_sparsetraildensify(sluv2sparsetrail* a,
41899      ae_int_t i1,
41900      sluv2list1matrix* bupper,
41901      sluv2densetrail* dtrail,
41902      ae_state *_state)
41903 {
41904     ae_int_t n;
41905     ae_int_t k;
41906     ae_int_t i;
41907     ae_int_t jp;
41908     ae_int_t entry;
41909     ae_int_t pprev;
41910     ae_int_t pnext;
41911 
41912 
41913     n = a->n;
41914     k = a->k;
41915     ae_assert(k<n, "SparseTrailDensify: integrity check failed", _state);
41916     ae_assert(k<=i1, "SparseTrailDensify: integrity check failed", _state);
41917     ae_assert(!a->isdensified.ptr.p_bool[i1], "SparseTrailDensify: integrity check failed", _state);
41918 
41919     /*
41920      * Offload items [0,K) of densified column from BUpper
41921      */
41922     for(i=0; i<=n-1; i++)
41923     {
41924         a->tmp0.ptr.p_double[i] = (double)(0);
41925     }
41926     jp = bupper->idxfirst.ptr.p_int[i1];
41927     while(jp>=0)
41928     {
41929         a->tmp0.ptr.p_double[bupper->strgidx.ptr.p_int[2*jp+1]] = bupper->strgval.ptr.p_double[jp];
41930         jp = bupper->strgidx.ptr.p_int[2*jp+0];
41931     }
41932     sptrf_sluv2list1dropsequence(bupper, i1, _state);
41933 
41934     /*
41935      * Offload items [K,N) of densified column from BLeft
41936      */
41937     entry = a->slscolptr.ptr.p_int[i1];
41938     while(entry>=0)
41939     {
41940 
41941         /*
41942          * Offload element
41943          */
41944         i = a->slsidx.ptr.p_int[entry*sptrf_slswidth+4];
41945         a->tmp0.ptr.p_double[i] = a->slsval.ptr.p_double[entry];
41946 
41947         /*
41948          * Remove element from the row list
41949          */
41950         pprev = a->slsidx.ptr.p_int[entry*sptrf_slswidth+2];
41951         pnext = a->slsidx.ptr.p_int[entry*sptrf_slswidth+3];
41952         if( pprev>=0 )
41953         {
41954             a->slsidx.ptr.p_int[pprev*sptrf_slswidth+3] = pnext;
41955         }
41956         else
41957         {
41958             a->slsrowptr.ptr.p_int[i] = pnext;
41959         }
41960         if( pnext>=0 )
41961         {
41962             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+2] = pprev;
41963         }
41964 
41965         /*
41966          * Select next entry
41967          */
41968         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41969     }
41970 
41971     /*
41972      * Densify
41973      */
41974     a->nzc.ptr.p_int[i1] = 0;
41975     a->isdensified.ptr.p_bool[i1] = ae_true;
41976     a->slscolptr.ptr.p_int[i1] = -1;
41977     sptrf_densetrailappendcolumn(dtrail, &a->tmp0, a->colid.ptr.p_int[i1], _state);
41978 }
41979 
41980 
41981 /*************************************************************************
41982 This function appends rank-1 update to the sparse trail.  Dense  trail  is
41983 not  updated  here,  but  we  may  move some columns to dense trail during
41984 update (i.e. densify them). Thus, you have to update  dense  trail  BEFORE
41985 you start updating sparse one (otherwise, recently densified columns  will
41986 be updated twice).
41987 
41988 PARAMETERS:
41989     A           -   sparse trail
41990     V0I, V0R    -   update column returned by SparseTrailPivotOut (MUST be
41991                     array[N] independently of the NZ0).
41992     NZ0         -   non-zero count for update column
41993     V1I, V1R    -   update row returned by SparseTrailPivotOut
41994     NZ1         -   non-zero count for update row
41995     BUpper      -   upper rectangular submatrix, updated during densification
41996                     of the columns (densified columns are removed)
41997     DTrail      -   dense trail, receives densified columns from sparse
41998                     trail and BUpper
41999     DensificationSupported- if False, no densification is performed
42000 
42001   -- ALGLIB routine --
42002      15.01.2019
42003      Bochkanov Sergey
42004 *************************************************************************/
sptrf_sparsetrailupdate(sluv2sparsetrail * a,ae_vector * v0i,ae_vector * v0r,ae_int_t nz0,ae_vector * v1i,ae_vector * v1r,ae_int_t nz1,sluv2list1matrix * bupper,sluv2densetrail * dtrail,ae_bool densificationsupported,ae_state * _state)42005 static void sptrf_sparsetrailupdate(sluv2sparsetrail* a,
42006      /* Integer */ ae_vector* v0i,
42007      /* Real    */ ae_vector* v0r,
42008      ae_int_t nz0,
42009      /* Integer */ ae_vector* v1i,
42010      /* Real    */ ae_vector* v1r,
42011      ae_int_t nz1,
42012      sluv2list1matrix* bupper,
42013      sluv2densetrail* dtrail,
42014      ae_bool densificationsupported,
42015      ae_state *_state)
42016 {
42017     ae_int_t n;
42018     ae_int_t k;
42019     ae_int_t i;
42020     ae_int_t j;
42021     ae_int_t i0;
42022     ae_int_t i1;
42023     double v1;
42024     ae_int_t densifyabove;
42025     ae_int_t nnz;
42026     ae_int_t entry;
42027     ae_int_t newentry;
42028     ae_int_t pprev;
42029     ae_int_t pnext;
42030     ae_int_t p;
42031     ae_int_t nexti;
42032     ae_int_t newoffs;
42033 
42034 
42035     n = a->n;
42036     k = a->k;
42037     ae_assert(k<n, "SparseTrailPivotOut: integrity check failed", _state);
42038     densifyabove = ae_round(sptrf_densebnd*(n-k), _state)+1;
42039     ae_assert(v0i->cnt>=nz0+1, "SparseTrailUpdate: integrity check failed", _state);
42040     ae_assert(v0r->cnt>=nz0+1, "SparseTrailUpdate: integrity check failed", _state);
42041     v0i->ptr.p_int[nz0] = -1;
42042     v0r->ptr.p_double[nz0] = (double)(0);
42043 
42044     /*
42045      * Update sparse representation
42046      */
42047     ivectorgrowto(&a->slsidx, (a->slsused+nz0*nz1)*sptrf_slswidth, _state);
42048     rvectorgrowto(&a->slsval, a->slsused+nz0*nz1, _state);
42049     for(j=0; j<=nz1-1; j++)
42050     {
42051         if( nz0==0 )
42052         {
42053             continue;
42054         }
42055         i1 = v1i->ptr.p_int[j];
42056         v1 = v1r->ptr.p_double[j];
42057 
42058         /*
42059          * Update column #I1
42060          */
42061         nnz = a->nzc.ptr.p_int[i1];
42062         i = 0;
42063         i0 = v0i->ptr.p_int[i];
42064         entry = a->slscolptr.ptr.p_int[i1];
42065         pprev = -1;
42066         while(i<nz0)
42067         {
42068 
42069             /*
42070              * Handle possible fill-in happening BEFORE already existing
42071              * entry of the column list (or simply fill-in, if no entry
42072              * is present).
42073              */
42074             pnext = entry;
42075             if( entry>=0 )
42076             {
42077                 nexti = a->slsidx.ptr.p_int[entry*sptrf_slswidth+4];
42078             }
42079             else
42080             {
42081                 nexti = n+1;
42082             }
42083             while(i<nz0)
42084             {
42085                 if( i0>=nexti )
42086                 {
42087                     break;
42088                 }
42089 
42090                 /*
42091                  * Allocate new entry, store column/row/value
42092                  */
42093                 newentry = a->slsused;
42094                 a->slsused = newentry+1;
42095                 nnz = nnz+1;
42096                 newoffs = newentry*sptrf_slswidth;
42097                 a->slsidx.ptr.p_int[newoffs+4] = i0;
42098                 a->slsidx.ptr.p_int[newoffs+5] = i1;
42099                 a->slsval.ptr.p_double[newentry] = -v1*v0r->ptr.p_double[i];
42100 
42101                 /*
42102                  * Insert entry into column list
42103                  */
42104                 a->slsidx.ptr.p_int[newoffs+0] = pprev;
42105                 a->slsidx.ptr.p_int[newoffs+1] = pnext;
42106                 if( pprev>=0 )
42107                 {
42108                     a->slsidx.ptr.p_int[pprev*sptrf_slswidth+1] = newentry;
42109                 }
42110                 else
42111                 {
42112                     a->slscolptr.ptr.p_int[i1] = newentry;
42113                 }
42114                 if( entry>=0 )
42115                 {
42116                     a->slsidx.ptr.p_int[entry*sptrf_slswidth+0] = newentry;
42117                 }
42118 
42119                 /*
42120                  * Insert entry into row list
42121                  */
42122                 p = a->slsrowptr.ptr.p_int[i0];
42123                 a->slsidx.ptr.p_int[newoffs+2] = -1;
42124                 a->slsidx.ptr.p_int[newoffs+3] = p;
42125                 if( p>=0 )
42126                 {
42127                     a->slsidx.ptr.p_int[p*sptrf_slswidth+2] = newentry;
42128                 }
42129                 a->slsrowptr.ptr.p_int[i0] = newentry;
42130 
42131                 /*
42132                  * Advance pointers
42133                  */
42134                 pprev = newentry;
42135                 i = i+1;
42136                 i0 = v0i->ptr.p_int[i];
42137             }
42138             if( i>=nz0 )
42139             {
42140                 break;
42141             }
42142 
42143             /*
42144              * Update already existing entry of the column list, if needed
42145              */
42146             if( entry>=0 )
42147             {
42148                 if( i0==nexti )
42149                 {
42150                     a->slsval.ptr.p_double[entry] = a->slsval.ptr.p_double[entry]-v1*v0r->ptr.p_double[i];
42151                     i = i+1;
42152                     i0 = v0i->ptr.p_int[i];
42153                 }
42154                 pprev = entry;
42155             }
42156 
42157             /*
42158              * Advance to the next pre-existing entry (if present)
42159              */
42160             if( entry>=0 )
42161             {
42162                 entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
42163             }
42164         }
42165         a->nzc.ptr.p_int[i1] = nnz;
42166 
42167         /*
42168          * Densify column if needed
42169          */
42170         if( (densificationsupported&&nnz>densifyabove)&&!a->isdensified.ptr.p_bool[i1] )
42171         {
42172             sptrf_sparsetraildensify(a, i1, bupper, dtrail, _state);
42173         }
42174     }
42175 }
42176 
42177 
_sluv2list1matrix_init(void * _p,ae_state * _state,ae_bool make_automatic)42178 void _sluv2list1matrix_init(void* _p, ae_state *_state, ae_bool make_automatic)
42179 {
42180     sluv2list1matrix *p = (sluv2list1matrix*)_p;
42181     ae_touch_ptr((void*)p);
42182     ae_vector_init(&p->idxfirst, 0, DT_INT, _state, make_automatic);
42183     ae_vector_init(&p->strgidx, 0, DT_INT, _state, make_automatic);
42184     ae_vector_init(&p->strgval, 0, DT_REAL, _state, make_automatic);
42185 }
42186 
42187 
_sluv2list1matrix_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)42188 void _sluv2list1matrix_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
42189 {
42190     sluv2list1matrix *dst = (sluv2list1matrix*)_dst;
42191     sluv2list1matrix *src = (sluv2list1matrix*)_src;
42192     dst->nfixed = src->nfixed;
42193     dst->ndynamic = src->ndynamic;
42194     ae_vector_init_copy(&dst->idxfirst, &src->idxfirst, _state, make_automatic);
42195     ae_vector_init_copy(&dst->strgidx, &src->strgidx, _state, make_automatic);
42196     ae_vector_init_copy(&dst->strgval, &src->strgval, _state, make_automatic);
42197     dst->nallocated = src->nallocated;
42198     dst->nused = src->nused;
42199 }
42200 
42201 
_sluv2list1matrix_clear(void * _p)42202 void _sluv2list1matrix_clear(void* _p)
42203 {
42204     sluv2list1matrix *p = (sluv2list1matrix*)_p;
42205     ae_touch_ptr((void*)p);
42206     ae_vector_clear(&p->idxfirst);
42207     ae_vector_clear(&p->strgidx);
42208     ae_vector_clear(&p->strgval);
42209 }
42210 
42211 
_sluv2list1matrix_destroy(void * _p)42212 void _sluv2list1matrix_destroy(void* _p)
42213 {
42214     sluv2list1matrix *p = (sluv2list1matrix*)_p;
42215     ae_touch_ptr((void*)p);
42216     ae_vector_destroy(&p->idxfirst);
42217     ae_vector_destroy(&p->strgidx);
42218     ae_vector_destroy(&p->strgval);
42219 }
42220 
42221 
_sluv2sparsetrail_init(void * _p,ae_state * _state,ae_bool make_automatic)42222 void _sluv2sparsetrail_init(void* _p, ae_state *_state, ae_bool make_automatic)
42223 {
42224     sluv2sparsetrail *p = (sluv2sparsetrail*)_p;
42225     ae_touch_ptr((void*)p);
42226     ae_vector_init(&p->nzc, 0, DT_INT, _state, make_automatic);
42227     ae_vector_init(&p->wrkset, 0, DT_INT, _state, make_automatic);
42228     ae_vector_init(&p->colid, 0, DT_INT, _state, make_automatic);
42229     ae_vector_init(&p->isdensified, 0, DT_BOOL, _state, make_automatic);
42230     ae_vector_init(&p->slscolptr, 0, DT_INT, _state, make_automatic);
42231     ae_vector_init(&p->slsrowptr, 0, DT_INT, _state, make_automatic);
42232     ae_vector_init(&p->slsidx, 0, DT_INT, _state, make_automatic);
42233     ae_vector_init(&p->slsval, 0, DT_REAL, _state, make_automatic);
42234     ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
42235 }
42236 
42237 
_sluv2sparsetrail_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)42238 void _sluv2sparsetrail_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
42239 {
42240     sluv2sparsetrail *dst = (sluv2sparsetrail*)_dst;
42241     sluv2sparsetrail *src = (sluv2sparsetrail*)_src;
42242     dst->n = src->n;
42243     dst->k = src->k;
42244     ae_vector_init_copy(&dst->nzc, &src->nzc, _state, make_automatic);
42245     dst->maxwrkcnt = src->maxwrkcnt;
42246     dst->maxwrknz = src->maxwrknz;
42247     dst->wrkcnt = src->wrkcnt;
42248     ae_vector_init_copy(&dst->wrkset, &src->wrkset, _state, make_automatic);
42249     ae_vector_init_copy(&dst->colid, &src->colid, _state, make_automatic);
42250     ae_vector_init_copy(&dst->isdensified, &src->isdensified, _state, make_automatic);
42251     ae_vector_init_copy(&dst->slscolptr, &src->slscolptr, _state, make_automatic);
42252     ae_vector_init_copy(&dst->slsrowptr, &src->slsrowptr, _state, make_automatic);
42253     ae_vector_init_copy(&dst->slsidx, &src->slsidx, _state, make_automatic);
42254     ae_vector_init_copy(&dst->slsval, &src->slsval, _state, make_automatic);
42255     dst->slsused = src->slsused;
42256     ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
42257 }
42258 
42259 
_sluv2sparsetrail_clear(void * _p)42260 void _sluv2sparsetrail_clear(void* _p)
42261 {
42262     sluv2sparsetrail *p = (sluv2sparsetrail*)_p;
42263     ae_touch_ptr((void*)p);
42264     ae_vector_clear(&p->nzc);
42265     ae_vector_clear(&p->wrkset);
42266     ae_vector_clear(&p->colid);
42267     ae_vector_clear(&p->isdensified);
42268     ae_vector_clear(&p->slscolptr);
42269     ae_vector_clear(&p->slsrowptr);
42270     ae_vector_clear(&p->slsidx);
42271     ae_vector_clear(&p->slsval);
42272     ae_vector_clear(&p->tmp0);
42273 }
42274 
42275 
_sluv2sparsetrail_destroy(void * _p)42276 void _sluv2sparsetrail_destroy(void* _p)
42277 {
42278     sluv2sparsetrail *p = (sluv2sparsetrail*)_p;
42279     ae_touch_ptr((void*)p);
42280     ae_vector_destroy(&p->nzc);
42281     ae_vector_destroy(&p->wrkset);
42282     ae_vector_destroy(&p->colid);
42283     ae_vector_destroy(&p->isdensified);
42284     ae_vector_destroy(&p->slscolptr);
42285     ae_vector_destroy(&p->slsrowptr);
42286     ae_vector_destroy(&p->slsidx);
42287     ae_vector_destroy(&p->slsval);
42288     ae_vector_destroy(&p->tmp0);
42289 }
42290 
42291 
_sluv2densetrail_init(void * _p,ae_state * _state,ae_bool make_automatic)42292 void _sluv2densetrail_init(void* _p, ae_state *_state, ae_bool make_automatic)
42293 {
42294     sluv2densetrail *p = (sluv2densetrail*)_p;
42295     ae_touch_ptr((void*)p);
42296     ae_matrix_init(&p->d, 0, 0, DT_REAL, _state, make_automatic);
42297     ae_vector_init(&p->did, 0, DT_INT, _state, make_automatic);
42298 }
42299 
42300 
_sluv2densetrail_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)42301 void _sluv2densetrail_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
42302 {
42303     sluv2densetrail *dst = (sluv2densetrail*)_dst;
42304     sluv2densetrail *src = (sluv2densetrail*)_src;
42305     dst->n = src->n;
42306     dst->ndense = src->ndense;
42307     ae_matrix_init_copy(&dst->d, &src->d, _state, make_automatic);
42308     ae_vector_init_copy(&dst->did, &src->did, _state, make_automatic);
42309 }
42310 
42311 
_sluv2densetrail_clear(void * _p)42312 void _sluv2densetrail_clear(void* _p)
42313 {
42314     sluv2densetrail *p = (sluv2densetrail*)_p;
42315     ae_touch_ptr((void*)p);
42316     ae_matrix_clear(&p->d);
42317     ae_vector_clear(&p->did);
42318 }
42319 
42320 
_sluv2densetrail_destroy(void * _p)42321 void _sluv2densetrail_destroy(void* _p)
42322 {
42323     sluv2densetrail *p = (sluv2densetrail*)_p;
42324     ae_touch_ptr((void*)p);
42325     ae_matrix_destroy(&p->d);
42326     ae_vector_destroy(&p->did);
42327 }
42328 
42329 
_sluv2buffer_init(void * _p,ae_state * _state,ae_bool make_automatic)42330 void _sluv2buffer_init(void* _p, ae_state *_state, ae_bool make_automatic)
42331 {
42332     sluv2buffer *p = (sluv2buffer*)_p;
42333     ae_touch_ptr((void*)p);
42334     _sparsematrix_init(&p->sparsel, _state, make_automatic);
42335     _sparsematrix_init(&p->sparseut, _state, make_automatic);
42336     _sluv2list1matrix_init(&p->bleft, _state, make_automatic);
42337     _sluv2list1matrix_init(&p->bupper, _state, make_automatic);
42338     _sluv2sparsetrail_init(&p->strail, _state, make_automatic);
42339     _sluv2densetrail_init(&p->dtrail, _state, make_automatic);
42340     ae_vector_init(&p->rowpermrawidx, 0, DT_INT, _state, make_automatic);
42341     ae_matrix_init(&p->dbuf, 0, 0, DT_REAL, _state, make_automatic);
42342     ae_vector_init(&p->v0i, 0, DT_INT, _state, make_automatic);
42343     ae_vector_init(&p->v1i, 0, DT_INT, _state, make_automatic);
42344     ae_vector_init(&p->v0r, 0, DT_REAL, _state, make_automatic);
42345     ae_vector_init(&p->v1r, 0, DT_REAL, _state, make_automatic);
42346     ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
42347     ae_vector_init(&p->tmpi, 0, DT_INT, _state, make_automatic);
42348     ae_vector_init(&p->tmpp, 0, DT_INT, _state, make_automatic);
42349 }
42350 
42351 
_sluv2buffer_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)42352 void _sluv2buffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
42353 {
42354     sluv2buffer *dst = (sluv2buffer*)_dst;
42355     sluv2buffer *src = (sluv2buffer*)_src;
42356     dst->n = src->n;
42357     _sparsematrix_init_copy(&dst->sparsel, &src->sparsel, _state, make_automatic);
42358     _sparsematrix_init_copy(&dst->sparseut, &src->sparseut, _state, make_automatic);
42359     _sluv2list1matrix_init_copy(&dst->bleft, &src->bleft, _state, make_automatic);
42360     _sluv2list1matrix_init_copy(&dst->bupper, &src->bupper, _state, make_automatic);
42361     _sluv2sparsetrail_init_copy(&dst->strail, &src->strail, _state, make_automatic);
42362     _sluv2densetrail_init_copy(&dst->dtrail, &src->dtrail, _state, make_automatic);
42363     ae_vector_init_copy(&dst->rowpermrawidx, &src->rowpermrawidx, _state, make_automatic);
42364     ae_matrix_init_copy(&dst->dbuf, &src->dbuf, _state, make_automatic);
42365     ae_vector_init_copy(&dst->v0i, &src->v0i, _state, make_automatic);
42366     ae_vector_init_copy(&dst->v1i, &src->v1i, _state, make_automatic);
42367     ae_vector_init_copy(&dst->v0r, &src->v0r, _state, make_automatic);
42368     ae_vector_init_copy(&dst->v1r, &src->v1r, _state, make_automatic);
42369     ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
42370     ae_vector_init_copy(&dst->tmpi, &src->tmpi, _state, make_automatic);
42371     ae_vector_init_copy(&dst->tmpp, &src->tmpp, _state, make_automatic);
42372 }
42373 
42374 
_sluv2buffer_clear(void * _p)42375 void _sluv2buffer_clear(void* _p)
42376 {
42377     sluv2buffer *p = (sluv2buffer*)_p;
42378     ae_touch_ptr((void*)p);
42379     _sparsematrix_clear(&p->sparsel);
42380     _sparsematrix_clear(&p->sparseut);
42381     _sluv2list1matrix_clear(&p->bleft);
42382     _sluv2list1matrix_clear(&p->bupper);
42383     _sluv2sparsetrail_clear(&p->strail);
42384     _sluv2densetrail_clear(&p->dtrail);
42385     ae_vector_clear(&p->rowpermrawidx);
42386     ae_matrix_clear(&p->dbuf);
42387     ae_vector_clear(&p->v0i);
42388     ae_vector_clear(&p->v1i);
42389     ae_vector_clear(&p->v0r);
42390     ae_vector_clear(&p->v1r);
42391     ae_vector_clear(&p->tmp0);
42392     ae_vector_clear(&p->tmpi);
42393     ae_vector_clear(&p->tmpp);
42394 }
42395 
42396 
_sluv2buffer_destroy(void * _p)42397 void _sluv2buffer_destroy(void* _p)
42398 {
42399     sluv2buffer *p = (sluv2buffer*)_p;
42400     ae_touch_ptr((void*)p);
42401     _sparsematrix_destroy(&p->sparsel);
42402     _sparsematrix_destroy(&p->sparseut);
42403     _sluv2list1matrix_destroy(&p->bleft);
42404     _sluv2list1matrix_destroy(&p->bupper);
42405     _sluv2sparsetrail_destroy(&p->strail);
42406     _sluv2densetrail_destroy(&p->dtrail);
42407     ae_vector_destroy(&p->rowpermrawidx);
42408     ae_matrix_destroy(&p->dbuf);
42409     ae_vector_destroy(&p->v0i);
42410     ae_vector_destroy(&p->v1i);
42411     ae_vector_destroy(&p->v0r);
42412     ae_vector_destroy(&p->v1r);
42413     ae_vector_destroy(&p->tmp0);
42414     ae_vector_destroy(&p->tmpi);
42415     ae_vector_destroy(&p->tmpp);
42416 }
42417 
42418 
42419 #endif
42420 #if defined(AE_COMPILE_AMDORDERING) || !defined(AE_PARTIAL_BUILD)
42421 
42422 
42423 /*************************************************************************
42424 This function generates approximate minimum degree ordering
42425 
42426 INPUT PARAMETERS
42427     A           -   lower triangular sparse matrix  in  CRS  format.  Only
42428                     sparsity structure (as given by Idx[] field)  matters,
42429                     specific values of matrix elements are ignored.
42430     N           -   problem size
42431     Buf         -   reusable buffer object, does not need special initialization
42432 
42433 OUTPUT PARAMETERS
42434     Perm        -   array[N], maps original indexes I to permuted indexes
42435     InvPerm     -   array[N], maps permuted indexes I to original indexes
42436 
NOTE: defining 'DEBUG.SLOW' trace tag will  activate  extra-slow  (roughly
      N^3 ops) integrity checks, in addition to cheap O(1) ones.
42439 
42440   -- ALGLIB PROJECT --
42441      Copyright 05.10.2020 by Bochkanov Sergey.
42442 *************************************************************************/
generateamdpermutation(sparsematrix * a,ae_int_t n,ae_vector * perm,ae_vector * invperm,amdbuffer * buf,ae_state * _state)42443 void generateamdpermutation(sparsematrix* a,
42444      ae_int_t n,
42445      /* Integer */ ae_vector* perm,
42446      /* Integer */ ae_vector* invperm,
42447      amdbuffer* buf,
42448      ae_state *_state)
42449 {
42450     ae_int_t r;
42451 
42452 
42453     r = generateamdpermutationx(a, n, perm, invperm, 0, buf, _state);
42454     ae_assert(r==n, "GenerateAMDPermutation: integrity check failed, the matrix is only partially processed", _state);
42455 }
42456 
42457 
42458 /*************************************************************************
42459 This  function  generates  approximate  minimum  degree ordering,   either
42460 classic or improved with better support for dense rows:
42461 * the classic version processed entire matrix and returns N as result. The
42462   problem with classic version is that it may be slow  for  matrices  with
42463   dense or nearly dense rows
42464 * the improved version processes K most sparse rows, and moves  other  N-K
42465   ones to the end. The number of sparse rows  K  is  returned.  The  tail,
42466   which is now a (N-K)*(N-K) matrix, should be repeatedly processed by the
42467   same function until zero is returned.
42468 
42469 INPUT PARAMETERS
42470     A           -   lower triangular sparse matrix in CRS format
42471     N           -   problem size
42472     AMDType     -   ordering type:
42473                     * 0 for the classic AMD
42474                     * 1 for the improved AMD
42475     Buf         -   reusable buffer object, does not need special initialization
42476 
42477 OUTPUT PARAMETERS
42478     Perm        -   array[N], maps original indexes I to permuted indexes
42479     InvPerm     -   array[N], maps permuted indexes I to original indexes
42480 
42481 RESULT:
42482     number of successfully ordered rows/cols;
42483     N for AMDType=0, 0<Result<=N for AMDType=1
42484 
42485 NOTE: defining 'DEBUG.SLOW' trace tag will  activate  extra-slow  (roughly
42486       N^3 ops) integrity checks, in addition to cheap O(1) ones.
42487 
42488   -- ALGLIB PROJECT --
42489      Copyright 05.10.2020 by Bochkanov Sergey.
42490 *************************************************************************/
ae_int_t generateamdpermutationx(sparsematrix* a,
     ae_int_t n,
     /* Integer */ ae_vector* perm,
     /* Integer */ ae_vector* invperm,
     ae_int_t amdtype,
     amdbuffer* buf,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t p;
    ae_int_t setprealloc;
    ae_int_t inithashbucketsize;
    ae_bool extendeddebug;
    ae_int_t nodesize;
    ae_int_t cnt0;
    ae_int_t cnt1;
    ae_int_t tau;
    double meand;
    ae_int_t d;
    ae_int_t result;


    ae_assert(amdtype==0||amdtype==1, "GenerateAMDPermutationX: unexpected ordering type", _state);

    /*
     * Initialization:
     * * extended (extra-slow) debug checks are activated only when the
     *   'DEBUG.SLOW' trace tag is defined AND the problem is small (N<=100)
     * * Buf fields are (re)initialized: adjacency structure SetA built from A,
     *   supernode sets (initially each variable is its own supernode),
     *   element sets SetE, hash buckets, identity permutations, degree tracker
     */
    setprealloc = 3;
    inithashbucketsize = 16;
    extendeddebug = ae_is_trace_enabled("DEBUG.SLOW")&&n<=100;
    result = n;
    buf->n = n;
    buf->checkexactdegrees = extendeddebug;
    buf->extendeddebug = extendeddebug;
    amdordering_mtxinit(n, &buf->mtxl, _state);
    amdordering_knsinitfroma(a, n, &buf->seta, _state);
    amdordering_knsinit(n, n, setprealloc, &buf->setsuper, _state);
    for(i=0; i<=n-1; i++)
    {
        amdordering_knsaddnewelement(&buf->setsuper, i, i, _state);
    }
    amdordering_knsinit(n, n, setprealloc, &buf->sete, _state);
    amdordering_knsinit(n, n, inithashbucketsize, &buf->hashbuckets, _state);
    amdordering_nsinitemptyslow(n, &buf->nonemptybuckets, _state);
    ivectorsetlengthatleast(&buf->perm, n, _state);
    ivectorsetlengthatleast(&buf->invperm, n, _state);
    ivectorsetlengthatleast(&buf->columnswaps, n, _state);
    for(i=0; i<=n-1; i++)
    {
        buf->perm.ptr.p_int[i] = i;
        buf->invperm.ptr.p_int[i] = i;
        buf->columnswaps.ptr.p_int[i] = i;
    }
    amdordering_vtxinit(a, n, buf->checkexactdegrees, &buf->vertexdegrees, _state);
    bsetallocv(n, ae_true, &buf->issupernode, _state);
    bsetallocv(n, ae_false, &buf->iseliminated, _state);
    isetallocv(n, -1, &buf->arrwe, _state);
    if( extendeddebug )
    {
        /*
         * Build a synthetic SPD-looking dense matrix DbgA with the same sparsity
         * pattern as A (small pseudo-random off-diagonal entries, unit diagonal);
         * it is used below to cross-check fill-in predicted by AMD against an
         * actual dense elimination.
         */
        ae_matrix_set_length(&buf->dbga, n, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                if( (j<=i&&sparseexists(a, i, j, _state))||(j>=i&&sparseexists(a, j, i, _state)) )
                {
                    buf->dbga.ptr.pp_double[i][j] = 0.1/n*(ae_sin(i+0.17, _state)+ae_cos(ae_sqrt(j+0.65, _state), _state));
                }
                else
                {
                    buf->dbga.ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        for(i=0; i<=n-1; i++)
        {
            buf->dbga.ptr.pp_double[i][i] = (double)(1);
        }
    }

    /*
     * For the improved AMD (AMDType=1) compute dense-row threshold Tau from the
     * mean approximate degree; rows denser than this are postponed (moved to the
     * tail set SetQ). Tau=0 for the classic AMD, which postpones nothing.
     */
    tau = 0;
    if( amdtype==1 )
    {
        meand = 0.0;
        for(i=0; i<=n-1; i++)
        {
            d = amdordering_vtxgetapprox(&buf->vertexdegrees, i, _state);
            meand = meand+d;
        }
        meand = meand/n;
        tau = ae_round(10*meand, _state)+2;
    }
    ivectorsetlengthatleast(&buf->ls, n, _state);
    amdordering_nsinitemptyslow(n, &buf->setp, _state);
    amdordering_nsinitemptyslow(n, &buf->lp, _state);
    amdordering_nsinitemptyslow(n, &buf->setrp, _state);
    amdordering_nsinitemptyslow(n, &buf->ep, _state);
    amdordering_nsinitemptyslow(n, &buf->exactdegreetmp0, _state);
    amdordering_nsinitemptyslow(n, &buf->adji, _state);
    amdordering_nsinitemptyslow(n, &buf->adjj, _state);
    amdordering_nsinitemptyslow(n, &buf->setq, _state);
    amdordering_nsinitemptyslow(n, &buf->setqsupercand, _state);

    /*
     * Main elimination loop: K counts already-ordered columns; SetQ holds
     * postponed (quasi-dense) variables, so the loop stops as soon as only
     * postponed variables remain.
     */
    k = 0;
    while(k<n-amdordering_nscount(&buf->setq, _state))
    {
        /*
         * Select pivot supernode P (of size NodeSize), compute its elimination
         * set Lp, perform mass elimination. Candidates for postponing (degree
         * above Tau) are collected in SetQSuperCand by the mass elimination.
         */
        amdordering_amdselectpivotelement(buf, k, &p, &nodesize, _state);
        amdordering_amdcomputelp(buf, p, _state);
        amdordering_amdmasselimination(buf, p, k, tau, _state);

        /*
         * Move postponed supernode candidates to SetQ: their member variables
         * are added to SetQ, and the supernodes themselves are retired from the
         * active structures (SetA, SetE, degree tracker).
         */
        amdordering_nsstartenumeration(&buf->setqsupercand, _state);
        while(amdordering_nsenumerate(&buf->setqsupercand, &j, _state))
        {
            ae_assert(j!=p, "AMD: integrity check 9464 failed", _state);
            ae_assert(buf->issupernode.ptr.p_bool[j], "AMD: integrity check 6284 failed", _state);
            ae_assert(!buf->iseliminated.ptr.p_bool[j], "AMD: integrity check 3858 failed", _state);
            amdordering_knsstartenumeration(&buf->setsuper, j, _state);
            while(amdordering_knsenumerate(&buf->setsuper, &i, _state))
            {
                amdordering_nsaddelement(&buf->setq, i, _state);
            }
            amdordering_knsclearkthreclaim(&buf->seta, j, _state);
            amdordering_knsclearkthreclaim(&buf->sete, j, _state);
            buf->issupernode.ptr.p_bool[j] = ae_false;
            amdordering_vtxremovevertex(&buf->vertexdegrees, j, _state);
        }
        amdordering_amddetectsupernodes(buf, _state);
        if( extendeddebug )
        {
            /*
             * Extra-slow integrity checks: replay the column swaps and dense
             * elimination steps on DbgA, then compare actual fill-in (nonzero
             * count in the pivot column) and exact degrees against the values
             * tracked by the AMD structures.
             */
            ae_assert(buf->checkexactdegrees, "AMD: extended debug needs exact degrees", _state);
            for(i=k; i<=k+nodesize-1; i++)
            {
                if( buf->columnswaps.ptr.p_int[i]!=i )
                {
                    swaprows(&buf->dbga, i, buf->columnswaps.ptr.p_int[i], n, _state);
                    swapcols(&buf->dbga, i, buf->columnswaps.ptr.p_int[i], n, _state);
                }
            }
            for(i=0; i<=nodesize-1; i++)
            {
                rmatrixgemm(n-k-i, n-k-i, k+i, -1.0, &buf->dbga, k+i, 0, 0, &buf->dbga, 0, k+i, 0, 1.0, &buf->dbga, k+i, k+i, _state);
            }
            cnt0 = amdordering_nscount(&buf->lp, _state);
            cnt1 = 0;
            for(i=k+1; i<=n-1; i++)
            {
                if( ae_fp_neq(buf->dbga.ptr.pp_double[i][k],(double)(0)) )
                {
                    inc(&cnt1, _state);
                }
            }
            ae_assert(cnt0+nodesize-1==cnt1, "AMD: integrity check 7344 failed", _state);
            ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, p, _state)>=amdordering_vtxgetexact(&buf->vertexdegrees, p, _state), "AMD: integrity check for ApproxD failed", _state);
            ae_assert(amdordering_vtxgetexact(&buf->vertexdegrees, p, _state)==cnt0, "AMD: integrity check for ExactD failed", _state);
        }
        ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, p, _state)>=amdordering_nscount(&buf->lp, _state), "AMD: integrity check 7956 failed", _state);
        ae_assert((amdordering_knscountkth(&buf->sete, p, _state)>2||amdordering_nscount(&buf->setq, _state)>0)||amdordering_vtxgetapprox(&buf->vertexdegrees, p, _state)==amdordering_nscount(&buf->lp, _state), "AMD: integrity check 7295 failed", _state);

        /*
         * Retire pivot P: clear columns of elements absorbed by P, mark all
         * variables of the pivot supernode as eliminated, clear their rows,
         * and remove P from the active sets and the degree tracker.
         */
        amdordering_knsstartenumeration(&buf->sete, p, _state);
        while(amdordering_knsenumerate(&buf->sete, &j, _state))
        {
            amdordering_mtxclearcolumn(&buf->mtxl, j, _state);
        }
        amdordering_knsstartenumeration(&buf->setsuper, p, _state);
        while(amdordering_knsenumerate(&buf->setsuper, &j, _state))
        {
            buf->iseliminated.ptr.p_bool[j] = ae_true;
            amdordering_mtxclearrow(&buf->mtxl, j, _state);
        }
        amdordering_knsclearkthreclaim(&buf->seta, p, _state);
        amdordering_knsclearkthreclaim(&buf->sete, p, _state);
        buf->issupernode.ptr.p_bool[p] = ae_false;
        amdordering_vtxremovevertex(&buf->vertexdegrees, p, _state);
        k = k+nodesize;
    }

    /*
     * Postprocessing: ordered count K plus postponed count must equal N;
     * copy the internally maintained permutations to the output arrays.
     * NOTE(review): Perm/InvPerm are copied in full (all N entries) even when
     * only K<N rows were ordered — presumably the tail entries are still a
    *  valid identity-like mapping maintained by the pivot-selection code.
     */
    ae_assert(k+amdordering_nscount(&buf->setq, _state)==n, "AMD: integrity check 6326 failed", _state);
    ae_assert(k>0, "AMD: integrity check 9463 failed", _state);
    result = k;
    ivectorsetlengthatleast(perm, n, _state);
    ivectorsetlengthatleast(invperm, n, _state);
    for(i=0; i<=n-1; i++)
    {
        perm->ptr.p_int[i] = buf->perm.ptr.p_int[i];
        invperm->ptr.p_int[i] = buf->invperm.ptr.p_int[i];
    }
    return result;
}
42672 
42673 
42674 /*************************************************************************
42675 Initializes n-set by empty structure.
42676 
42677 IMPORTANT: this function need O(N) time for initialization. It is recommended
42678            to reduce its usage as much as possible, and use nsClear()
42679            where possible.
42680 
42681 INPUT PARAMETERS
42682     N           -   possible set size
42683 
42684 OUTPUT PARAMETERS
42685     SA          -   empty N-set
42686 
42687   -- ALGLIB PROJECT --
42688      Copyright 05.10.2020 by Bochkanov Sergey.
42689 *************************************************************************/
amdordering_nsinitemptyslow(ae_int_t n,amdnset * sa,ae_state * _state)42690 static void amdordering_nsinitemptyslow(ae_int_t n,
42691      amdnset* sa,
42692      ae_state *_state)
42693 {
42694 
42695 
42696     sa->n = n;
42697     sa->nstored = 0;
42698     isetallocv(n, -999999999, &sa->locationof, _state);
42699     isetallocv(n, -999999999, &sa->items, _state);
42700 }
42701 
42702 
42703 /*************************************************************************
42704 Copies n-set to properly initialized target set. The target set has to  be
42705 properly initialized, and it can be non-empty. If  it  is  non-empty,  its
42706 contents is quickly erased before copying.
42707 
42708 The cost of this function is O(max(SrcSize,DstSize))
42709 
42710 INPUT PARAMETERS
42711     SSrc        -   source N-set
42712     SDst        -   destination N-set (has same size as SSrc)
42713 
42714 OUTPUT PARAMETERS
42715     SDst        -   copy of SSrc
42716 
42717   -- ALGLIB PROJECT --
42718      Copyright 05.10.2020 by Bochkanov Sergey.
42719 *************************************************************************/
amdordering_nscopy(amdnset * ssrc,amdnset * sdst,ae_state * _state)42720 static void amdordering_nscopy(amdnset* ssrc,
42721      amdnset* sdst,
42722      ae_state *_state)
42723 {
42724     ae_int_t ns;
42725     ae_int_t i;
42726     ae_int_t k;
42727 
42728 
42729     amdordering_nsclear(sdst, _state);
42730     ns = ssrc->nstored;
42731     for(i=0; i<=ns-1; i++)
42732     {
42733         k = ssrc->items.ptr.p_int[i];
42734         sdst->items.ptr.p_int[i] = k;
42735         sdst->locationof.ptr.p_int[k] = i;
42736     }
42737     sdst->nstored = ns;
42738 }
42739 
42740 
42741 /*************************************************************************
42742 Add K-th element to the set. The element may already exist in the set.
42743 
42744 INPUT PARAMETERS
42745     SA          -   set
42746     K           -   element to add, 0<=K<N.
42747 
42748 OUTPUT PARAMETERS
42749     SA          -   modified SA
42750 
42751   -- ALGLIB PROJECT --
42752      Copyright 05.10.2020 by Bochkanov Sergey.
42753 *************************************************************************/
amdordering_nsaddelement(amdnset * sa,ae_int_t k,ae_state * _state)42754 static void amdordering_nsaddelement(amdnset* sa,
42755      ae_int_t k,
42756      ae_state *_state)
42757 {
42758     ae_int_t ns;
42759 
42760 
42761     if( sa->locationof.ptr.p_int[k]>=0 )
42762     {
42763         return;
42764     }
42765     ns = sa->nstored;
42766     sa->locationof.ptr.p_int[k] = ns;
42767     sa->items.ptr.p_int[ns] = k;
42768     sa->nstored = ns+1;
42769 }
42770 
42771 
42772 /*************************************************************************
42773 Add K-th set from the source kn-set
42774 
42775 INPUT PARAMETERS
42776     SA          -   set
42777     Src, K      -   source kn-set and set index K
42778 
42779 OUTPUT PARAMETERS
42780     SA          -   modified SA
42781 
42782   -- ALGLIB PROJECT --
42783      Copyright 05.10.2020 by Bochkanov Sergey.
42784 *************************************************************************/
amdordering_nsaddkth(amdnset * sa,amdknset * src,ae_int_t k,ae_state * _state)42785 static void amdordering_nsaddkth(amdnset* sa,
42786      amdknset* src,
42787      ae_int_t k,
42788      ae_state *_state)
42789 {
42790     ae_int_t idxbegin;
42791     ae_int_t idxend;
42792     ae_int_t j;
42793     ae_int_t ns;
42794 
42795 
42796     idxbegin = src->vbegin.ptr.p_int[k];
42797     idxend = idxbegin+src->vcnt.ptr.p_int[k];
42798     ns = sa->nstored;
42799     while(idxbegin<idxend)
42800     {
42801         j = src->data.ptr.p_int[idxbegin];
42802         if( sa->locationof.ptr.p_int[j]<0 )
42803         {
42804             sa->locationof.ptr.p_int[j] = ns;
42805             sa->items.ptr.p_int[ns] = j;
42806             ns = ns+1;
42807         }
42808         idxbegin = idxbegin+1;
42809     }
42810     sa->nstored = ns;
42811 }
42812 
42813 
42814 /*************************************************************************
Subtracts one n-set (Src) from another one (SA)

INPUT PARAMETERS
    SA          -   set
    Src         -   n-set whose elements are removed from SA
42820 
42821 OUTPUT PARAMETERS
42822     SA          -   modified SA
42823 
42824   -- ALGLIB PROJECT --
42825      Copyright 05.10.2020 by Bochkanov Sergey.
42826 *************************************************************************/
amdordering_nssubtract1(amdnset * sa,amdnset * src,ae_state * _state)42827 static void amdordering_nssubtract1(amdnset* sa,
42828      amdnset* src,
42829      ae_state *_state)
42830 {
42831     ae_int_t i;
42832     ae_int_t j;
42833     ae_int_t loc;
42834     ae_int_t item;
42835     ae_int_t ns;
42836     ae_int_t ss;
42837 
42838 
42839     ns = sa->nstored;
42840     ss = src->nstored;
42841     if( ss<ns )
42842     {
42843         for(i=0; i<=ss-1; i++)
42844         {
42845             j = src->items.ptr.p_int[i];
42846             loc = sa->locationof.ptr.p_int[j];
42847             if( loc>=0 )
42848             {
42849                 item = sa->items.ptr.p_int[ns-1];
42850                 sa->items.ptr.p_int[loc] = item;
42851                 sa->locationof.ptr.p_int[item] = loc;
42852                 sa->locationof.ptr.p_int[j] = -1;
42853                 ns = ns-1;
42854             }
42855         }
42856     }
42857     else
42858     {
42859         i = 0;
42860         while(i<ns)
42861         {
42862             j = sa->items.ptr.p_int[i];
42863             loc = src->locationof.ptr.p_int[j];
42864             if( loc>=0 )
42865             {
42866                 item = sa->items.ptr.p_int[ns-1];
42867                 sa->items.ptr.p_int[i] = item;
42868                 sa->locationof.ptr.p_int[item] = i;
42869                 sa->locationof.ptr.p_int[j] = -1;
42870                 ns = ns-1;
42871             }
42872             else
42873             {
42874                 i = i+1;
42875             }
42876         }
42877     }
42878     sa->nstored = ns;
42879 }
42880 
42881 
42882 /*************************************************************************
42883 Subtracts K-th set from the source structure
42884 
42885 INPUT PARAMETERS
42886     SA          -   set
42887     Src, K      -   source kn-set and set index K
42888 
42889 OUTPUT PARAMETERS
42890     SA          -   modified SA
42891 
42892   -- ALGLIB PROJECT --
42893      Copyright 05.10.2020 by Bochkanov Sergey.
42894 *************************************************************************/
amdordering_nssubtractkth(amdnset * sa,amdknset * src,ae_int_t k,ae_state * _state)42895 static void amdordering_nssubtractkth(amdnset* sa,
42896      amdknset* src,
42897      ae_int_t k,
42898      ae_state *_state)
42899 {
42900     ae_int_t idxbegin;
42901     ae_int_t idxend;
42902     ae_int_t j;
42903     ae_int_t loc;
42904     ae_int_t ns;
42905     ae_int_t item;
42906 
42907 
42908     idxbegin = src->vbegin.ptr.p_int[k];
42909     idxend = idxbegin+src->vcnt.ptr.p_int[k];
42910     ns = sa->nstored;
42911     while(idxbegin<idxend)
42912     {
42913         j = src->data.ptr.p_int[idxbegin];
42914         loc = sa->locationof.ptr.p_int[j];
42915         if( loc>=0 )
42916         {
42917             item = sa->items.ptr.p_int[ns-1];
42918             sa->items.ptr.p_int[loc] = item;
42919             sa->locationof.ptr.p_int[item] = loc;
42920             sa->locationof.ptr.p_int[j] = -1;
42921             ns = ns-1;
42922         }
42923         idxbegin = idxbegin+1;
42924     }
42925     sa->nstored = ns;
42926 }
42927 
42928 
42929 /*************************************************************************
42930 Clears set
42931 
42932 INPUT PARAMETERS
42933     SA          -   set to be cleared
42934 
42935 
42936   -- ALGLIB PROJECT --
42937      Copyright 05.10.2020 by Bochkanov Sergey.
42938 *************************************************************************/
amdordering_nsclear(amdnset * sa,ae_state * _state)42939 static void amdordering_nsclear(amdnset* sa, ae_state *_state)
42940 {
42941     ae_int_t i;
42942     ae_int_t ns;
42943 
42944 
42945     ns = sa->nstored;
42946     for(i=0; i<=ns-1; i++)
42947     {
42948         sa->locationof.ptr.p_int[sa->items.ptr.p_int[i]] = -1;
42949     }
42950     sa->nstored = 0;
42951 }
42952 
42953 
42954 /*************************************************************************
42955 Counts set elements
42956 
42957 INPUT PARAMETERS
42958     SA          -   set
42959 
42960 RESULT
42961     number of elements in SA
42962 
42963   -- ALGLIB PROJECT --
42964      Copyright 05.10.2020 by Bochkanov Sergey.
42965 *************************************************************************/
amdordering_nscount(amdnset * sa,ae_state * _state)42966 static ae_int_t amdordering_nscount(amdnset* sa, ae_state *_state)
42967 {
42968     ae_int_t result;
42969 
42970 
42971     result = sa->nstored;
42972     return result;
42973 }
42974 
42975 
42976 /*************************************************************************
42977 Counts set elements not present in the K-th set of the source structure
42978 
42979 INPUT PARAMETERS
42980     SA          -   set
42981     Src, K      -   source kn-set and set index K
42982 
42983 RESULT
42984     number of elements in SA not present in Src[K]
42985 
42986   -- ALGLIB PROJECT --
42987      Copyright 05.10.2020 by Bochkanov Sergey.
42988 *************************************************************************/
amdordering_nscountnotkth(amdnset * sa,amdknset * src,ae_int_t k,ae_state * _state)42989 static ae_int_t amdordering_nscountnotkth(amdnset* sa,
42990      amdknset* src,
42991      ae_int_t k,
42992      ae_state *_state)
42993 {
42994     ae_int_t idxbegin;
42995     ae_int_t idxend;
42996     ae_int_t intersectcnt;
42997     ae_int_t result;
42998 
42999 
43000     idxbegin = src->vbegin.ptr.p_int[k];
43001     idxend = idxbegin+src->vcnt.ptr.p_int[k];
43002     intersectcnt = 0;
43003     while(idxbegin<idxend)
43004     {
43005         if( sa->locationof.ptr.p_int[src->data.ptr.p_int[idxbegin]]>=0 )
43006         {
43007             intersectcnt = intersectcnt+1;
43008         }
43009         idxbegin = idxbegin+1;
43010     }
43011     result = sa->nstored-intersectcnt;
43012     return result;
43013 }
43014 
43015 
43016 /*************************************************************************
43017 Counts set elements also present in the K-th set of the source structure
43018 
43019 INPUT PARAMETERS
43020     SA          -   set
43021     Src, K      -   source kn-set and set index K
43022 
43023 RESULT
43024     number of elements in SA also present in Src[K]
43025 
43026   -- ALGLIB PROJECT --
43027      Copyright 05.10.2020 by Bochkanov Sergey.
43028 *************************************************************************/
amdordering_nscountandkth(amdnset * sa,amdknset * src,ae_int_t k,ae_state * _state)43029 static ae_int_t amdordering_nscountandkth(amdnset* sa,
43030      amdknset* src,
43031      ae_int_t k,
43032      ae_state *_state)
43033 {
43034     ae_int_t idxbegin;
43035     ae_int_t idxend;
43036     ae_int_t result;
43037 
43038 
43039     idxbegin = src->vbegin.ptr.p_int[k];
43040     idxend = idxbegin+src->vcnt.ptr.p_int[k];
43041     result = 0;
43042     while(idxbegin<idxend)
43043     {
43044         if( sa->locationof.ptr.p_int[src->data.ptr.p_int[idxbegin]]>=0 )
43045         {
43046             result = result+1;
43047         }
43048         idxbegin = idxbegin+1;
43049     }
43050     return result;
43051 }
43052 
43053 
43054 /*************************************************************************
43055 Compare two sets, returns True for equal sets
43056 
43057 INPUT PARAMETERS
43058     S0          -   set 0
43059     S1          -   set 1, must have same parameter N as set 0
43060 
43061 RESULT
43062     True, if sets are equal
43063 
43064   -- ALGLIB PROJECT --
43065      Copyright 05.10.2020 by Bochkanov Sergey.
43066 *************************************************************************/
amdordering_nsequal(amdnset * s0,amdnset * s1,ae_state * _state)43067 static ae_bool amdordering_nsequal(amdnset* s0,
43068      amdnset* s1,
43069      ae_state *_state)
43070 {
43071     ae_int_t i;
43072     ae_int_t ns0;
43073     ae_int_t ns1;
43074     ae_bool result;
43075 
43076 
43077     result = ae_false;
43078     if( s0->n!=s1->n )
43079     {
43080         return result;
43081     }
43082     if( s0->nstored!=s1->nstored )
43083     {
43084         return result;
43085     }
43086     ns0 = s0->nstored;
43087     ns1 = s1->nstored;
43088     for(i=0; i<=ns0-1; i++)
43089     {
43090         if( s1->locationof.ptr.p_int[s0->items.ptr.p_int[i]]<0 )
43091         {
43092             return result;
43093         }
43094     }
43095     for(i=0; i<=ns1-1; i++)
43096     {
43097         if( s0->locationof.ptr.p_int[s1->items.ptr.p_int[i]]<0 )
43098         {
43099             return result;
43100         }
43101     }
43102     result = ae_true;
43103     return result;
43104 }
43105 
43106 
43107 /*************************************************************************
43108 Prepares iteration over set
43109 
43110 INPUT PARAMETERS
43111     SA          -   set
43112 
43113 OUTPUT PARAMETERS
43114     SA          -   SA ready for repeated calls of nsEnumerate()
43115 
43116   -- ALGLIB PROJECT --
43117      Copyright 05.10.2020 by Bochkanov Sergey.
43118 *************************************************************************/
amdordering_nsstartenumeration(amdnset * sa,ae_state * _state)43119 static void amdordering_nsstartenumeration(amdnset* sa, ae_state *_state)
43120 {
43121 
43122 
43123     sa->iteridx = 0;
43124 }
43125 
43126 
43127 /*************************************************************************
43128 Iterates over the set. Subsequent calls return True and set J to  new  set
43129 item until iteration stops and False is returned.
43130 
43131 INPUT PARAMETERS
43132     SA          -   n-set
43133 
43134 OUTPUT PARAMETERS
43135     J           -   if:
43136                     * Result=True - index of element in the set
43137                     * Result=False - not set
43138 
43139 
43140   -- ALGLIB PROJECT --
43141      Copyright 05.10.2020 by Bochkanov Sergey.
43142 *************************************************************************/
amdordering_nsenumerate(amdnset * sa,ae_int_t * i,ae_state * _state)43143 static ae_bool amdordering_nsenumerate(amdnset* sa,
43144      ae_int_t* i,
43145      ae_state *_state)
43146 {
43147     ae_int_t k;
43148     ae_bool result;
43149 
43150     *i = 0;
43151 
43152     k = sa->iteridx;
43153     if( k>=sa->nstored )
43154     {
43155         result = ae_false;
43156         return result;
43157     }
43158     *i = sa->items.ptr.p_int[k];
43159     sa->iteridx = k+1;
43160     result = ae_true;
43161     return result;
43162 }
43163 
43164 
43165 /*************************************************************************
43166 Compresses internal storage, reclaiming previously dropped blocks. To be
43167 used internally by kn-set modification functions.
43168 
43169 INPUT PARAMETERS
43170     SA          -   kn-set to compress
43171 
43172   -- ALGLIB PROJECT --
43173      Copyright 05.10.2020 by Bochkanov Sergey.
43174 *************************************************************************/
amdordering_knscompressstorage(amdknset * sa,ae_state * _state)43175 static void amdordering_knscompressstorage(amdknset* sa, ae_state *_state)
43176 {
43177     ae_int_t i;
43178     ae_int_t blocklen;
43179     ae_int_t setidx;
43180     ae_int_t srcoffs;
43181     ae_int_t dstoffs;
43182 
43183 
43184     srcoffs = 0;
43185     dstoffs = 0;
43186     while(srcoffs<sa->dataused)
43187     {
43188         blocklen = sa->data.ptr.p_int[srcoffs+0];
43189         setidx = sa->data.ptr.p_int[srcoffs+1];
43190         ae_assert(blocklen>=amdordering_knsheadersize, "knsCompressStorage: integrity check 6385 failed", _state);
43191         if( setidx<0 )
43192         {
43193             srcoffs = srcoffs+blocklen;
43194             continue;
43195         }
43196         if( srcoffs!=dstoffs )
43197         {
43198             for(i=0; i<=blocklen-1; i++)
43199             {
43200                 sa->data.ptr.p_int[dstoffs+i] = sa->data.ptr.p_int[srcoffs+i];
43201             }
43202             sa->vbegin.ptr.p_int[setidx] = dstoffs+amdordering_knsheadersize;
43203         }
43204         dstoffs = dstoffs+blocklen;
43205         srcoffs = srcoffs+blocklen;
43206     }
43207     ae_assert(srcoffs==sa->dataused, "knsCompressStorage: integrity check 9464 failed", _state);
43208     sa->dataused = dstoffs;
43209 }
43210 
43211 
43212 /*************************************************************************
43213 Reallocates internal storage for set #SetIdx, increasing its  capacity  to
43214 NewAllocated exactly. This function may invalidate internal  pointers  for
43215 ALL   sets  in  the  kn-set  structure  because  it  may  perform  storage
43216 compression in order to reclaim previously freed space.
43217 
43218 INPUT PARAMETERS
43219     SA          -   kn-set structure
43220     SetIdx      -   set to reallocate
43221     NewAllocated -  new size for the set, must be at least equal to already
43222                     allocated
43223 
43224   -- ALGLIB PROJECT --
43225      Copyright 05.10.2020 by Bochkanov Sergey.
43226 *************************************************************************/
amdordering_knsreallocate(amdknset * sa,ae_int_t setidx,ae_int_t newallocated,ae_state * _state)43227 static void amdordering_knsreallocate(amdknset* sa,
43228      ae_int_t setidx,
43229      ae_int_t newallocated,
43230      ae_state *_state)
43231 {
43232     ae_int_t oldbegin;
43233     ae_int_t oldcnt;
43234     ae_int_t newbegin;
43235     ae_int_t j;
43236 
43237 
43238     if( sa->data.cnt<sa->dataused+amdordering_knsheadersize+newallocated )
43239     {
43240         amdordering_knscompressstorage(sa, _state);
43241         if( sa->data.cnt<sa->dataused+amdordering_knsheadersize+newallocated )
43242         {
43243             ivectorgrowto(&sa->data, sa->dataused+amdordering_knsheadersize+newallocated, _state);
43244         }
43245     }
43246     oldbegin = sa->vbegin.ptr.p_int[setidx];
43247     oldcnt = sa->vcnt.ptr.p_int[setidx];
43248     newbegin = sa->dataused+amdordering_knsheadersize;
43249     sa->vbegin.ptr.p_int[setidx] = newbegin;
43250     sa->vallocated.ptr.p_int[setidx] = newallocated;
43251     sa->data.ptr.p_int[oldbegin-1] = -1;
43252     sa->data.ptr.p_int[newbegin-2] = amdordering_knsheadersize+newallocated;
43253     sa->data.ptr.p_int[newbegin-1] = setidx;
43254     sa->dataused = sa->dataused+sa->data.ptr.p_int[newbegin-2];
43255     for(j=0; j<=oldcnt-1; j++)
43256     {
43257         sa->data.ptr.p_int[newbegin+j] = sa->data.ptr.p_int[oldbegin+j];
43258     }
43259 }
43260 
43261 
43262 /*************************************************************************
43263 Initialize kn-set
43264 
43265 INPUT PARAMETERS
43266     K           -   sets count
43267     N           -   set size
43268     kPrealloc   -   preallocate place per set (can be zero)
43269 
43270 OUTPUT PARAMETERS
43271     SA          -   K sets of N elements, initially empty
43272 
43273   -- ALGLIB PROJECT --
43274      Copyright 05.10.2020 by Bochkanov Sergey.
43275 *************************************************************************/
amdordering_knsinit(ae_int_t k,ae_int_t n,ae_int_t kprealloc,amdknset * sa,ae_state * _state)43276 static void amdordering_knsinit(ae_int_t k,
43277      ae_int_t n,
43278      ae_int_t kprealloc,
43279      amdknset* sa,
43280      ae_state *_state)
43281 {
43282     ae_int_t i;
43283 
43284 
43285     sa->k = n;
43286     sa->n = n;
43287     isetallocv(n, -1, &sa->flagarray, _state);
43288     isetallocv(n, kprealloc, &sa->vallocated, _state);
43289     ivectorsetlengthatleast(&sa->vbegin, n, _state);
43290     sa->vbegin.ptr.p_int[0] = amdordering_knsheadersize;
43291     for(i=1; i<=n-1; i++)
43292     {
43293         sa->vbegin.ptr.p_int[i] = sa->vbegin.ptr.p_int[i-1]+sa->vallocated.ptr.p_int[i-1]+amdordering_knsheadersize;
43294     }
43295     sa->dataused = sa->vbegin.ptr.p_int[n-1]+sa->vallocated.ptr.p_int[n-1];
43296     ivectorsetlengthatleast(&sa->data, sa->dataused, _state);
43297     for(i=0; i<=n-1; i++)
43298     {
43299         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]-2] = amdordering_knsheadersize+sa->vallocated.ptr.p_int[i];
43300         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]-1] = i;
43301     }
43302     isetallocv(n, 0, &sa->vcnt, _state);
43303 }
43304 
43305 
43306 /*************************************************************************
43307 Initialize kn-set from lower triangle of symmetric A
43308 
43309 INPUT PARAMETERS
43310     A           -   lower triangular sparse matrix in CRS format
43311     N           -   problem size
43312 
43313 OUTPUT PARAMETERS
43314     SA          -   N sets of N elements, reproducing both lower and upper
43315                     triangles of A
43316 
43317   -- ALGLIB PROJECT --
43318      Copyright 05.10.2020 by Bochkanov Sergey.
43319 *************************************************************************/
amdordering_knsinitfroma(sparsematrix * a,ae_int_t n,amdknset * sa,ae_state * _state)43320 static void amdordering_knsinitfroma(sparsematrix* a,
43321      ae_int_t n,
43322      amdknset* sa,
43323      ae_state *_state)
43324 {
43325     ae_int_t i;
43326     ae_int_t j;
43327     ae_int_t jj;
43328     ae_int_t j0;
43329     ae_int_t j1;
43330 
43331 
43332     sa->k = n;
43333     sa->n = n;
43334     isetallocv(n, -1, &sa->flagarray, _state);
43335     ivectorsetlengthatleast(&sa->vallocated, n, _state);
43336     for(i=0; i<=n-1; i++)
43337     {
43338         ae_assert(a->didx.ptr.p_int[i]<a->uidx.ptr.p_int[i], "knsInitFromA: integrity check for diagonal of A failed", _state);
43339         j0 = a->ridx.ptr.p_int[i];
43340         j1 = a->didx.ptr.p_int[i]-1;
43341         sa->vallocated.ptr.p_int[i] = 1+(j1-j0+1);
43342         for(jj=j0; jj<=j1; jj++)
43343         {
43344             j = a->idx.ptr.p_int[jj];
43345             sa->vallocated.ptr.p_int[j] = sa->vallocated.ptr.p_int[j]+1;
43346         }
43347     }
43348     ivectorsetlengthatleast(&sa->vbegin, n, _state);
43349     sa->vbegin.ptr.p_int[0] = amdordering_knsheadersize;
43350     for(i=1; i<=n-1; i++)
43351     {
43352         sa->vbegin.ptr.p_int[i] = sa->vbegin.ptr.p_int[i-1]+sa->vallocated.ptr.p_int[i-1]+amdordering_knsheadersize;
43353     }
43354     sa->dataused = sa->vbegin.ptr.p_int[n-1]+sa->vallocated.ptr.p_int[n-1];
43355     ivectorsetlengthatleast(&sa->data, sa->dataused, _state);
43356     for(i=0; i<=n-1; i++)
43357     {
43358         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]-2] = amdordering_knsheadersize+sa->vallocated.ptr.p_int[i];
43359         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]-1] = i;
43360     }
43361     isetallocv(n, 0, &sa->vcnt, _state);
43362     for(i=0; i<=n-1; i++)
43363     {
43364         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]+sa->vcnt.ptr.p_int[i]] = i;
43365         sa->vcnt.ptr.p_int[i] = sa->vcnt.ptr.p_int[i]+1;
43366         j0 = a->ridx.ptr.p_int[i];
43367         j1 = a->didx.ptr.p_int[i]-1;
43368         for(jj=j0; jj<=j1; jj++)
43369         {
43370             j = a->idx.ptr.p_int[jj];
43371             sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]+sa->vcnt.ptr.p_int[i]] = j;
43372             sa->data.ptr.p_int[sa->vbegin.ptr.p_int[j]+sa->vcnt.ptr.p_int[j]] = i;
43373             sa->vcnt.ptr.p_int[i] = sa->vcnt.ptr.p_int[i]+1;
43374             sa->vcnt.ptr.p_int[j] = sa->vcnt.ptr.p_int[j]+1;
43375         }
43376     }
43377 }
43378 
43379 
43380 /*************************************************************************
43381 Prepares iteration over I-th set
43382 
43383 INPUT PARAMETERS
43384     SA          -   kn-set
43385     I           -   set index
43386 
43387 OUTPUT PARAMETERS
43388     SA          -   SA ready for repeated calls of knsEnumerate()
43389 
43390   -- ALGLIB PROJECT --
43391      Copyright 05.10.2020 by Bochkanov Sergey.
43392 *************************************************************************/
amdordering_knsstartenumeration(amdknset * sa,ae_int_t i,ae_state * _state)43393 static void amdordering_knsstartenumeration(amdknset* sa,
43394      ae_int_t i,
43395      ae_state *_state)
43396 {
43397 
43398 
43399     sa->iterrow = i;
43400     sa->iteridx = 0;
43401 }
43402 
43403 
43404 /*************************************************************************
43405 Iterates over I-th set (as specified during recent knsStartEnumeration call).
43406 Subsequent calls return True and set J to new set item until iteration
43407 stops and False is returned.
43408 
43409 INPUT PARAMETERS
43410     SA          -   kn-set
43411 
43412 OUTPUT PARAMETERS
43413     J           -   if:
43414                     * Result=True - index of element in the set
43415                     * Result=False - not set
43416 
43417 
43418   -- ALGLIB PROJECT --
43419      Copyright 05.10.2020 by Bochkanov Sergey.
43420 *************************************************************************/
amdordering_knsenumerate(amdknset * sa,ae_int_t * i,ae_state * _state)43421 static ae_bool amdordering_knsenumerate(amdknset* sa,
43422      ae_int_t* i,
43423      ae_state *_state)
43424 {
43425     ae_bool result;
43426 
43427     *i = 0;
43428 
43429     if( sa->iteridx<sa->vcnt.ptr.p_int[sa->iterrow] )
43430     {
43431         *i = sa->data.ptr.p_int[sa->vbegin.ptr.p_int[sa->iterrow]+sa->iteridx];
43432         sa->iteridx = sa->iteridx+1;
43433         result = ae_true;
43434     }
43435     else
43436     {
43437         result = ae_false;
43438     }
43439     return result;
43440 }
43441 
43442 
43443 /*************************************************************************
43444 Allows direct access to internal storage  of  kn-set  structure  - returns
43445 range of elements SA.Data[idxBegin...idxEnd-1] used to store K-th set
43446 
43447 INPUT PARAMETERS
43448     SA          -   kn-set
43449     K           -   set index
43450 
43451 OUTPUT PARAMETERS
43452     idxBegin,
43453     idxEnd      -   half-range [idxBegin,idxEnd) of SA.Data that stores
43454                     K-th set
43455 
43456 
43457   -- ALGLIB PROJECT --
43458      Copyright 05.10.2020 by Bochkanov Sergey.
43459 *************************************************************************/
amdordering_knsdirectaccess(amdknset * sa,ae_int_t k,ae_int_t * idxbegin,ae_int_t * idxend,ae_state * _state)43460 static void amdordering_knsdirectaccess(amdknset* sa,
43461      ae_int_t k,
43462      ae_int_t* idxbegin,
43463      ae_int_t* idxend,
43464      ae_state *_state)
43465 {
43466 
43467     *idxbegin = 0;
43468     *idxend = 0;
43469 
43470     *idxbegin = sa->vbegin.ptr.p_int[k];
43471     *idxend = *idxbegin+sa->vcnt.ptr.p_int[k];
43472 }
43473 
43474 
43475 /*************************************************************************
43476 Add K-th element to I-th set. The caller guarantees that  the  element  is
43477 not present in the target set.
43478 
43479 INPUT PARAMETERS
43480     SA          -   kn-set
43481     I           -   set index
43482     K           -   element to add
43483 
43484 OUTPUT PARAMETERS
43485     SA          -   modified SA
43486 
43487   -- ALGLIB PROJECT --
43488      Copyright 05.10.2020 by Bochkanov Sergey.
43489 *************************************************************************/
amdordering_knsaddnewelement(amdknset * sa,ae_int_t i,ae_int_t k,ae_state * _state)43490 static void amdordering_knsaddnewelement(amdknset* sa,
43491      ae_int_t i,
43492      ae_int_t k,
43493      ae_state *_state)
43494 {
43495     ae_int_t cnt;
43496 
43497 
43498     cnt = sa->vcnt.ptr.p_int[i];
43499     if( cnt==sa->vallocated.ptr.p_int[i] )
43500     {
43501         amdordering_knsreallocate(sa, i, 2*sa->vallocated.ptr.p_int[i]+1, _state);
43502     }
43503     sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]+cnt] = k;
43504     sa->vcnt.ptr.p_int[i] = cnt+1;
43505 }
43506 
43507 
43508 /*************************************************************************
43509 Subtracts source n-set from the I-th set of the destination kn-set.
43510 
43511 INPUT PARAMETERS
43512     SA          -   destination kn-set structure
43513     I           -   set index in the structure
43514     Src         -   source n-set
43515 
43516 OUTPUT PARAMETERS
43517     SA          -   I-th set except for elements in Src
43518 
43519   -- ALGLIB PROJECT --
43520      Copyright 05.10.2020 by Bochkanov Sergey.
43521 *************************************************************************/
amdordering_knssubtract1(amdknset * sa,ae_int_t i,amdnset * src,ae_state * _state)43522 static void amdordering_knssubtract1(amdknset* sa,
43523      ae_int_t i,
43524      amdnset* src,
43525      ae_state *_state)
43526 {
43527     ae_int_t j;
43528     ae_int_t idxbegin;
43529     ae_int_t idxend;
43530     ae_int_t cnt;
43531 
43532 
43533     cnt = sa->vcnt.ptr.p_int[i];
43534     idxbegin = sa->vbegin.ptr.p_int[i];
43535     idxend = idxbegin+cnt;
43536     while(idxbegin<idxend)
43537     {
43538         j = sa->data.ptr.p_int[idxbegin];
43539         if( src->locationof.ptr.p_int[j]>=0 )
43540         {
43541             sa->data.ptr.p_int[idxbegin] = sa->data.ptr.p_int[idxend-1];
43542             idxend = idxend-1;
43543             cnt = cnt-1;
43544         }
43545         else
43546         {
43547             idxbegin = idxbegin+1;
43548         }
43549     }
43550     sa->vcnt.ptr.p_int[i] = cnt;
43551 }
43552 
43553 
43554 /*************************************************************************
43555 Adds Kth set of the source kn-set to the I-th destination set. The  caller
43556 guarantees that SA[I] and Src[J] do NOT intersect, i.e. do not have shared
43557 elements - it allows to use faster algorithms.
43558 
43559 INPUT PARAMETERS
43560     SA          -   destination kn-set structure
43561     I           -   set index in the structure
43562     Src         -   source kn-set
43563     K           -   set index
43564 
43565 OUTPUT PARAMETERS
43566     SA          -   I-th set plus for elements in K-th set of Src
43567 
43568   -- ALGLIB PROJECT --
43569      Copyright 05.10.2020 by Bochkanov Sergey.
43570 *************************************************************************/
amdordering_knsaddkthdistinct(amdknset * sa,ae_int_t i,amdknset * src,ae_int_t k,ae_state * _state)43571 static void amdordering_knsaddkthdistinct(amdknset* sa,
43572      ae_int_t i,
43573      amdknset* src,
43574      ae_int_t k,
43575      ae_state *_state)
43576 {
43577     ae_int_t idxdst;
43578     ae_int_t idxsrcbegin;
43579     ae_int_t cnt;
43580     ae_int_t srccnt;
43581     ae_int_t j;
43582 
43583 
43584     cnt = sa->vcnt.ptr.p_int[i];
43585     srccnt = src->vcnt.ptr.p_int[k];
43586     if( cnt+srccnt>sa->vallocated.ptr.p_int[i] )
43587     {
43588         amdordering_knsreallocate(sa, i, 2*(cnt+srccnt)+1, _state);
43589     }
43590     idxsrcbegin = src->vbegin.ptr.p_int[k];
43591     idxdst = sa->vbegin.ptr.p_int[i]+cnt;
43592     for(j=0; j<=srccnt-1; j++)
43593     {
43594         sa->data.ptr.p_int[idxdst] = src->data.ptr.p_int[idxsrcbegin+j];
43595         idxdst = idxdst+1;
43596     }
43597     sa->vcnt.ptr.p_int[i] = cnt+srccnt;
43598 }
43599 
43600 
43601 /*************************************************************************
43602 Counts elements of K-th set of S0
43603 
43604 INPUT PARAMETERS
43605     S0          -   kn-set structure
43606     K           -   set index in the structure S0
43607 
43608 RESULT
43609     K-th set element count
43610 
43611   -- ALGLIB PROJECT --
43612      Copyright 05.10.2020 by Bochkanov Sergey.
43613 *************************************************************************/
amdordering_knscountkth(amdknset * s0,ae_int_t k,ae_state * _state)43614 static ae_int_t amdordering_knscountkth(amdknset* s0,
43615      ae_int_t k,
43616      ae_state *_state)
43617 {
43618     ae_int_t result;
43619 
43620 
43621     result = s0->vcnt.ptr.p_int[k];
43622     return result;
43623 }
43624 
43625 
43626 /*************************************************************************
43627 Counts elements of I-th set of S0 not present in S1
43628 
43629 INPUT PARAMETERS
43630     S0          -   kn-set structure
43631     I           -   set index in the structure S0
43632     S1          -   kn-set to compare against
43633 
43634 RESULT
43635     count
43636 
43637   -- ALGLIB PROJECT --
43638      Copyright 05.10.2020 by Bochkanov Sergey.
43639 *************************************************************************/
amdordering_knscountnot(amdknset * s0,ae_int_t i,amdnset * s1,ae_state * _state)43640 static ae_int_t amdordering_knscountnot(amdknset* s0,
43641      ae_int_t i,
43642      amdnset* s1,
43643      ae_state *_state)
43644 {
43645     ae_int_t idxbegin0;
43646     ae_int_t cnt0;
43647     ae_int_t j;
43648     ae_int_t result;
43649 
43650 
43651     cnt0 = s0->vcnt.ptr.p_int[i];
43652     idxbegin0 = s0->vbegin.ptr.p_int[i];
43653     result = 0;
43654     for(j=0; j<=cnt0-1; j++)
43655     {
43656         if( s1->locationof.ptr.p_int[s0->data.ptr.p_int[idxbegin0+j]]<0 )
43657         {
43658             result = result+1;
43659         }
43660     }
43661     return result;
43662 }
43663 
43664 
43665 /*************************************************************************
43666 Counts elements of I-th set of S0 not present in K-th set of S1
43667 
43668 INPUT PARAMETERS
43669     S0          -   kn-set structure
43670     I           -   set index in the structure S0
43671     S1          -   kn-set to compare against
43672     K           -   set index in the structure S1
43673 
43674 RESULT
43675     count
43676 
43677   -- ALGLIB PROJECT --
43678      Copyright 05.10.2020 by Bochkanov Sergey.
43679 *************************************************************************/
amdordering_knscountnotkth(amdknset * s0,ae_int_t i,amdknset * s1,ae_int_t k,ae_state * _state)43680 static ae_int_t amdordering_knscountnotkth(amdknset* s0,
43681      ae_int_t i,
43682      amdknset* s1,
43683      ae_int_t k,
43684      ae_state *_state)
43685 {
43686     ae_int_t idxbegin0;
43687     ae_int_t idxbegin1;
43688     ae_int_t cnt0;
43689     ae_int_t cnt1;
43690     ae_int_t j;
43691     ae_int_t result;
43692 
43693 
43694     cnt0 = s0->vcnt.ptr.p_int[i];
43695     cnt1 = s1->vcnt.ptr.p_int[k];
43696     idxbegin0 = s0->vbegin.ptr.p_int[i];
43697     idxbegin1 = s1->vbegin.ptr.p_int[k];
43698     for(j=0; j<=cnt1-1; j++)
43699     {
43700         s0->flagarray.ptr.p_int[s1->data.ptr.p_int[idxbegin1+j]] = 1;
43701     }
43702     result = 0;
43703     for(j=0; j<=cnt0-1; j++)
43704     {
43705         if( s0->flagarray.ptr.p_int[s0->data.ptr.p_int[idxbegin0+j]]<0 )
43706         {
43707             result = result+1;
43708         }
43709     }
43710     for(j=0; j<=cnt1-1; j++)
43711     {
43712         s0->flagarray.ptr.p_int[s1->data.ptr.p_int[idxbegin1+j]] = -1;
43713     }
43714     return result;
43715 }
43716 
43717 
43718 /*************************************************************************
43719 Counts elements of I-th set of S0 that are also present in K-th set of S1
43720 
43721 INPUT PARAMETERS
43722     S0          -   kn-set structure
43723     I           -   set index in the structure S0
43724     S1          -   kn-set to compare against
43725     K           -   set index in the structure S1
43726 
43727 RESULT
43728     count
43729 
43730   -- ALGLIB PROJECT --
43731      Copyright 05.10.2020 by Bochkanov Sergey.
43732 *************************************************************************/
amdordering_knscountandkth(amdknset * s0,ae_int_t i,amdknset * s1,ae_int_t k,ae_state * _state)43733 static ae_int_t amdordering_knscountandkth(amdknset* s0,
43734      ae_int_t i,
43735      amdknset* s1,
43736      ae_int_t k,
43737      ae_state *_state)
43738 {
43739     ae_int_t idxbegin0;
43740     ae_int_t idxbegin1;
43741     ae_int_t cnt0;
43742     ae_int_t cnt1;
43743     ae_int_t j;
43744     ae_int_t result;
43745 
43746 
43747     cnt0 = s0->vcnt.ptr.p_int[i];
43748     cnt1 = s1->vcnt.ptr.p_int[k];
43749     idxbegin0 = s0->vbegin.ptr.p_int[i];
43750     idxbegin1 = s1->vbegin.ptr.p_int[k];
43751     for(j=0; j<=cnt1-1; j++)
43752     {
43753         s0->flagarray.ptr.p_int[s1->data.ptr.p_int[idxbegin1+j]] = 1;
43754     }
43755     result = 0;
43756     for(j=0; j<=cnt0-1; j++)
43757     {
43758         if( s0->flagarray.ptr.p_int[s0->data.ptr.p_int[idxbegin0+j]]>0 )
43759         {
43760             result = result+1;
43761         }
43762     }
43763     for(j=0; j<=cnt1-1; j++)
43764     {
43765         s0->flagarray.ptr.p_int[s1->data.ptr.p_int[idxbegin1+j]] = -1;
43766     }
43767     return result;
43768 }
43769 
43770 
43771 /*************************************************************************
43772 Sums elements in I-th set of S0, returns sum.
43773 
43774 INPUT PARAMETERS
43775     S0          -   kn-set structure
43776     I           -   set index in the structure S0
43777 
43778 RESULT
43779     sum
43780 
43781   -- ALGLIB PROJECT --
43782      Copyright 05.10.2020 by Bochkanov Sergey.
43783 *************************************************************************/
amdordering_knssumkth(amdknset * s0,ae_int_t i,ae_state * _state)43784 static ae_int_t amdordering_knssumkth(amdknset* s0,
43785      ae_int_t i,
43786      ae_state *_state)
43787 {
43788     ae_int_t idxbegin0;
43789     ae_int_t cnt0;
43790     ae_int_t j;
43791     ae_int_t result;
43792 
43793 
43794     cnt0 = s0->vcnt.ptr.p_int[i];
43795     idxbegin0 = s0->vbegin.ptr.p_int[i];
43796     result = 0;
43797     for(j=0; j<=cnt0-1; j++)
43798     {
43799         result = result+s0->data.ptr.p_int[idxbegin0+j];
43800     }
43801     return result;
43802 }
43803 
43804 
43805 /*************************************************************************
43806 Clear k-th kn-set in collection.
43807 
43808 Freed memory is NOT reclaimed for future garbage collection.
43809 
43810 INPUT PARAMETERS
43811     SA          -   kn-set structure
43812     K           -   set index
43813 
43814 OUTPUT PARAMETERS
43815     SA          -   K-th set was cleared
43816 
43817   -- ALGLIB PROJECT --
43818      Copyright 05.10.2020 by Bochkanov Sergey.
43819 *************************************************************************/
amdordering_knsclearkthnoreclaim(amdknset * sa,ae_int_t k,ae_state * _state)43820 static void amdordering_knsclearkthnoreclaim(amdknset* sa,
43821      ae_int_t k,
43822      ae_state *_state)
43823 {
43824 
43825 
43826     sa->vcnt.ptr.p_int[k] = 0;
43827 }
43828 
43829 
43830 /*************************************************************************
43831 Clear k-th kn-set in collection.
43832 
43833 Freed memory is reclaimed for future garbage collection. This function  is
43834 NOT recommended if you intend to add elements to this set in some  future,
43835 because every addition will result in  reallocation  of  previously  freed
43836 memory. Use knsClearKthNoReclaim().
43837 
43838 INPUT PARAMETERS
43839     SA          -   kn-set structure
43840     K           -   set index
43841 
43842 OUTPUT PARAMETERS
43843     SA          -   K-th set was cleared
43844 
43845   -- ALGLIB PROJECT --
43846      Copyright 05.10.2020 by Bochkanov Sergey.
43847 *************************************************************************/
amdordering_knsclearkthreclaim(amdknset * sa,ae_int_t k,ae_state * _state)43848 static void amdordering_knsclearkthreclaim(amdknset* sa,
43849      ae_int_t k,
43850      ae_state *_state)
43851 {
43852     ae_int_t idxbegin;
43853     ae_int_t allocated;
43854 
43855 
43856     idxbegin = sa->vbegin.ptr.p_int[k];
43857     allocated = sa->vallocated.ptr.p_int[k];
43858     sa->vcnt.ptr.p_int[k] = 0;
43859     if( allocated>=amdordering_knsheadersize )
43860     {
43861         sa->data.ptr.p_int[idxbegin-2] = 2;
43862         sa->data.ptr.p_int[idxbegin+0] = allocated;
43863         sa->data.ptr.p_int[idxbegin+1] = -1;
43864         sa->vallocated.ptr.p_int[k] = 0;
43865     }
43866 }
43867 
43868 
43869 /*************************************************************************
43870 Initialize linked list matrix
43871 
43872 INPUT PARAMETERS
43873     N           -   matrix size
43874 
43875 OUTPUT PARAMETERS
43876     A           -   NxN linked list matrix
43877 
43878   -- ALGLIB PROJECT --
43879      Copyright 05.10.2020 by Bochkanov Sergey.
43880 *************************************************************************/
amdordering_mtxinit(ae_int_t n,amdllmatrix * a,ae_state * _state)43881 static void amdordering_mtxinit(ae_int_t n,
43882      amdllmatrix* a,
43883      ae_state *_state)
43884 {
43885 
43886 
43887     a->n = n;
43888     isetallocv(2*n+1, -1, &a->vbegin, _state);
43889     isetallocv(n, 0, &a->vcolcnt, _state);
43890     a->entriesinitialized = 0;
43891 }
43892 
43893 
43894 /*************************************************************************
43895 Adds column from matrix to n-set
43896 
43897 INPUT PARAMETERS
43898     A           -   NxN linked list matrix
43899     J           -   column index to add
43900     S           -   target n-set
43901 
43902 OUTPUT PARAMETERS
43903     S           -   elements from J-th column are added to S
43904 
43905 
43906   -- ALGLIB PROJECT --
43907      Copyright 05.10.2020 by Bochkanov Sergey.
43908 *************************************************************************/
amdordering_mtxaddcolumnto(amdllmatrix * a,ae_int_t j,amdnset * s,ae_state * _state)43909 static void amdordering_mtxaddcolumnto(amdllmatrix* a,
43910      ae_int_t j,
43911      amdnset* s,
43912      ae_state *_state)
43913 {
43914     ae_int_t n;
43915     ae_int_t eidx;
43916 
43917 
43918     n = a->n;
43919     eidx = a->vbegin.ptr.p_int[n+j];
43920     while(eidx>=0)
43921     {
43922         amdordering_nsaddelement(s, a->entries.ptr.p_int[eidx*amdordering_llmentrysize+4], _state);
43923         eidx = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+3];
43924     }
43925 }
43926 
43927 
43928 /*************************************************************************
43929 Inserts new element into column J, row I. The caller guarantees  that  the
43930 element being inserted is NOT already present in the matrix.
43931 
43932 INPUT PARAMETERS
43933     A           -   NxN linked list matrix
43934     I           -   row index
43935     J           -   column index
43936 
43937 OUTPUT PARAMETERS
43938     A           -   element (I,J) added to the list.
43939 
43940 
43941   -- ALGLIB PROJECT --
43942      Copyright 05.10.2020 by Bochkanov Sergey.
43943 *************************************************************************/
static void amdordering_mtxinsertnewelement(amdllmatrix* a,
     ae_int_t i,
     ae_int_t j,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t k;
    ae_int_t newsize;
    ae_int_t eidx;
    ae_int_t offs;


    n = a->n;
    /* VBegin[2*N] heads the free-entry list; -1 means the pool is exhausted.
     * In that case grow the entry pool (roughly doubling) and thread every
     * newly created entry onto the free list through slot +0. */
    if( a->vbegin.ptr.p_int[2*n]<0 )
    {
        newsize = 2*a->entriesinitialized+1;
        ivectorresize(&a->entries, newsize*amdordering_llmentrysize, _state);
        for(k=a->entriesinitialized; k<=newsize-2; k++)
        {
            a->entries.ptr.p_int[k*amdordering_llmentrysize+0] = k+1;
        }
        a->entries.ptr.p_int[(newsize-1)*amdordering_llmentrysize+0] = a->vbegin.ptr.p_int[2*n];
        a->vbegin.ptr.p_int[2*n] = a->entriesinitialized;
        a->entriesinitialized = newsize;
    }
    /* Pop one entry off the free list. */
    eidx = a->vbegin.ptr.p_int[2*n];
    offs = eidx*amdordering_llmentrysize;
    a->vbegin.ptr.p_int[2*n] = a->entries.ptr.p_int[offs+0];
    /* Link the entry at the head of row I's list; slots +0/+1 are the
     * prev/next links within a row, and row heads live in VBegin[0..N). */
    a->entries.ptr.p_int[offs+0] = -1;
    a->entries.ptr.p_int[offs+1] = a->vbegin.ptr.p_int[i];
    if( a->vbegin.ptr.p_int[i]>=0 )
    {
        a->entries.ptr.p_int[a->vbegin.ptr.p_int[i]*amdordering_llmentrysize+0] = eidx;
    }
    /* Link the entry at the head of column J's list; slots +2/+3 are the
     * prev/next links within a column, and column heads live at VBegin[N+J]. */
    a->entries.ptr.p_int[offs+2] = -1;
    a->entries.ptr.p_int[offs+3] = a->vbegin.ptr.p_int[j+n];
    if( a->vbegin.ptr.p_int[j+n]>=0 )
    {
        a->entries.ptr.p_int[a->vbegin.ptr.p_int[j+n]*amdordering_llmentrysize+2] = eidx;
    }
    /* Store the element's coordinates (+4 row, +5 column), publish the new
     * list heads, and bump the per-column element counter. */
    a->entries.ptr.p_int[offs+4] = i;
    a->entries.ptr.p_int[offs+5] = j;
    a->vbegin.ptr.p_int[i] = eidx;
    a->vbegin.ptr.p_int[j+n] = eidx;
    a->vcolcnt.ptr.p_int[j] = a->vcolcnt.ptr.p_int[j]+1;
}
43990 
43991 
43992 /*************************************************************************
43993 Counts elements in J-th column that are not present in n-set S
43994 
43995 INPUT PARAMETERS
43996     A           -   NxN linked list matrix
43997     J           -   column index
43998     S           -   n-set to compare against
43999 
44000 RESULT
44001     element count
44002 
44003 
44004   -- ALGLIB PROJECT --
44005      Copyright 05.10.2020 by Bochkanov Sergey.
44006 *************************************************************************/
amdordering_mtxcountcolumnnot(amdllmatrix * a,ae_int_t j,amdnset * s,ae_state * _state)44007 static ae_int_t amdordering_mtxcountcolumnnot(amdllmatrix* a,
44008      ae_int_t j,
44009      amdnset* s,
44010      ae_state *_state)
44011 {
44012     ae_int_t n;
44013     ae_int_t eidx;
44014     ae_int_t result;
44015 
44016 
44017     n = a->n;
44018     result = 0;
44019     eidx = a->vbegin.ptr.p_int[n+j];
44020     while(eidx>=0)
44021     {
44022         if( s->locationof.ptr.p_int[a->entries.ptr.p_int[eidx*amdordering_llmentrysize+4]]<0 )
44023         {
44024             result = result+1;
44025         }
44026         eidx = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+3];
44027     }
44028     return result;
44029 }
44030 
44031 
44032 /*************************************************************************
44033 Counts elements in J-th column
44034 
44035 INPUT PARAMETERS
44036     A           -   NxN linked list matrix
44037     J           -   column index
44038 
44039 RESULT
44040     element count
44041 
44042 
44043   -- ALGLIB PROJECT --
44044      Copyright 05.10.2020 by Bochkanov Sergey.
44045 *************************************************************************/
amdordering_mtxcountcolumn(amdllmatrix * a,ae_int_t j,ae_state * _state)44046 static ae_int_t amdordering_mtxcountcolumn(amdllmatrix* a,
44047      ae_int_t j,
44048      ae_state *_state)
44049 {
44050     ae_int_t result;
44051 
44052 
44053     result = a->vcolcnt.ptr.p_int[j];
44054     return result;
44055 }
44056 
44057 
44058 /*************************************************************************
44059 Clears K-th column or row
44060 
44061 INPUT PARAMETERS
44062     A           -   NxN linked list matrix
44063     K           -   column/row index to clear
44064     IsCol       -   whether we want to clear row or column
44065 
44066 OUTPUT PARAMETERS
44067     A           -   K-th column or row is empty
44068 
44069 
44070   -- ALGLIB PROJECT --
44071      Copyright 05.10.2020 by Bochkanov Sergey.
44072 *************************************************************************/
static void amdordering_mtxclearx(amdllmatrix* a,
     ae_int_t k,
     ae_bool iscol,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t eidx;
    ae_int_t enext;
    ae_int_t idxprev;
    ae_int_t idxnext;
    ae_int_t idxr;
    ae_int_t idxc;


    n = a->n;
    /* Pick the head of the list to clear: row heads live in VBegin[0..N),
     * column heads in VBegin[N..2N). */
    if( iscol )
    {
        eidx = a->vbegin.ptr.p_int[n+k];
    }
    else
    {
        eidx = a->vbegin.ptr.p_int[k];
    }
    while(eidx>=0)
    {
        /* Entry layout: +0/+1 prev/next within the row, +2/+3 prev/next
         * within the column, +4 row index, +5 column index. */
        idxr = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+4];
        idxc = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+5];
        /* Save the successor along the list being cleared BEFORE the entry
         * is unlinked and recycled. */
        if( iscol )
        {
            enext = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+3];
        }
        else
        {
            enext = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+1];
        }
        /* Unlink the entry from its row list (update the row head when the
         * entry was the first element). */
        idxprev = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+0];
        idxnext = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+1];
        if( idxprev>=0 )
        {
            a->entries.ptr.p_int[idxprev*amdordering_llmentrysize+1] = idxnext;
        }
        else
        {
            a->vbegin.ptr.p_int[idxr] = idxnext;
        }
        if( idxnext>=0 )
        {
            a->entries.ptr.p_int[idxnext*amdordering_llmentrysize+0] = idxprev;
        }
        /* Unlink the entry from its column list in the same fashion. */
        idxprev = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+2];
        idxnext = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+3];
        if( idxprev>=0 )
        {
            a->entries.ptr.p_int[idxprev*amdordering_llmentrysize+3] = idxnext;
        }
        else
        {
            a->vbegin.ptr.p_int[idxc+n] = idxnext;
        }
        if( idxnext>=0 )
        {
            a->entries.ptr.p_int[idxnext*amdordering_llmentrysize+2] = idxprev;
        }
        /* Push the entry onto the free list headed at VBegin[2*N]. */
        a->entries.ptr.p_int[eidx*amdordering_llmentrysize+0] = a->vbegin.ptr.p_int[2*n];
        a->vbegin.ptr.p_int[2*n] = eidx;
        eidx = enext;
        /* Clearing a row removes one element from many different columns,
         * so their counters are decremented one by one. */
        if( !iscol )
        {
            a->vcolcnt.ptr.p_int[idxc] = a->vcolcnt.ptr.p_int[idxc]-1;
        }
    }
    /* Clearing a column empties exactly one counter. */
    if( iscol )
    {
        a->vcolcnt.ptr.p_int[k] = 0;
    }
}
44149 
44150 
44151 /*************************************************************************
44152 Clears J-th column
44153 
44154 INPUT PARAMETERS
44155     A           -   NxN linked list matrix
44156     J           -   column index to clear
44157 
44158 OUTPUT PARAMETERS
44159     A           -   J-th column is empty
44160 
44161 
44162   -- ALGLIB PROJECT --
44163      Copyright 05.10.2020 by Bochkanov Sergey.
44164 *************************************************************************/
amdordering_mtxclearcolumn(amdllmatrix * a,ae_int_t j,ae_state * _state)44165 static void amdordering_mtxclearcolumn(amdllmatrix* a,
44166      ae_int_t j,
44167      ae_state *_state)
44168 {
44169 
44170 
44171     amdordering_mtxclearx(a, j, ae_true, _state);
44172 }
44173 
44174 
44175 /*************************************************************************
44176 Clears J-th row
44177 
44178 INPUT PARAMETERS
44179     A           -   NxN linked list matrix
44180     J           -   row index to clear
44181 
44182 OUTPUT PARAMETERS
44183     A           -   J-th row is empty
44184 
44185 
44186   -- ALGLIB PROJECT --
44187      Copyright 05.10.2020 by Bochkanov Sergey.
44188 *************************************************************************/
amdordering_mtxclearrow(amdllmatrix * a,ae_int_t j,ae_state * _state)44189 static void amdordering_mtxclearrow(amdllmatrix* a,
44190      ae_int_t j,
44191      ae_state *_state)
44192 {
44193 
44194 
44195     amdordering_mtxclearx(a, j, ae_false, _state);
44196 }
44197 
44198 
44199 /*************************************************************************
44200 Initialize vertex storage using A to estimate initial degrees
44201 
44202 INPUT PARAMETERS
44203     A           -   NxN lower triangular sparse CRS matrix
44204     N           -   problem size
44205     CheckExactDegrees-
                    whether we want to maintain additional exact degrees
44207                     (the search is still done using approximate ones)
44208 
44209 OUTPUT PARAMETERS
44210     S           -   vertex set
44211 
44212 
44213   -- ALGLIB PROJECT --
44214      Copyright 05.10.2020 by Bochkanov Sergey.
44215 *************************************************************************/
amdordering_vtxinit(sparsematrix * a,ae_int_t n,ae_bool checkexactdegrees,amdvertexset * s,ae_state * _state)44216 static void amdordering_vtxinit(sparsematrix* a,
44217      ae_int_t n,
44218      ae_bool checkexactdegrees,
44219      amdvertexset* s,
44220      ae_state *_state)
44221 {
44222     ae_int_t i;
44223     ae_int_t j;
44224     ae_int_t jj;
44225     ae_int_t j0;
44226     ae_int_t j1;
44227 
44228     _amdvertexset_clear(s);
44229 
44230     s->n = n;
44231     s->checkexactdegrees = checkexactdegrees;
44232     s->smallestdegree = 0;
44233     bsetallocv(n, ae_true, &s->isvertex, _state);
44234     isetallocv(n, 0, &s->approxd, _state);
44235     for(i=0; i<=n-1; i++)
44236     {
44237         j0 = a->ridx.ptr.p_int[i];
44238         j1 = a->didx.ptr.p_int[i]-1;
44239         s->approxd.ptr.p_int[i] = j1-j0+1;
44240         for(jj=j0; jj<=j1; jj++)
44241         {
44242             j = a->idx.ptr.p_int[jj];
44243             s->approxd.ptr.p_int[j] = s->approxd.ptr.p_int[j]+1;
44244         }
44245     }
44246     if( checkexactdegrees )
44247     {
44248         icopyallocv(n, &s->approxd, &s->optionalexactd, _state);
44249     }
44250     isetallocv(n, -1, &s->vbegin, _state);
44251     isetallocv(n, -1, &s->vprev, _state);
44252     isetallocv(n, -1, &s->vnext, _state);
44253     for(i=0; i<=n-1; i++)
44254     {
44255         j = s->approxd.ptr.p_int[i];
44256         j0 = s->vbegin.ptr.p_int[j];
44257         s->vbegin.ptr.p_int[j] = i;
44258         s->vnext.ptr.p_int[i] = j0;
44259         s->vprev.ptr.p_int[i] = -1;
44260         if( j0>=0 )
44261         {
44262             s->vprev.ptr.p_int[j0] = i;
44263         }
44264     }
44265 }
44266 
44267 
44268 /*************************************************************************
44269 Removes vertex from the storage
44270 
44271 INPUT PARAMETERS
44272     S           -   vertex set
44273     P           -   vertex to be removed
44274 
44275 OUTPUT PARAMETERS
44276     S           -   modified
44277 
44278 
44279   -- ALGLIB PROJECT --
44280      Copyright 05.10.2020 by Bochkanov Sergey.
44281 *************************************************************************/
amdordering_vtxremovevertex(amdvertexset * s,ae_int_t p,ae_state * _state)44282 static void amdordering_vtxremovevertex(amdvertexset* s,
44283      ae_int_t p,
44284      ae_state *_state)
44285 {
44286     ae_int_t d;
44287     ae_int_t idxprev;
44288     ae_int_t idxnext;
44289 
44290 
44291     d = s->approxd.ptr.p_int[p];
44292     idxprev = s->vprev.ptr.p_int[p];
44293     idxnext = s->vnext.ptr.p_int[p];
44294     if( idxprev>=0 )
44295     {
44296         s->vnext.ptr.p_int[idxprev] = idxnext;
44297     }
44298     else
44299     {
44300         s->vbegin.ptr.p_int[d] = idxnext;
44301     }
44302     if( idxnext>=0 )
44303     {
44304         s->vprev.ptr.p_int[idxnext] = idxprev;
44305     }
44306     s->isvertex.ptr.p_bool[p] = ae_false;
44307     s->approxd.ptr.p_int[p] = -9999999;
44308     if( s->checkexactdegrees )
44309     {
44310         s->optionalexactd.ptr.p_int[p] = -9999999;
44311     }
44312 }
44313 
44314 
44315 /*************************************************************************
44316 Get approximate degree. Result is undefined for removed vertexes.
44317 
44318 INPUT PARAMETERS
44319     S           -   vertex set
44320     P           -   vertex index
44321 
44322 RESULT
44323     vertex degree
44324 
44325 
44326   -- ALGLIB PROJECT --
44327      Copyright 05.10.2020 by Bochkanov Sergey.
44328 *************************************************************************/
amdordering_vtxgetapprox(amdvertexset * s,ae_int_t p,ae_state * _state)44329 static ae_int_t amdordering_vtxgetapprox(amdvertexset* s,
44330      ae_int_t p,
44331      ae_state *_state)
44332 {
44333     ae_int_t result;
44334 
44335 
44336     result = s->approxd.ptr.p_int[p];
44337     return result;
44338 }
44339 
44340 
44341 /*************************************************************************
44342 Get exact degree (or 0, if not supported).  Result is undefined for
44343 removed vertexes.
44344 
44345 INPUT PARAMETERS
44346     S           -   vertex set
44347     P           -   vertex index
44348 
44349 RESULT
44350     vertex degree
44351 
44352 
44353   -- ALGLIB PROJECT --
44354      Copyright 05.10.2020 by Bochkanov Sergey.
44355 *************************************************************************/
amdordering_vtxgetexact(amdvertexset * s,ae_int_t p,ae_state * _state)44356 static ae_int_t amdordering_vtxgetexact(amdvertexset* s,
44357      ae_int_t p,
44358      ae_state *_state)
44359 {
44360     ae_int_t result;
44361 
44362 
44363     if( s->checkexactdegrees )
44364     {
44365         result = s->optionalexactd.ptr.p_int[p];
44366     }
44367     else
44368     {
44369         result = 0;
44370     }
44371     return result;
44372 }
44373 
44374 
44375 /*************************************************************************
44376 Returns index of vertex with minimum approximate degree, or -1 when there
44377 is no vertex.
44378 
44379 INPUT PARAMETERS
44380     S           -   vertex set
44381 
44382 RESULT
44383     vertex index, or -1
44384 
44385 
44386   -- ALGLIB PROJECT --
44387      Copyright 05.10.2020 by Bochkanov Sergey.
44388 *************************************************************************/
amdordering_vtxgetapproxmindegree(amdvertexset * s,ae_state * _state)44389 static ae_int_t amdordering_vtxgetapproxmindegree(amdvertexset* s,
44390      ae_state *_state)
44391 {
44392     ae_int_t i;
44393     ae_int_t n;
44394     ae_int_t result;
44395 
44396 
44397     n = s->n;
44398     result = -1;
44399     for(i=s->smallestdegree; i<=n-1; i++)
44400     {
44401         if( s->vbegin.ptr.p_int[i]>=0 )
44402         {
44403             s->smallestdegree = i;
44404             result = s->vbegin.ptr.p_int[i];
44405             return result;
44406         }
44407     }
44408     return result;
44409 }
44410 
44411 
44412 /*************************************************************************
44413 Update approximate degree
44414 
44415 INPUT PARAMETERS
44416     S           -   vertex set
44417     P           -   vertex to be updated
44418     DNew        -   new degree
44419 
44420 OUTPUT PARAMETERS
44421     S           -   modified
44422 
44423 
44424   -- ALGLIB PROJECT --
44425      Copyright 05.10.2020 by Bochkanov Sergey.
44426 *************************************************************************/
amdordering_vtxupdateapproximatedegree(amdvertexset * s,ae_int_t p,ae_int_t dnew,ae_state * _state)44427 static void amdordering_vtxupdateapproximatedegree(amdvertexset* s,
44428      ae_int_t p,
44429      ae_int_t dnew,
44430      ae_state *_state)
44431 {
44432     ae_int_t dold;
44433     ae_int_t idxprev;
44434     ae_int_t idxnext;
44435     ae_int_t oldbegin;
44436 
44437 
44438     dold = s->approxd.ptr.p_int[p];
44439     if( dold==dnew )
44440     {
44441         return;
44442     }
44443     idxprev = s->vprev.ptr.p_int[p];
44444     idxnext = s->vnext.ptr.p_int[p];
44445     if( idxprev>=0 )
44446     {
44447         s->vnext.ptr.p_int[idxprev] = idxnext;
44448     }
44449     else
44450     {
44451         s->vbegin.ptr.p_int[dold] = idxnext;
44452     }
44453     if( idxnext>=0 )
44454     {
44455         s->vprev.ptr.p_int[idxnext] = idxprev;
44456     }
44457     oldbegin = s->vbegin.ptr.p_int[dnew];
44458     s->vbegin.ptr.p_int[dnew] = p;
44459     s->vnext.ptr.p_int[p] = oldbegin;
44460     s->vprev.ptr.p_int[p] = -1;
44461     if( oldbegin>=0 )
44462     {
44463         s->vprev.ptr.p_int[oldbegin] = p;
44464     }
44465     s->approxd.ptr.p_int[p] = dnew;
44466     if( dnew<s->smallestdegree )
44467     {
44468         s->smallestdegree = dnew;
44469     }
44470 }
44471 
44472 
44473 /*************************************************************************
44474 Update optional exact degree. Silently returns if vertex set does not store
44475 exact degrees.
44476 
44477 INPUT PARAMETERS
44478     S           -   vertex set
44479     P           -   vertex to be updated
44480     D           -   new degree
44481 
44482 OUTPUT PARAMETERS
44483     S           -   modified
44484 
44485 
44486   -- ALGLIB PROJECT --
44487      Copyright 05.10.2020 by Bochkanov Sergey.
44488 *************************************************************************/
amdordering_vtxupdateexactdegree(amdvertexset * s,ae_int_t p,ae_int_t d,ae_state * _state)44489 static void amdordering_vtxupdateexactdegree(amdvertexset* s,
44490      ae_int_t p,
44491      ae_int_t d,
44492      ae_state *_state)
44493 {
44494 
44495 
44496     if( !s->checkexactdegrees )
44497     {
44498         return;
44499     }
44500     s->optionalexactd.ptr.p_int[p] = d;
44501 }
44502 
44503 
44504 /*************************************************************************
44505 This function selects K-th  pivot  with  minimum  approximate  degree  and
44506 generates permutation that reorders variable to the K-th position  in  the
44507 matrix.
44508 
44509 Due to supernodal structure of the matrix more than one pivot variable can
44510 be selected and moved to the beginning. The actual count of pivots selected
44511 is returned in NodeSize.
44512 
44513 INPUT PARAMETERS
44514     Buf         -   properly initialized buffer object
44515     K           -   pivot index
44516 
44517 OUTPUT PARAMETERS
44518     Buf.Perm    -   entries [K,K+NodeSize) are initialized by permutation
44519     Buf.InvPerm -   entries [K,K+NodeSize) are initialized by permutation
44520     Buf.ColumnSwaps-entries [K,K+NodeSize) are initialized by permutation
44521     P           -   pivot supervariable
44522     NodeSize    -   supernode size
44523 
44524   -- ALGLIB PROJECT --
44525      Copyright 05.10.2020 by Bochkanov Sergey.
44526 *************************************************************************/
static void amdordering_amdselectpivotelement(amdbuffer* buf,
     ae_int_t k,
     ae_int_t* p,
     ae_int_t* nodesize,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;

    *p = 0;
    *nodesize = 0;

    /* Pick the pivot: a vertex with minimum approximate degree. */
    *p = amdordering_vtxgetapproxmindegree(&buf->vertexdegrees, _state);
    ae_assert(*p>=0, "GenerateAMDPermutation: integrity check 3634 failed", _state);
    ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, *p, _state)>=0, "integrity check RDFD2 failed", _state);
    /* Move every variable J of the pivot supervariable SetSuper[P] into
     * positions K, K+1, ... by swapping it with whatever currently occupies
     * that position.  Perm/InvPerm appear to be mutually inverse maps
     * (variable->position and position->variable, respectively - confirm
     * against their initialization); each swap updates both maps so they
     * stay consistent and records the displaced position in ColumnSwaps. */
    *nodesize = 0;
    amdordering_knsstartenumeration(&buf->setsuper, *p, _state);
    while(amdordering_knsenumerate(&buf->setsuper, &j, _state))
    {
        i = buf->perm.ptr.p_int[j];
        buf->columnswaps.ptr.p_int[k+(*nodesize)] = i;
        buf->invperm.ptr.p_int[i] = buf->invperm.ptr.p_int[k+(*nodesize)];
        buf->invperm.ptr.p_int[k+(*nodesize)] = j;
        buf->perm.ptr.p_int[buf->invperm.ptr.p_int[i]] = i;
        buf->perm.ptr.p_int[buf->invperm.ptr.p_int[k+(*nodesize)]] = k+(*nodesize);
        inc(nodesize, _state);
    }
    ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, *p, _state)>=0&&(!buf->checkexactdegrees||amdordering_vtxgetexact(&buf->vertexdegrees, *p, _state)>=0), "AMD: integrity check RDFD failed", _state);
}
44556 
44557 
44558 /*************************************************************************
44559 This function computes nonzero pattern of Lp, the column that is added  to
44560 the lower triangular Cholesky factor.
44561 
44562 INPUT PARAMETERS
44563     Buf         -   properly initialized buffer object
44564     P           -   pivot column
44565 
44566 OUTPUT PARAMETERS
44567     Buf.setP    -   initialized with setSuper[P]
44568     Buf.Lp      -   initialized with Lp\P
44569     Buf.setRp   -   initialized with Lp\{P+Q}
44570     Buf.Ep      -   initialized with setE[P]
44571     Buf.mtxL    -   L := L+Lp
44572     Buf.Ls      -   first Buf.LSCnt elements contain subset of Lp elements
44573                     that are principal nodes in supervariables.
44574 
44575   -- ALGLIB PROJECT --
44576      Copyright 05.10.2020 by Bochkanov Sergey.
44577 *************************************************************************/
static void amdordering_amdcomputelp(amdbuffer* buf,
     ae_int_t p,
     ae_state *_state)
{
    ae_int_t i;


    /* setP := setSuper[P], the variables forming the pivot supernode. */
    amdordering_nsclear(&buf->setp, _state);
    amdordering_nsaddkth(&buf->setp, &buf->setsuper, p, _state);
    /* Lp := setA[P] united with the L-columns of all elements in setE[P]. */
    amdordering_nsclear(&buf->lp, _state);
    amdordering_nsaddkth(&buf->lp, &buf->seta, p, _state);
    amdordering_knsstartenumeration(&buf->sete, p, _state);
    while(amdordering_knsenumerate(&buf->sete, &i, _state))
    {
        amdordering_mtxaddcolumnto(&buf->mtxl, i, &buf->lp, _state);
    }
    /* Lp := Lp \ setSuper[P]; setRp := Lp \ setQ. */
    amdordering_nssubtractkth(&buf->lp, &buf->setsuper, p, _state);
    amdordering_nscopy(&buf->lp, &buf->setrp, _state);
    amdordering_nssubtract1(&buf->setrp, &buf->setq, _state);
    /* Insert column P of the pattern into L; along the way collect into
     * Ls (first LSCnt entries) the members of Lp flagged as principal
     * nodes of supervariables. */
    buf->lscnt = 0;
    amdordering_nsstartenumeration(&buf->lp, _state);
    while(amdordering_nsenumerate(&buf->lp, &i, _state))
    {
        ae_assert(!buf->iseliminated.ptr.p_bool[i], "AMD: integrity check 0740 failed", _state);
        amdordering_mtxinsertnewelement(&buf->mtxl, i, p, _state);
        if( buf->issupernode.ptr.p_bool[i] )
        {
            buf->ls.ptr.p_int[buf->lscnt] = i;
            buf->lscnt = buf->lscnt+1;
        }
    }
    /* Ep := setE[P]. */
    amdordering_nsclear(&buf->ep, _state);
    amdordering_nsaddkth(&buf->ep, &buf->sete, p, _state);
}
44612 
44613 
44614 /*************************************************************************
44615 Having output of AMDComputeLp() in the Buf object, this function  performs
44616 mass elimination in the quotient graph.
44617 
44618 INPUT PARAMETERS
44619     Buf         -   properly initialized buffer object
44620     P           -   pivot column
44621     K           -   number of already eliminated columns (P-th is not counted)
44622     Tau         -   variables with degrees higher than Tau will be classified
44623                     as quasidense
44624 
44625 OUTPUT PARAMETERS
44626     Buf.setA    -   Lp is eliminated from setA
44627     Buf.setE    -   Ep is eliminated from setE, P is added
44628     approxD     -   updated
44629     Buf.setQSuperCand-   contains candidates for quasidense status assignment
44630 
44631   -- ALGLIB PROJECT --
44632      Copyright 05.10.2020 by Bochkanov Sergey.
44633 *************************************************************************/
amdordering_amdmasselimination(amdbuffer * buf,ae_int_t p,ae_int_t k,ae_int_t tau,ae_state * _state)44634 static void amdordering_amdmasselimination(amdbuffer* buf,
44635      ae_int_t p,
44636      ae_int_t k,
44637      ae_int_t tau,
44638      ae_state *_state)
44639 {
44640     ae_int_t n;
44641     ae_int_t lidx;
44642     ae_int_t lpi;
44643     ae_int_t cntsuperi;
44644     ae_int_t cntq;
44645     ae_int_t cntainoti;
44646     ae_int_t cntainotqi;
44647     ae_int_t cntlpnoti;
44648     ae_int_t cntlpnotqi;
44649     ae_int_t cc;
44650     ae_int_t j;
44651     ae_int_t e;
44652     ae_int_t we;
44653     ae_int_t cnttoclean;
44654     ae_int_t idxbegin;
44655     ae_int_t idxend;
44656     ae_int_t jj;
44657     ae_int_t bnd0;
44658     ae_int_t bnd1;
44659     ae_int_t bnd2;
44660     ae_int_t d;
44661 
44662 
44663     n = buf->n;
44664     ivectorsetlengthatleast(&buf->tmp0, n, _state);
44665     cnttoclean = 0;
44666     for(lidx=0; lidx<=buf->lscnt-1; lidx++)
44667     {
44668         if( buf->setq.locationof.ptr.p_int[buf->ls.ptr.p_int[lidx]]<0 )
44669         {
44670             lpi = buf->ls.ptr.p_int[lidx];
44671             cntsuperi = amdordering_knscountkth(&buf->setsuper, lpi, _state);
44672             amdordering_knsdirectaccess(&buf->sete, lpi, &idxbegin, &idxend, _state);
44673             for(jj=idxbegin; jj<=idxend-1; jj++)
44674             {
44675                 e = buf->sete.data.ptr.p_int[jj];
44676                 we = buf->arrwe.ptr.p_int[e];
44677                 if( we<0 )
44678                 {
44679                     we = amdordering_mtxcountcolumnnot(&buf->mtxl, e, &buf->setq, _state);
44680                     buf->tmp0.ptr.p_int[cnttoclean] = e;
44681                     cnttoclean = cnttoclean+1;
44682                 }
44683                 buf->arrwe.ptr.p_int[e] = we-cntsuperi;
44684             }
44685         }
44686     }
44687     amdordering_nsclear(&buf->setqsupercand, _state);
44688     for(lidx=0; lidx<=buf->lscnt-1; lidx++)
44689     {
44690         if( buf->setq.locationof.ptr.p_int[buf->ls.ptr.p_int[lidx]]<0 )
44691         {
44692             lpi = buf->ls.ptr.p_int[lidx];
44693             amdordering_knssubtract1(&buf->seta, lpi, &buf->lp, _state);
44694             amdordering_knssubtract1(&buf->seta, lpi, &buf->setp, _state);
44695             amdordering_knssubtract1(&buf->sete, lpi, &buf->ep, _state);
44696             amdordering_knsaddnewelement(&buf->sete, lpi, p, _state);
44697             if( buf->extendeddebug )
44698             {
44699                 ae_assert(amdordering_knscountnotkth(&buf->seta, lpi, &buf->setsuper, lpi, _state)==amdordering_knscountkth(&buf->seta, lpi, _state), "AMD: integrity check 454F failed", _state);
44700                 ae_assert(amdordering_knscountandkth(&buf->seta, lpi, &buf->setsuper, lpi, _state)==0, "AMD: integrity check kl5nv failed", _state);
44701                 ae_assert(amdordering_nscountandkth(&buf->lp, &buf->setsuper, lpi, _state)==amdordering_knscountkth(&buf->setsuper, lpi, _state), "AMD: integrity check 8463 failed", _state);
44702             }
44703             cntq = amdordering_nscount(&buf->setq, _state);
44704             cntsuperi = amdordering_knscountkth(&buf->setsuper, lpi, _state);
44705             cntainoti = amdordering_knscountkth(&buf->seta, lpi, _state);
44706             if( cntq>0 )
44707             {
44708                 cntainotqi = amdordering_knscountnot(&buf->seta, lpi, &buf->setq, _state);
44709             }
44710             else
44711             {
44712                 cntainotqi = cntainoti;
44713             }
44714             cntlpnoti = amdordering_nscount(&buf->lp, _state)-cntsuperi;
44715             cntlpnotqi = amdordering_nscount(&buf->setrp, _state)-cntsuperi;
44716             cc = 0;
44717             amdordering_knsdirectaccess(&buf->sete, lpi, &idxbegin, &idxend, _state);
44718             for(jj=idxbegin; jj<=idxend-1; jj++)
44719             {
44720                 j = buf->sete.data.ptr.p_int[jj];
44721                 if( j==p )
44722                 {
44723                     continue;
44724                 }
44725                 e = buf->arrwe.ptr.p_int[j];
44726                 if( e<0 )
44727                 {
44728                     if( cntq>0 )
44729                     {
44730                         e = amdordering_mtxcountcolumnnot(&buf->mtxl, j, &buf->setq, _state);
44731                     }
44732                     else
44733                     {
44734                         e = amdordering_mtxcountcolumn(&buf->mtxl, j, _state);
44735                     }
44736                 }
44737                 cc = cc+e;
44738             }
44739             bnd0 = n-k-amdordering_nscount(&buf->setp, _state);
44740             bnd1 = amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)+cntlpnoti;
44741             bnd2 = cntq+cntainotqi+cntlpnotqi+cc;
44742             d = imin3(bnd0, bnd1, bnd2, _state);
44743             amdordering_vtxupdateapproximatedegree(&buf->vertexdegrees, lpi, d, _state);
44744             if( tau>0&&d+cntsuperi>tau )
44745             {
44746                 amdordering_nsaddelement(&buf->setqsupercand, lpi, _state);
44747             }
44748             if( buf->checkexactdegrees )
44749             {
44750                 amdordering_nsclear(&buf->exactdegreetmp0, _state);
44751                 amdordering_knsstartenumeration(&buf->sete, lpi, _state);
44752                 while(amdordering_knsenumerate(&buf->sete, &j, _state))
44753                 {
44754                     amdordering_mtxaddcolumnto(&buf->mtxl, j, &buf->exactdegreetmp0, _state);
44755                 }
44756                 amdordering_vtxupdateexactdegree(&buf->vertexdegrees, lpi, cntainoti+amdordering_nscountnotkth(&buf->exactdegreetmp0, &buf->setsuper, lpi, _state), _state);
44757                 ae_assert((amdordering_knscountkth(&buf->sete, lpi, _state)>2||cntq>0)||amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)==amdordering_vtxgetexact(&buf->vertexdegrees, lpi, _state), "AMD: integrity check 7206 failed", _state);
44758                 ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)>=amdordering_vtxgetexact(&buf->vertexdegrees, lpi, _state), "AMD: integrity check 8206 failed", _state);
44759             }
44760         }
44761     }
44762     for(j=0; j<=cnttoclean-1; j++)
44763     {
44764         buf->arrwe.ptr.p_int[buf->tmp0.ptr.p_int[j]] = -1;
44765     }
44766 }
44767 
44768 
44769 /*************************************************************************
44770 After mass elimination, but before removal of vertex  P,  we  may  perform
44771 supernode detection. Only variables/supernodes in  Lp  (P  itself  is  NOT
44772 included) can be merged into larger supernodes.
44773 
44774 INPUT PARAMETERS
44775     Buf         -   properly initialized buffer object
44776 
44777 OUTPUT PARAMETERS
44778     Buf         -   following fields of Buf may be modified:
44779                     * Buf.setSuper
44780                     * Buf.setA
44781                     * Buf.setE
44782                     * Buf.IsSupernode
44783                     * ApproxD and ExactD
44784 
44785   -- ALGLIB PROJECT --
44786      Copyright 05.10.2020 by Bochkanov Sergey.
44787 *************************************************************************/
static void amdordering_amddetectsupernodes(amdbuffer* buf,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t i;
    ae_int_t j;
    ae_int_t cnt;
    ae_int_t lpi;
    ae_int_t lpj;
    ae_int_t nj;
    ae_int_t hashi;


    n = buf->n;
    ivectorsetlengthatleast(&buf->sncandidates, n, _state);

    /* Quick exit: merging needs at least two recently updated vertices in Buf.LS */
    if( buf->lscnt<2 )
    {
        return;
    }

    /*
     * Distribute candidate vertices over N hash buckets. Only vertices NOT
     * present in SetQ (LocationOf[v]<0) are considered. The hash combines
     * the sums of SetA[v] and SetE[v] modulo N, so two vertices with
     * identical adjacency structure always land in the same bucket.
     */
    for(i=0; i<=buf->lscnt-1; i++)
    {
        if( buf->setq.locationof.ptr.p_int[buf->ls.ptr.p_int[i]]<0 )
        {
            lpi = buf->ls.ptr.p_int[i];
            hashi = (amdordering_knssumkth(&buf->seta, lpi, _state)+amdordering_knssumkth(&buf->sete, lpi, _state))%n;
            amdordering_nsaddelement(&buf->nonemptybuckets, hashi, _state);
            amdordering_knsaddnewelement(&buf->hashbuckets, hashi, lpi, _state);
        }
    }

    /*
     * Scan non-empty buckets; only buckets holding two or more vertices can
     * contain a mergeable pair.
     */
    amdordering_nsstartenumeration(&buf->nonemptybuckets, _state);
    while(amdordering_nsenumerate(&buf->nonemptybuckets, &hashi, _state))
    {
        if( amdordering_knscountkth(&buf->hashbuckets, hashi, _state)>=2 )
        {
            /* Flatten bucket contents into SNCandidates[0..Cnt-1] */
            cnt = 0;
            amdordering_knsstartenumeration(&buf->hashbuckets, hashi, _state);
            while(amdordering_knsenumerate(&buf->hashbuckets, &i, _state))
            {
                buf->sncandidates.ptr.p_int[cnt] = i;
                cnt = cnt+1;
            }

            /*
             * Pairwise comparison of candidates. A pair is considered only
             * while both members are still active supernodes (a vertex merged
             * earlier in this loop has IsSupernode reset to False).
             */
            for(i=cnt-1; i>=0; i--)
            {
                for(j=cnt-1; j>=i+1; j--)
                {
                    if( buf->issupernode.ptr.p_bool[buf->sncandidates.ptr.p_int[i]]&&buf->issupernode.ptr.p_bool[buf->sncandidates.ptr.p_int[j]] )
                    {
                        lpi = buf->sncandidates.ptr.p_int[i];
                        lpj = buf->sncandidates.ptr.p_int[j];

                        /*
                         * Build comparable adjacency lists:
                         * AdjI = SetA[i]+SetE[i]+{i,j}, AdjJ = SetA[j]+SetE[j]+{i,j}.
                         * Adding both vertices to both lists makes the two
                         * sets directly comparable with NSEqual().
                         */
                        amdordering_nsclear(&buf->adji, _state);
                        amdordering_nsclear(&buf->adjj, _state);
                        amdordering_nsaddkth(&buf->adji, &buf->seta, lpi, _state);
                        amdordering_nsaddkth(&buf->adjj, &buf->seta, lpj, _state);
                        amdordering_nsaddkth(&buf->adji, &buf->sete, lpi, _state);
                        amdordering_nsaddkth(&buf->adjj, &buf->sete, lpj, _state);
                        amdordering_nsaddelement(&buf->adji, lpi, _state);
                        amdordering_nsaddelement(&buf->adji, lpj, _state);
                        amdordering_nsaddelement(&buf->adjj, lpi, _state);
                        amdordering_nsaddelement(&buf->adjj, lpj, _state);

                        /* Vertices are merged only when adjacency coincides exactly */
                        if( !amdordering_nsequal(&buf->adji, &buf->adjj, _state) )
                        {
                            continue;
                        }
                        if( buf->extendeddebug )
                        {
                            ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)>=1&&(!buf->checkexactdegrees||amdordering_vtxgetexact(&buf->vertexdegrees, lpi, _state)>=1), "AMD: integrity check &GBFF1 failed", _state);
                            ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, lpj, _state)>=1&&(!buf->checkexactdegrees||amdordering_vtxgetexact(&buf->vertexdegrees, lpj, _state)>=1), "AMD: integrity check &GBFF2 failed", _state);
                            ae_assert(amdordering_knscountandkth(&buf->setsuper, lpi, &buf->setsuper, lpj, _state)==0, "AMD: integrity check &GBFF3 failed", _state);
                        }

                        /*
                         * Merge supernode J into supernode I:
                         * NJ = cardinality of supernode J; its members are
                         * moved into SetSuper[i], then J's per-vertex sets are
                         * reclaimed, J is deactivated and removed from the
                         * degree structure.
                         */
                        nj = amdordering_knscountkth(&buf->setsuper, lpj, _state);
                        amdordering_knsaddkthdistinct(&buf->setsuper, lpi, &buf->setsuper, lpj, _state);
                        amdordering_knsclearkthreclaim(&buf->setsuper, lpj, _state);
                        amdordering_knsclearkthreclaim(&buf->seta, lpj, _state);
                        amdordering_knsclearkthreclaim(&buf->sete, lpj, _state);
                        buf->issupernode.ptr.p_bool[lpj] = ae_false;
                        amdordering_vtxremovevertex(&buf->vertexdegrees, lpj, _state);

                        /*
                         * The merge removes NJ vertices from I's neighborhood,
                         * so I's approximate (and, optionally, exact) degree
                         * decreases by NJ.
                         */
                        amdordering_vtxupdateapproximatedegree(&buf->vertexdegrees, lpi, amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)-nj, _state);
                        if( buf->checkexactdegrees )
                        {
                            amdordering_vtxupdateexactdegree(&buf->vertexdegrees, lpi, amdordering_vtxgetexact(&buf->vertexdegrees, lpi, _state)-nj, _state);
                        }
                    }
                }
            }
        }

        /* Clear the bucket without releasing its storage - buckets are reused on the next call */
        amdordering_knsclearkthnoreclaim(&buf->hashbuckets, hashi, _state);
    }
    amdordering_nsclear(&buf->nonemptybuckets, _state);
}
44877 
44878 
/*
Constructor-style initializer for amdnset: prepares the Items and
LocationOf integer arrays (both empty) for subsequent use.
*/
void _amdnset_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdnset *self = (amdnset*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->items, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->locationof, 0, DT_INT, _state, make_automatic);
}
44886 
44887 
/*
Copy-initializer for amdnset: replicates the scalar fields of *_src into
*_dst and deep-copies both dynamic arrays.
*/
void _amdnset_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdnset *d = (amdnset*)_dst;
    amdnset *s = (amdnset*)_src;
    d->n = s->n;
    d->nstored = s->nstored;
    ae_vector_init_copy(&d->items, &s->items, _state, make_automatic);
    ae_vector_init_copy(&d->locationof, &s->locationof, _state, make_automatic);
    d->iteridx = s->iteridx;
}
44898 
44899 
_amdnset_clear(void * _p)44900 void _amdnset_clear(void* _p)
44901 {
44902     amdnset *p = (amdnset*)_p;
44903     ae_touch_ptr((void*)p);
44904     ae_vector_clear(&p->items);
44905     ae_vector_clear(&p->locationof);
44906 }
44907 
44908 
_amdnset_destroy(void * _p)44909 void _amdnset_destroy(void* _p)
44910 {
44911     amdnset *p = (amdnset*)_p;
44912     ae_touch_ptr((void*)p);
44913     ae_vector_destroy(&p->items);
44914     ae_vector_destroy(&p->locationof);
44915 }
44916 
44917 
/*
Constructor-style initializer for amdknset: prepares all five integer
arrays (flags, row bookkeeping and the shared data pool) as empty vectors.
*/
void _amdknset_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdknset *self = (amdknset*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->flagarray, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vbegin, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vallocated, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vcnt, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->data, 0, DT_INT, _state, make_automatic);
}
44928 
44929 
/*
Copy-initializer for amdknset: replicates scalar fields of *_src into
*_dst and deep-copies every dynamic array.
*/
void _amdknset_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdknset *d = (amdknset*)_dst;
    amdknset *s = (amdknset*)_src;
    d->k = s->k;
    d->n = s->n;
    ae_vector_init_copy(&d->flagarray, &s->flagarray, _state, make_automatic);
    ae_vector_init_copy(&d->vbegin, &s->vbegin, _state, make_automatic);
    ae_vector_init_copy(&d->vallocated, &s->vallocated, _state, make_automatic);
    ae_vector_init_copy(&d->vcnt, &s->vcnt, _state, make_automatic);
    ae_vector_init_copy(&d->data, &s->data, _state, make_automatic);
    d->dataused = s->dataused;
    d->iterrow = s->iterrow;
    d->iteridx = s->iteridx;
}
44945 
44946 
_amdknset_clear(void * _p)44947 void _amdknset_clear(void* _p)
44948 {
44949     amdknset *p = (amdknset*)_p;
44950     ae_touch_ptr((void*)p);
44951     ae_vector_clear(&p->flagarray);
44952     ae_vector_clear(&p->vbegin);
44953     ae_vector_clear(&p->vallocated);
44954     ae_vector_clear(&p->vcnt);
44955     ae_vector_clear(&p->data);
44956 }
44957 
44958 
_amdknset_destroy(void * _p)44959 void _amdknset_destroy(void* _p)
44960 {
44961     amdknset *p = (amdknset*)_p;
44962     ae_touch_ptr((void*)p);
44963     ae_vector_destroy(&p->flagarray);
44964     ae_vector_destroy(&p->vbegin);
44965     ae_vector_destroy(&p->vallocated);
44966     ae_vector_destroy(&p->vcnt);
44967     ae_vector_destroy(&p->data);
44968 }
44969 
44970 
/*
Constructor-style initializer for amdvertexset: prepares degree arrays,
the vertex flag array (boolean) and the doubly-linked-list bookkeeping.
*/
void _amdvertexset_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdvertexset *self = (amdvertexset*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->approxd, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->optionalexactd, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->isvertex, 0, DT_BOOL, _state, make_automatic);
    ae_vector_init(&self->vbegin, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vprev, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vnext, 0, DT_INT, _state, make_automatic);
}
44982 
44983 
/*
Copy-initializer for amdvertexset: replicates scalar fields of *_src into
*_dst and deep-copies every dynamic array.
*/
void _amdvertexset_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdvertexset *d = (amdvertexset*)_dst;
    amdvertexset *s = (amdvertexset*)_src;
    d->n = s->n;
    d->checkexactdegrees = s->checkexactdegrees;
    d->smallestdegree = s->smallestdegree;
    ae_vector_init_copy(&d->approxd, &s->approxd, _state, make_automatic);
    ae_vector_init_copy(&d->optionalexactd, &s->optionalexactd, _state, make_automatic);
    ae_vector_init_copy(&d->isvertex, &s->isvertex, _state, make_automatic);
    ae_vector_init_copy(&d->vbegin, &s->vbegin, _state, make_automatic);
    ae_vector_init_copy(&d->vprev, &s->vprev, _state, make_automatic);
    ae_vector_init_copy(&d->vnext, &s->vnext, _state, make_automatic);
}
44998 
44999 
_amdvertexset_clear(void * _p)45000 void _amdvertexset_clear(void* _p)
45001 {
45002     amdvertexset *p = (amdvertexset*)_p;
45003     ae_touch_ptr((void*)p);
45004     ae_vector_clear(&p->approxd);
45005     ae_vector_clear(&p->optionalexactd);
45006     ae_vector_clear(&p->isvertex);
45007     ae_vector_clear(&p->vbegin);
45008     ae_vector_clear(&p->vprev);
45009     ae_vector_clear(&p->vnext);
45010 }
45011 
45012 
_amdvertexset_destroy(void * _p)45013 void _amdvertexset_destroy(void* _p)
45014 {
45015     amdvertexset *p = (amdvertexset*)_p;
45016     ae_touch_ptr((void*)p);
45017     ae_vector_destroy(&p->approxd);
45018     ae_vector_destroy(&p->optionalexactd);
45019     ae_vector_destroy(&p->isvertex);
45020     ae_vector_destroy(&p->vbegin);
45021     ae_vector_destroy(&p->vprev);
45022     ae_vector_destroy(&p->vnext);
45023 }
45024 
45025 
/*
Constructor-style initializer for amdllmatrix: prepares the per-column
bookkeeping arrays and the entries pool as empty integer vectors.
*/
void _amdllmatrix_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdllmatrix *self = (amdllmatrix*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->vbegin, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vcolcnt, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->entries, 0, DT_INT, _state, make_automatic);
}
45034 
45035 
/*
Copy-initializer for amdllmatrix: replicates scalar fields of *_src into
*_dst and deep-copies all three dynamic arrays.
*/
void _amdllmatrix_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdllmatrix *d = (amdllmatrix*)_dst;
    amdllmatrix *s = (amdllmatrix*)_src;
    d->n = s->n;
    ae_vector_init_copy(&d->vbegin, &s->vbegin, _state, make_automatic);
    ae_vector_init_copy(&d->vcolcnt, &s->vcolcnt, _state, make_automatic);
    ae_vector_init_copy(&d->entries, &s->entries, _state, make_automatic);
    d->entriesinitialized = s->entriesinitialized;
}
45046 
45047 
_amdllmatrix_clear(void * _p)45048 void _amdllmatrix_clear(void* _p)
45049 {
45050     amdllmatrix *p = (amdllmatrix*)_p;
45051     ae_touch_ptr((void*)p);
45052     ae_vector_clear(&p->vbegin);
45053     ae_vector_clear(&p->vcolcnt);
45054     ae_vector_clear(&p->entries);
45055 }
45056 
45057 
_amdllmatrix_destroy(void * _p)45058 void _amdllmatrix_destroy(void* _p)
45059 {
45060     amdllmatrix *p = (amdllmatrix*)_p;
45061     ae_touch_ptr((void*)p);
45062     ae_vector_destroy(&p->vbegin);
45063     ae_vector_destroy(&p->vcolcnt);
45064     ae_vector_destroy(&p->entries);
45065 }
45066 
45067 
/*
Constructor-style initializer for amdbuffer: initializes every embedded
structure (flag arrays, supernode/adjacency sets, linked-list matrix,
degree structure, permutations, scratch sets, hash buckets and debug
matrix) in declaration order.
*/
void _amdbuffer_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdbuffer *self = (amdbuffer*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->iseliminated, 0, DT_BOOL, _state, make_automatic);
    ae_vector_init(&self->issupernode, 0, DT_BOOL, _state, make_automatic);
    _amdknset_init(&self->setsuper, _state, make_automatic);
    _amdknset_init(&self->seta, _state, make_automatic);
    _amdknset_init(&self->sete, _state, make_automatic);
    _amdllmatrix_init(&self->mtxl, _state, make_automatic);
    _amdvertexset_init(&self->vertexdegrees, _state, make_automatic);
    _amdnset_init(&self->setq, _state, make_automatic);
    ae_vector_init(&self->perm, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->invperm, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->columnswaps, 0, DT_INT, _state, make_automatic);
    _amdnset_init(&self->setp, _state, make_automatic);
    _amdnset_init(&self->lp, _state, make_automatic);
    _amdnset_init(&self->setrp, _state, make_automatic);
    _amdnset_init(&self->ep, _state, make_automatic);
    _amdnset_init(&self->adji, _state, make_automatic);
    _amdnset_init(&self->adjj, _state, make_automatic);
    ae_vector_init(&self->ls, 0, DT_INT, _state, make_automatic);
    _amdnset_init(&self->setqsupercand, _state, make_automatic);
    _amdnset_init(&self->exactdegreetmp0, _state, make_automatic);
    _amdknset_init(&self->hashbuckets, _state, make_automatic);
    _amdnset_init(&self->nonemptybuckets, _state, make_automatic);
    ae_vector_init(&self->sncandidates, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->tmp0, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->arrwe, 0, DT_INT, _state, make_automatic);
    ae_matrix_init(&self->dbga, 0, 0, DT_REAL, _state, make_automatic);
}
45099 
45100 
/*
Copy-initializer for amdbuffer: replicates every scalar field of *_src
into *_dst and deep-copies every embedded structure, preserving the
original field order.
*/
void _amdbuffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdbuffer *d = (amdbuffer*)_dst;
    amdbuffer *s = (amdbuffer*)_src;
    d->n = s->n;
    d->extendeddebug = s->extendeddebug;
    d->checkexactdegrees = s->checkexactdegrees;
    ae_vector_init_copy(&d->iseliminated, &s->iseliminated, _state, make_automatic);
    ae_vector_init_copy(&d->issupernode, &s->issupernode, _state, make_automatic);
    _amdknset_init_copy(&d->setsuper, &s->setsuper, _state, make_automatic);
    _amdknset_init_copy(&d->seta, &s->seta, _state, make_automatic);
    _amdknset_init_copy(&d->sete, &s->sete, _state, make_automatic);
    _amdllmatrix_init_copy(&d->mtxl, &s->mtxl, _state, make_automatic);
    _amdvertexset_init_copy(&d->vertexdegrees, &s->vertexdegrees, _state, make_automatic);
    _amdnset_init_copy(&d->setq, &s->setq, _state, make_automatic);
    ae_vector_init_copy(&d->perm, &s->perm, _state, make_automatic);
    ae_vector_init_copy(&d->invperm, &s->invperm, _state, make_automatic);
    ae_vector_init_copy(&d->columnswaps, &s->columnswaps, _state, make_automatic);
    _amdnset_init_copy(&d->setp, &s->setp, _state, make_automatic);
    _amdnset_init_copy(&d->lp, &s->lp, _state, make_automatic);
    _amdnset_init_copy(&d->setrp, &s->setrp, _state, make_automatic);
    _amdnset_init_copy(&d->ep, &s->ep, _state, make_automatic);
    _amdnset_init_copy(&d->adji, &s->adji, _state, make_automatic);
    _amdnset_init_copy(&d->adjj, &s->adjj, _state, make_automatic);
    ae_vector_init_copy(&d->ls, &s->ls, _state, make_automatic);
    d->lscnt = s->lscnt;
    _amdnset_init_copy(&d->setqsupercand, &s->setqsupercand, _state, make_automatic);
    _amdnset_init_copy(&d->exactdegreetmp0, &s->exactdegreetmp0, _state, make_automatic);
    _amdknset_init_copy(&d->hashbuckets, &s->hashbuckets, _state, make_automatic);
    _amdnset_init_copy(&d->nonemptybuckets, &s->nonemptybuckets, _state, make_automatic);
    ae_vector_init_copy(&d->sncandidates, &s->sncandidates, _state, make_automatic);
    ae_vector_init_copy(&d->tmp0, &s->tmp0, _state, make_automatic);
    ae_vector_init_copy(&d->arrwe, &s->arrwe, _state, make_automatic);
    ae_matrix_init_copy(&d->dbga, &s->dbga, _state, make_automatic);
}
45136 
45137 
_amdbuffer_clear(void * _p)45138 void _amdbuffer_clear(void* _p)
45139 {
45140     amdbuffer *p = (amdbuffer*)_p;
45141     ae_touch_ptr((void*)p);
45142     ae_vector_clear(&p->iseliminated);
45143     ae_vector_clear(&p->issupernode);
45144     _amdknset_clear(&p->setsuper);
45145     _amdknset_clear(&p->seta);
45146     _amdknset_clear(&p->sete);
45147     _amdllmatrix_clear(&p->mtxl);
45148     _amdvertexset_clear(&p->vertexdegrees);
45149     _amdnset_clear(&p->setq);
45150     ae_vector_clear(&p->perm);
45151     ae_vector_clear(&p->invperm);
45152     ae_vector_clear(&p->columnswaps);
45153     _amdnset_clear(&p->setp);
45154     _amdnset_clear(&p->lp);
45155     _amdnset_clear(&p->setrp);
45156     _amdnset_clear(&p->ep);
45157     _amdnset_clear(&p->adji);
45158     _amdnset_clear(&p->adjj);
45159     ae_vector_clear(&p->ls);
45160     _amdnset_clear(&p->setqsupercand);
45161     _amdnset_clear(&p->exactdegreetmp0);
45162     _amdknset_clear(&p->hashbuckets);
45163     _amdnset_clear(&p->nonemptybuckets);
45164     ae_vector_clear(&p->sncandidates);
45165     ae_vector_clear(&p->tmp0);
45166     ae_vector_clear(&p->arrwe);
45167     ae_matrix_clear(&p->dbga);
45168 }
45169 
45170 
_amdbuffer_destroy(void * _p)45171 void _amdbuffer_destroy(void* _p)
45172 {
45173     amdbuffer *p = (amdbuffer*)_p;
45174     ae_touch_ptr((void*)p);
45175     ae_vector_destroy(&p->iseliminated);
45176     ae_vector_destroy(&p->issupernode);
45177     _amdknset_destroy(&p->setsuper);
45178     _amdknset_destroy(&p->seta);
45179     _amdknset_destroy(&p->sete);
45180     _amdllmatrix_destroy(&p->mtxl);
45181     _amdvertexset_destroy(&p->vertexdegrees);
45182     _amdnset_destroy(&p->setq);
45183     ae_vector_destroy(&p->perm);
45184     ae_vector_destroy(&p->invperm);
45185     ae_vector_destroy(&p->columnswaps);
45186     _amdnset_destroy(&p->setp);
45187     _amdnset_destroy(&p->lp);
45188     _amdnset_destroy(&p->setrp);
45189     _amdnset_destroy(&p->ep);
45190     _amdnset_destroy(&p->adji);
45191     _amdnset_destroy(&p->adjj);
45192     ae_vector_destroy(&p->ls);
45193     _amdnset_destroy(&p->setqsupercand);
45194     _amdnset_destroy(&p->exactdegreetmp0);
45195     _amdknset_destroy(&p->hashbuckets);
45196     _amdnset_destroy(&p->nonemptybuckets);
45197     ae_vector_destroy(&p->sncandidates);
45198     ae_vector_destroy(&p->tmp0);
45199     ae_vector_destroy(&p->arrwe);
45200     ae_matrix_destroy(&p->dbga);
45201 }
45202 
45203 
45204 #endif
45205 #if defined(AE_COMPILE_SPCHOL) || !defined(AE_PARTIAL_BUILD)
45206 
45207 
45208 /*************************************************************************
45209 Informational function, useful for debugging
45210 *************************************************************************/
spsymmgetmaxfastkernel(ae_state * _state)45211 ae_int_t spsymmgetmaxfastkernel(ae_state *_state)
45212 {
45213     ae_int_t result;
45214 
45215 
45216     result = spchol_maxfastkernel;
45217     return result;
45218 }
45219 
45220 
45221 /*************************************************************************
45222 Symbolic phase of Cholesky decomposition.
45223 
45224 Performs preliminary analysis of Cholesky/LDLT factorization.  The  latter
45225 is computed with strictly diagonal D (no Bunch-Kauffman pivoting).
45226 
45227 The analysis object produced by this function will be used later to  guide
45228 actual decomposition.
45229 
45230 Depending on settings specified during factorization, may produce  vanilla
45231 Cholesky or L*D*LT  decomposition  (with  strictly  diagonal  D),  without
45232 permutation or with permutation P (being either  topological  ordering  or
45233 sparsity preserving ordering).
45234 
45235 Thus, A is represented as either L*LT or L*D*LT or P*L*LT*PT or P*L*D*LT*PT.
45236 
45237 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
45238       matrices. However, numerical stability is guaranteed ONLY for a class
45239       of quasi-definite matrices.
45240 
45241 INPUT PARAMETERS:
45242     A           -   sparse square matrix in CRS format, with LOWER triangle
45243                     being used to store the matrix.
45244     FactType    -   factorization type:
45245                     * 0 for traditional Cholesky
45246                     * 1 for LDLT decomposition with strictly diagonal D
45247     PermType    -   permutation type:
45248                     *-3 for debug improved AMD (a sequence of decreasing
45249                         tail sizes is generated, ~logN in total, even if
45250                         ordering can be done with just one round of AMD).
45251                         This ordering is used to test correctness of
45252                         multiple AMD rounds.
45253                     *-2 for column count ordering (NOT RECOMMENDED!)
45254                     *-1 for absence of permutation
45255                     * 0 for best permutation available
45256                     * 1 for supernodal ordering (improves locality and
45257                       performance, but does NOT change fill-in pattern)
45258                     * 2 for supernodal AMD ordering (improves fill-in)
45259                     * 3 for  improved  AMD  (approximate  minimum  degree)
45260                         ordering with better  handling  of  matrices  with
45261                         dense rows/columns
45262     Analysis    -   can be uninitialized instance, or previous analysis
45263                     results. Previously allocated memory is reused as much
45264                     as possible.
45265     Buf         -   buffer; may be completely uninitialized, or one remained
45266                     from previous calls (including ones with completely
45267                     different matrices). Previously allocated temporary
45268                     space will be reused as much as possible.
45269 
45270 OUTPUT PARAMETERS:
45271     Analysis    -   symbolic analysis of the matrix structure  which  will
45272                     be used later to guide  numerical  factorization.  The
45273                     numerical values are stored internally in the structure,
45274                     but you have to  run  factorization  phase  explicitly
                    with SPSymmFactorize(). You  can  also  reload  another
45276                     matrix with same sparsity pattern with  SPSymmReload()
45277                     or rewrite its diagonal with SPSymmReloadDiagonal().
45278 
45279 This function fails if and only if the matrix A is symbolically degenerate
45280 i.e. has diagonal element which is exactly zero. In  such  case  False  is
45281 returned.
45282 
45283 NOTE: defining 'SCHOLESKY' trace tag will activate tracing.
45284       defining 'SCHOLESKY.SS' trace tag will activate detailed tracing  of
45285       the supernodal structure.
45286 
45287 NOTE: defining 'DEBUG.SLOW' trace tag will  activate  extra-slow  (roughly
45288       N^3 ops) integrity checks, in addition to cheap O(1) ones.
45289 
45290   -- ALGLIB routine --
45291      20.09.2020
45292      Bochkanov Sergey
45293 *************************************************************************/
spsymmanalyze(sparsematrix * a,ae_int_t facttype,ae_int_t permtype,spcholanalysis * analysis,ae_state * _state)45294 ae_bool spsymmanalyze(sparsematrix* a,
45295      ae_int_t facttype,
45296      ae_int_t permtype,
45297      spcholanalysis* analysis,
45298      ae_state *_state)
45299 {
45300     ae_int_t n;
45301     ae_int_t i;
45302     ae_int_t j;
45303     ae_int_t jj;
45304     ae_int_t k;
45305     ae_int_t residual;
45306     ae_int_t tail;
45307     ae_bool permready;
45308     ae_bool result;
45309 
45310 
45311     ae_assert(sparseiscrs(a, _state), "SPSymmAnalyze: A is not stored in CRS format", _state);
45312     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SPSymmAnalyze: non-square A", _state);
45313     ae_assert(facttype==0||facttype==1, "SPSymmAnalyze: unexpected FactType", _state);
45314     ae_assert((((((permtype==0||permtype==1)||permtype==2)||permtype==3)||permtype==-1)||permtype==-2)||permtype==-3, "SPSymmAnalyze: unexpected PermType", _state);
45315     if( permtype==0 )
45316     {
45317         permtype = 3;
45318     }
45319     result = ae_true;
45320     n = sparsegetnrows(a, _state);
45321     analysis->tasktype = 0;
45322     analysis->n = n;
45323     analysis->unitd = facttype==0;
45324     analysis->permtype = permtype;
45325     analysis->extendeddebug = ae_is_trace_enabled("DEBUG.SLOW")&&n<=100;
45326     analysis->dotrace = ae_is_trace_enabled("SCHOLESKY");
45327     analysis->dotracesupernodalstructure = analysis->dotrace&&ae_is_trace_enabled("SCHOLESKY.SS");
45328     analysis->istopologicalordering = permtype==-1||permtype==1;
45329     analysis->applypermutationtooutput = permtype==-1;
45330     analysis->modtype = 0;
45331     analysis->modparam0 = 0.0;
45332     analysis->modparam1 = 0.0;
45333     analysis->modparam2 = 0.0;
45334     analysis->modparam3 = 0.0;
45335 
45336     /*
45337      * Allocate temporaries
45338      */
45339     ivectorsetlengthatleast(&analysis->tmpparent, n+1, _state);
45340     ivectorsetlengthatleast(&analysis->tmp0, n+1, _state);
45341     ivectorsetlengthatleast(&analysis->tmp1, n+1, _state);
45342     ivectorsetlengthatleast(&analysis->tmp2, n+1, _state);
45343     ivectorsetlengthatleast(&analysis->tmp3, n+1, _state);
45344     ivectorsetlengthatleast(&analysis->tmp4, n+1, _state);
45345     bvectorsetlengthatleast(&analysis->flagarray, n+1, _state);
45346 
45347     /*
45348      * Initial trace message
45349      */
45350     if( analysis->dotrace )
45351     {
45352         ae_trace("\n\n");
45353         ae_trace("////////////////////////////////////////////////////////////////////////////////////////////////////\n");
45354         ae_trace("//  SPARSE CHOLESKY ANALYSIS STARTED                                                              //\n");
45355         ae_trace("////////////////////////////////////////////////////////////////////////////////////////////////////\n");
45356 
45357         /*
45358          * Analyze row statistics
45359          */
45360         ae_trace("=== ANALYZING ROW STATISTICS =======================================================================\n");
45361         ae_trace("row size is:\n");
45362         isetv(n, 1, &analysis->tmp0, _state);
45363         for(i=0; i<=n-1; i++)
45364         {
45365             for(jj=a->ridx.ptr.p_int[i]; jj<=a->didx.ptr.p_int[i]-1; jj++)
45366             {
45367                 j = a->idx.ptr.p_int[jj];
45368                 analysis->tmp0.ptr.p_int[i] = analysis->tmp0.ptr.p_int[i]+1;
45369                 analysis->tmp0.ptr.p_int[j] = analysis->tmp0.ptr.p_int[j]+1;
45370             }
45371         }
45372         k = 1;
45373         while(k<=n)
45374         {
45375             j = 0;
45376             for(i=0; i<=n-1; i++)
45377             {
45378                 if( analysis->tmp0.ptr.p_int[i]>=k&&analysis->tmp0.ptr.p_int[i]<2*k )
45379                 {
45380                     j = j+1;
45381                 }
45382             }
45383             ae_trace("* [%6d..%6d) elements: %6d rows\n",
45384                 (int)(k),
45385                 (int)(2*k),
45386                 (int)(j));
45387             k = k*2;
45388         }
45389     }
45390 
45391     /*
45392      * Initial integrity check - diagonal MUST be symbolically nonzero
45393      */
45394     for(i=0; i<=n-1; i++)
45395     {
45396         if( a->didx.ptr.p_int[i]==a->uidx.ptr.p_int[i] )
45397         {
45398             if( analysis->dotrace )
45399             {
45400                 ae_trace("> the matrix diagonal is symbolically zero, stopping");
45401             }
45402             result = ae_false;
45403             return result;
45404         }
45405     }
45406 
45407     /*
45408      * What type of permutation do we have?
45409      */
45410     if( analysis->istopologicalordering )
45411     {
45412         ae_assert(permtype==-1||permtype==1, "SPSymmAnalyze: integrity check failed (ihebd)", _state);
45413 
45414         /*
45415          * Build topologically ordered elimination tree
45416          */
45417         spchol_buildorderedetree(a, n, &analysis->tmpparent, &analysis->superperm, &analysis->invsuperperm, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->flagarray, _state);
45418         ivectorsetlengthatleast(&analysis->fillinperm, n, _state);
45419         ivectorsetlengthatleast(&analysis->invfillinperm, n, _state);
45420         ivectorsetlengthatleast(&analysis->effectiveperm, n, _state);
45421         ivectorsetlengthatleast(&analysis->inveffectiveperm, n, _state);
45422         for(i=0; i<=n-1; i++)
45423         {
45424             analysis->fillinperm.ptr.p_int[i] = i;
45425             analysis->invfillinperm.ptr.p_int[i] = i;
45426             analysis->effectiveperm.ptr.p_int[i] = analysis->superperm.ptr.p_int[i];
45427             analysis->inveffectiveperm.ptr.p_int[i] = analysis->invsuperperm.ptr.p_int[i];
45428         }
45429 
45430         /*
45431          * Reorder input matrix
45432          */
45433         spchol_topologicalpermutation(a, &analysis->superperm, &analysis->tmpat, _state);
45434 
45435         /*
45436          * Analyze etree, build supernodal structure
45437          */
45438         spchol_createsupernodalstructure(&analysis->tmpat, &analysis->tmpparent, n, analysis, &analysis->node2supernode, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->tmp3, &analysis->tmp4, &analysis->flagarray, _state);
45439 
45440         /*
45441          * Having fully initialized supernodal structure, analyze dependencies
45442          */
45443         spchol_analyzesupernodaldependencies(analysis, a, &analysis->node2supernode, n, &analysis->tmp0, &analysis->tmp1, &analysis->flagarray, _state);
45444 
45445         /*
45446          * Load matrix into the supernodal storage
45447          */
45448         spchol_loadmatrix(analysis, &analysis->tmpat, _state);
45449     }
45450     else
45451     {
45452 
45453         /*
45454          * Generate fill-in reducing permutation
45455          */
45456         permready = ae_false;
45457         if( permtype==-2 )
45458         {
45459             spchol_generatedbgpermutation(a, n, &analysis->fillinperm, &analysis->invfillinperm, _state);
45460             permready = ae_true;
45461         }
45462         if( permtype==2 )
45463         {
45464             generateamdpermutation(a, n, &analysis->fillinperm, &analysis->invfillinperm, &analysis->amdtmp, _state);
45465             permready = ae_true;
45466         }
45467         if( permtype==3||permtype==-3 )
45468         {
45469 
45470             /*
45471              * Perform iterative AMD, with nearly-dense columns being postponed to be handled later.
45472              *
45473              * The current (residual) matrix A is divided into two parts: head, with its columns being
45474              * properly ordered, and tail, with its columns being reordered at the next iteration.
45475              *
45476              * After each partial AMD we compute sparsity pattern of the tail, set it as the new residual
45477              * and repeat iteration.
45478              */
45479             residual = n;
45480             iallocv(n, &analysis->fillinperm, _state);
45481             iallocv(n, &analysis->invfillinperm, _state);
45482             for(i=0; i<=n-1; i++)
45483             {
45484                 analysis->fillinperm.ptr.p_int[i] = i;
45485                 analysis->invfillinperm.ptr.p_int[i] = i;
45486             }
45487             sparsecopybuf(a, &analysis->tmpa, _state);
45488             if( analysis->dotrace )
45489             {
45490                 ae_trace("> multiround AMD, tail=%0d\n",
45491                     (int)(residual));
45492             }
45493             while(residual>0)
45494             {
45495 
45496                 /*
45497                  * Generate partial fill-in reducing permutation (leading Residual-Tail columns are
45498                  * properly ordered, the rest is unordered).
45499                  */
45500                 tail = residual-generateamdpermutationx(&analysis->tmpa, residual, &analysis->tmpperm, &analysis->invtmpperm, 1, &analysis->amdtmp, _state);
45501                 if( permtype==-3 )
45502                 {
45503 
45504                     /*
45505                      * Special debug ordering in order to test correctness of multiple AMD rounds
45506                      */
45507                     tail = ae_maxint(tail, residual/2, _state);
45508                 }
45509                 ae_assert(tail<residual, "SPSymmAnalyze: integrity check failed (Tail=Residual)", _state);
45510 
45511                 /*
45512                  * Apply permutation TmpPerm[] to the tail of the permutation FillInPerm[]
45513                  */
45514                 for(i=0; i<=residual-1; i++)
45515                 {
45516                     analysis->fillinperm.ptr.p_int[analysis->invfillinperm.ptr.p_int[n-residual+analysis->invtmpperm.ptr.p_int[i]]] = n-residual+i;
45517                 }
45518                 for(i=0; i<=n-1; i++)
45519                 {
45520                     analysis->invfillinperm.ptr.p_int[analysis->fillinperm.ptr.p_int[i]] = i;
45521                 }
45522 
45523                 /*
45524                  * Compute partial Cholesky of the trailing submatrix (after applying rank-K update to the
45525                  * trailing submatrix but before Cholesky-factorizing it).
45526                  */
45527                 if( tail>0 )
45528                 {
45529                     sparsesymmpermtblbuf(&analysis->tmpa, ae_false, &analysis->tmpperm, &analysis->tmpa2, _state);
45530                     spchol_partialcholeskypattern(&analysis->tmpa2, residual-tail, tail, &analysis->tmpa, &analysis->tmpparent, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->flagarray, &analysis->tmpbottomt, &analysis->tmpupdatet, &analysis->tmpupdate, &analysis->tmpnewtailt, _state);
45531                     if( analysis->extendeddebug )
45532                     {
45533                         spchol_slowdebugchecks(a, &analysis->fillinperm, n, tail, &analysis->tmpa, _state);
45534                     }
45535                 }
45536                 residual = tail;
45537                 if( analysis->dotrace )
45538                 {
45539                     ae_trace("> multiround AMD, tail=%0d\n",
45540                         (int)(residual));
45541                 }
45542             }
45543             permready = ae_true;
45544         }
45545         ae_assert(permready, "SPSymmAnalyze: integrity check failed (pp4td)", _state);
45546 
45547         /*
45548          * Apply permutation to the matrix, perform analysis on the initially reordered matrix
45549          * (we may need one more reordering, now topological one, due to supernodal analysis).
45550          * Build topologically ordered elimination tree
45551          */
45552         sparsesymmpermtblbuf(a, ae_false, &analysis->fillinperm, &analysis->tmpa, _state);
45553         spchol_buildorderedetree(&analysis->tmpa, n, &analysis->tmpparent, &analysis->superperm, &analysis->invsuperperm, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->flagarray, _state);
45554         ivectorsetlengthatleast(&analysis->effectiveperm, n, _state);
45555         ivectorsetlengthatleast(&analysis->inveffectiveperm, n, _state);
45556         for(i=0; i<=n-1; i++)
45557         {
45558             analysis->effectiveperm.ptr.p_int[i] = analysis->superperm.ptr.p_int[analysis->fillinperm.ptr.p_int[i]];
45559             analysis->inveffectiveperm.ptr.p_int[analysis->effectiveperm.ptr.p_int[i]] = i;
45560         }
45561 
45562         /*
45563          * Reorder input matrix
45564          */
45565         spchol_topologicalpermutation(&analysis->tmpa, &analysis->superperm, &analysis->tmpat, _state);
45566 
45567         /*
45568          * Analyze etree, build supernodal structure
45569          */
45570         spchol_createsupernodalstructure(&analysis->tmpat, &analysis->tmpparent, n, analysis, &analysis->node2supernode, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->tmp3, &analysis->tmp4, &analysis->flagarray, _state);
45571 
45572         /*
45573          * Having fully initialized supernodal structure, analyze dependencies
45574          */
45575         spchol_analyzesupernodaldependencies(analysis, &analysis->tmpa, &analysis->node2supernode, n, &analysis->tmp0, &analysis->tmp1, &analysis->flagarray, _state);
45576 
45577         /*
45578          * Load matrix into the supernodal storage
45579          */
45580         spchol_loadmatrix(analysis, &analysis->tmpat, _state);
45581     }
45582     return result;
45583 }
45584 
45585 
45586 /*************************************************************************
45587 Sets modified Cholesky type
45588 
45589 INPUT PARAMETERS:
45590     Analysis    -   symbolic analysis of the matrix structure
45591     ModStrategy -   modification type:
45592                     * 0 for traditional Cholesky/LDLT (Cholesky fails when
45593                       encounters nonpositive pivot, LDLT fails  when  zero
45594                       pivot   is  encountered,  no  stability  checks  for
45595                       overflows/underflows)
45596                     * 1 for modified Cholesky with additional checks:
45597                       * pivots less than ModParam0 are increased; (similar
45598                         procedure with proper generalization is applied to
45599                         LDLT)
45600                       * if,  at  some  moment,  sum  of absolute values of
45601                         elements in column  J  will  become  greater  than
45602                         ModParam1, Cholesky/LDLT will treat it as  failure
45603                         and will stop immediately
45604                       * if ModParam0 is zero, no pivot modification is applied
45605                       * if ModParam1 is zero, no overflow check is performed
45606     P0, P1, P2, P3 - modification parameters #0, #1, #2 and #3.
45607                     Params #2 and #3 are ignored in current version.
45608 
45609 OUTPUT PARAMETERS:
45610     Analysis    -   symbolic analysis of the matrix structure, new strategy
45611                     (results will be seen with next SPSymmFactorize() call)
45612 
45613   -- ALGLIB routine --
45614      20.09.2020
45615      Bochkanov Sergey
45616 *************************************************************************/
void spsymmsetmodificationstrategy(spcholanalysis* analysis,
     ae_int_t modstrategy,
     double p0,
     double p1,
     double p2,
     double p3,
     ae_state *_state)
{

    /*
     * Validate inputs before modifying the analysis object: strategy must be
     * one of the supported values, all parameters must be finite, and P0
     * must additionally be nonnegative.
     */
    ae_assert(modstrategy==0||modstrategy==1, "SPSymmSetModificationStrategy: unexpected ModStrategy", _state);
    ae_assert(ae_isfinite(p0, _state)&&ae_fp_greater_eq(p0,(double)(0)), "SPSymmSetModificationStrategy: bad P0", _state);
    ae_assert(ae_isfinite(p1, _state), "SPSymmSetModificationStrategy: bad P1", _state);
    ae_assert(ae_isfinite(p2, _state), "SPSymmSetModificationStrategy: bad P2", _state);
    ae_assert(ae_isfinite(p3, _state), "SPSymmSetModificationStrategy: bad P3", _state);

    /*
     * Store the strategy and its parameters; they will be picked up by
     * subsequent factorization calls.
     */
    analysis->modtype   = modstrategy;
    analysis->modparam0 = p0;
    analysis->modparam1 = p1;
    analysis->modparam2 = p2;
    analysis->modparam3 = p3;
}
45638 
45639 
45640 /*************************************************************************
45641 Updates symmetric  matrix  internally  stored  in  previously  initialized
45642 Analysis object.
45643 
45644 You can use this function to perform  multiple  factorizations  with  same
45645 sparsity patterns: perform symbolic analysis  once  with  SPSymmAnalyze(),
45646 then update internal matrix with SPSymmReload() and call SPSymmFactorize().
45647 
45648 INPUT PARAMETERS:
45649     Analysis    -   symbolic analysis of the matrix structure
45650     A           -   sparse square matrix in CRS format with LOWER triangle
45651                     being used to store the matrix. The matrix  MUST  have
45652                     sparsity   pattern   exactly   same  as  one  used  to
45653                     initialize the Analysis object.
45654                     The algorithm will fail in  an  unpredictable  way  if
45655                     something different was passed.
45656 
45657 OUTPUT PARAMETERS:
45658     Analysis    -   symbolic analysis of the matrix structure  which  will
45659                     be used later to guide  numerical  factorization.  The
45660                     numerical values are stored internally in the structure,
45661                     but you have to  run  factorization  phase  explicitly
45662                     with SPSymmFactorize(). You  can  also  reload another
45663                     matrix with same sparsity pattern with SPSymmReload().
45664 
45665   -- ALGLIB routine --
45666      20.09.2020
45667      Bochkanov Sergey
45668 *************************************************************************/
void spsymmreload(spcholanalysis* analysis,
     sparsematrix* a,
     ae_state *_state)
{

    /*
     * A must be a square CRS matrix with the same sparsity pattern as the
     * one used during the analysis phase.
     */
    ae_assert(sparseiscrs(a, _state), "SPSymmReload: A is not stored in CRS format", _state);
    ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SPSymmReload: non-square A", _state);
    if( !analysis->istopologicalordering )
    {

        /*
         * Non-topological permutation: perform a generic symmetric
         * permutation first, then transpose the result into TmpAT.
         */
        sparsesymmpermtblbuf(a, ae_false, &analysis->effectiveperm, &analysis->tmpa, _state);
        sparsecopytransposecrsbuf(&analysis->tmpa, &analysis->tmpat, _state);
    }
    else
    {

        /*
         * Topological (fill-in preserving) ordering: A can be moved into
         * TmpAT directly with a joint permute+transpose.
         */
        spchol_topologicalpermutation(a, &analysis->effectiveperm, &analysis->tmpat, _state);
    }

    /*
     * Both paths leave the reordered matrix in TmpAT; load it into the
     * supernodal storage.
     */
    spchol_loadmatrix(analysis, &analysis->tmpat, _state);
}
45699 
45700 
45701 /*************************************************************************
45702 Updates  diagonal  of  the  symmetric  matrix  internally  stored  in  the
45703 previously initialized Analysis object.
45704 
45705 When only diagonal of the  matrix  has  changed,  this  function  is  more
45706 efficient than SPSymmReload() that has to perform  costly  permutation  of
45707 the entire matrix.
45708 
45709 You can use this function to perform  multiple  factorizations  with  same
45710 off-diagonal elements: perform symbolic analysis once with SPSymmAnalyze(),
45711 then update diagonal with SPSymmReloadDiagonal() and call SPSymmFactorize().
45712 
45713 INPUT PARAMETERS:
45714     Analysis    -   symbolic analysis of the matrix structure
45715     D           -   array[N], diagonal factor
45716 
45717 OUTPUT PARAMETERS:
45718     Analysis    -   symbolic analysis of the matrix structure  which  will
45719                     be used later to guide  numerical  factorization.  The
45720                     numerical values are stored internally in the structure,
45721                     but you have to  run  factorization  phase  explicitly
45722                     with SPSymmFactorize(). You  can  also  reload another
45723                     matrix with same sparsity pattern with SPSymmReload().
45724 
45725   -- ALGLIB routine --
45726      05.09.2021
45727      Bochkanov Sergey
45728 *************************************************************************/
void spsymmreloaddiagonal(spcholanalysis* analysis,
     /* Real    */ ae_vector* d,
     ae_state *_state)
{
    ae_int_t snode;
    ae_int_t c0;
    ae_int_t c1;
    ae_int_t offs;
    ae_int_t stride;
    ae_int_t k;


    ae_assert(d->cnt>=analysis->n, "SPSymmReloadDiagonal: length(D)<N", _state);

    /*
     * For every supernode overwrite the diagonal entries of its dense block
     * in InputStorage with permuted values of D. Column C0+K of the
     * supernode has its diagonal element at Offs+K*Stride+K, and the source
     * value is fetched through the inverse effective permutation.
     */
    for(snode=0; snode<=analysis->nsuper-1; snode++)
    {
        c0 = analysis->supercolrange.ptr.p_int[snode];
        c1 = analysis->supercolrange.ptr.p_int[snode+1];
        offs = analysis->rowoffsets.ptr.p_int[snode];
        stride = analysis->rowstrides.ptr.p_int[snode];
        for(k=0; k<=c1-c0-1; k++)
        {
            analysis->inputstorage.ptr.p_double[offs+k*stride+k] = d->ptr.p_double[analysis->inveffectiveperm.ptr.p_int[c0+k]];
        }
    }
}
45754 
45755 
45756 /*************************************************************************
45757 Sparse Cholesky factorization of symmetric matrix stored  in  CRS  format,
45758 using precomputed analysis of the sparsity pattern stored  in the Analysis
45759 object and specific numeric values that  are  presently  loaded  into  the
45760 Analysis.
45761 
45762 The factorization can be retrieved  with  SPSymmExtract().  Alternatively,
45763 one can perform some operations without offloading  the  matrix  (somewhat
45764 faster due to utilization of  SIMD-friendly  supernodal  data structures),
45765 most importantly - linear system solution with SPSymmSolve().
45766 
45767 Depending on settings specified during factorization, may produce  vanilla
45768 Cholesky or L*D*LT  decomposition  (with  strictly  diagonal  D),  without
45769 permutation or with permutation P (being either  topological  ordering  or
45770 sparsity preserving ordering).
45771 
45772 Thus, A is represented as either L*LT or L*D*LT or P*L*LT*PT or P*L*D*LT*PT.
45773 
45774 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
45775       matrices. However, numerical stability is guaranteed ONLY for a class
45776       of quasi-definite matrices.
45777 
45778 INPUT PARAMETERS:
45779     Analysis    -   prior  analysis  performed on some sparse matrix, with
45780                     matrix being stored in Analysis.
45781 
45782 OUTPUT PARAMETERS:
45783     Analysis    -   contains factorization results
45784 
45785 The function returns True  when  factorization  resulted  in nondegenerate
45786 matrix. False is returned when factorization fails (Cholesky factorization
45787 of indefinite matrix) or LDLT factorization has exactly zero  elements  at
45788 the diagonal.
45789 
45790   -- ALGLIB routine --
45791      20.09.2020
45792      Bochkanov Sergey
45793 *************************************************************************/
ae_bool spsymmfactorize(spcholanalysis* analysis, ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_int_t ii;
    ae_int_t n;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t offss;
    ae_int_t blocksize;
    ae_int_t sidx;
    ae_int_t uidx;
    ae_bool result;


    ae_assert(analysis->tasktype==0, "SPCholFactorize: Analysis type does not match current task", _state);
    result = ae_true;
    n = analysis->n;

    /*
     * Prepare structures:
     * * WrkRows[] store pointers to beginnings of the offdiagonal supernode row ranges;
     *   at the beginning of the work WrkRows[]=0, but as we advance from the column
     *   range [0,A) to [A,B), to [B,C) and so on, we advance WrkRows[] in order to
     *   quickly skip parts that are less than A, less than B, less than C and so on.
     * * DiagD is zero-filled here and receives the diagonal factor during factorization.
     * * OutputStorage starts as a copy of InputStorage (the loaded matrix) and is
     *   factorized in-place below.
     */
    ivectorsetlengthatleast(&analysis->raw2smap, n, _state);
    ivectorsetlengthatleast(&analysis->tmp0, n+1, _state);
    bsetallocv(n, ae_false, &analysis->flagarray, _state);
    isetallocv(analysis->nsuper, 0, &analysis->wrkrows, _state);
    rsetallocv(n, 0.0, &analysis->diagd, _state);
    rcopyallocv(analysis->rowoffsets.ptr.p_int[analysis->nsuper], &analysis->inputstorage, &analysis->outputstorage, _state);

    /*
     * Now we can run actual supernodal Cholesky: supernodes are processed
     * left to right; each one is first updated by previously factorized
     * supernodes, then factorized itself.
     */
    for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
    {
        /* Column range [Cols0,Cols1) and dense-storage offset of supernode SIdx */
        cols0 = analysis->supercolrange.ptr.p_int[sidx];
        cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
        blocksize = cols1-cols0;
        offss = analysis->rowoffsets.ptr.p_int[sidx];

        /*
         * Prepare mapping of raw (range 0...N-1) indexes into internal (range 0...BlockSize+OffdiagSize-1) ones
         */
        if( analysis->extendeddebug )
        {
            /* In debug mode poison the map with -1 so stale entries are caught */
            isetv(n, -1, &analysis->raw2smap, _state);
        }
        for(i=cols0; i<=cols1-1; i++)
        {
            /* Columns inside the supernode map to local indexes 0..BlockSize-1 */
            analysis->raw2smap.ptr.p_int[i] = i-cols0;
        }
        for(k=analysis->superrowridx.ptr.p_int[sidx]; k<=analysis->superrowridx.ptr.p_int[sidx+1]-1; k++)
        {
            /* Offdiagonal rows map to local indexes BlockSize..BlockSize+OffdiagSize-1 */
            analysis->raw2smap.ptr.p_int[analysis->superrowidx.ptr.p_int[k]] = blocksize+(k-analysis->superrowridx.ptr.p_int[sidx]);
        }

        /*
         * Update current supernode with nonzeros from the current row.
         * LAdjPlus[LAdjPlusR[SIdx]..LAdjPlusR[SIdx+1]) lists updating
         * supernodes; WrkRows[UIdx] is advanced by the update routine so the
         * already-consumed part of UIdx's row range is skipped next time.
         */
        for(ii=analysis->ladjplusr.ptr.p_int[sidx]; ii<=analysis->ladjplusr.ptr.p_int[sidx+1]-1; ii++)
        {
            uidx = analysis->ladjplus.ptr.p_int[ii];
            analysis->wrkrows.ptr.p_int[uidx] = spchol_updatesupernode(analysis, sidx, cols0, cols1, offss, &analysis->raw2smap, uidx, analysis->wrkrows.ptr.p_int[uidx], &analysis->diagd, analysis->supercolrange.ptr.p_int[uidx], _state);
        }

        /*
         * Factorize current supernode; a failure (e.g. nonpositive pivot in
         * plain Cholesky) aborts the entire factorization.
         */
        if( !spchol_factorizesupernode(analysis, sidx, _state) )
        {
            result = ae_false;
            return result;
        }
    }
    return result;
}
45873 
45874 
45875 /*************************************************************************
45876 Extracts result of the last Cholesky/LDLT factorization performed  on  the
45877 Analysis object.
45878 
45879 Following calls will  result in the undefined behavior:
45880 * calling for Analysis that was not factorized with SPSymmFactorize()
45881 * calling after SPSymmFactorize() returned False
45882 
45883 INPUT PARAMETERS:
45884     Analysis    -   prior factorization performed on some sparse matrix
45885     D, P        -   possibly preallocated buffers
45886 
45887 OUTPUT PARAMETERS:
45888     A           -   Cholesky/LDLT decomposition  of A stored in CRS format
45889                     in LOWER triangle.
45890     D           -   array[N], diagonal factor. If no diagonal  factor  was
45891                     required during analysis  phase,  still  returned  but
45892                     filled with units.
45893     P           -   array[N], pivots. Permutation matrix P is a product of
45894                     P(0)*P(1)*...*P(N-1), where P(i) is a  permutation  of
45895                     row/col I and P[I] (with P[I]>=I).
45896                     If no permutation was requested during analysis phase,
45897                     still returned but filled with unit elements.
45898 
45899   -- ALGLIB routine --
45900      20.09.2020
45901      Bochkanov Sergey
45902 *************************************************************************/
void spsymmextract(spcholanalysis* analysis,
     sparsematrix* a,
     /* Real    */ ae_vector* d,
     /* Integer */ ae_vector* p,
     ae_state *_state)
{


    /*
     * Thin wrapper: converts the supernodal factorization stored in the
     * Analysis object (RowOffsets/RowStrides/OutputStorage/DiagD) into a
     * CRS matrix A, diagonal factor D and pivot array P.
     * Tmp0 is passed as a scratch buffer.
     */
    spchol_extractmatrix(analysis, &analysis->rowoffsets, &analysis->rowstrides, &analysis->outputstorage, &analysis->diagd, analysis->n, a, d, p, &analysis->tmp0, _state);
}
45913 
45914 
45915 /*************************************************************************
45916 Solve linear system A*x=b, using internally stored  factorization  of  the
45917 matrix A.
45918 
45919 Works faster than extracting the matrix and solving with SparseTRSV()  due
45920 to SIMD-friendly supernodal data structures being used.
45921 
45922 INPUT PARAMETERS:
45923     Analysis    -   prior factorization performed on some sparse matrix
45924     B           -   array[N], right-hand side
45925 
45926 OUTPUT PARAMETERS:
45927     B           -   overwritten by X
45928 
45929   -- ALGLIB routine --
45930      08.09.2021
45931      Bochkanov Sergey
45932 *************************************************************************/
void spsymmsolve(spcholanalysis* analysis,
     /* Real    */ ae_vector* b,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;
    ae_int_t simdwidth;
    ae_int_t baseoffs;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t offss;
    ae_int_t sstride;
    ae_int_t sidx;
    ae_int_t blocksize;
    ae_int_t rbase;
    ae_int_t offdiagsize;


    n = analysis->n;
    simdwidth = spchol_spsymmgetmaxsimd(_state);
    rsetallocv(n, 0.0, &analysis->tmpx, _state);

    /*
     * Handle left-hand side permutation, convert data to internal SIMD-friendly format:
     * entry I of the permuted RHS is stored at SimdBuf[I*SimdWidth], with the
     * remaining SimdWidth-1 slots zeroed (the RHS is kept as an unevaluated
     * sum of SimdWidth numbers).
     */
    rsetallocv(n*simdwidth, 0.0, &analysis->simdbuf, _state);
    for(i=0; i<=n-1; i++)
    {
        analysis->simdbuf.ptr.p_double[i*simdwidth] = b->ptr.p_double[analysis->inveffectiveperm.ptr.p_int[i]];
    }

    /*
     * Solve for L*tmp_x=rhs.
     *
     * The RHS (original and temporary updates) is stored in the SIMD-friendly SIMDBuf which
     * stores RHS as unevaluated sum of SIMDWidth numbers (this format allows easy updates
     * with SIMD intrinsics), the result is written into TmpX (traditional contiguous storage).
     */
    for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
    {
        cols0 = analysis->supercolrange.ptr.p_int[sidx];
        cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
        blocksize = cols1-cols0;
        offss = analysis->rowoffsets.ptr.p_int[sidx];
        sstride = analysis->rowstrides.ptr.p_int[sidx];
        rbase = analysis->superrowridx.ptr.p_int[sidx];
        offdiagsize = analysis->superrowridx.ptr.p_int[sidx+1]-rbase;

        /*
         * Solve for variables in the supernode: for each column I, collapse
         * the SimdWidth partial sums of the RHS, subtract contributions of
         * the already-solved columns of this supernode, and divide by the
         * diagonal element L[I][I].
         */
        for(i=cols0; i<=cols1-1; i++)
        {
            baseoffs = offss+(i-cols0)*sstride+(-cols0);
            v = (double)(0);
            for(j=0; j<=simdwidth-1; j++)
            {
                v = v+analysis->simdbuf.ptr.p_double[i*simdwidth+j];
            }
            for(j=cols0; j<=i-1; j++)
            {
                v = v-analysis->outputstorage.ptr.p_double[baseoffs+j]*analysis->tmpx.ptr.p_double[j];
            }
            analysis->tmpx.ptr.p_double[i] = v/analysis->outputstorage.ptr.p_double[baseoffs+i];
        }

        /*
         * Propagate update to other variables: subtract the offdiagonal part
         * L[offdiag rows][supernode cols]*x[supernode] from the pending RHS in SimdBuf.
         */
        spchol_propagatefwd(&analysis->tmpx, cols0, blocksize, &analysis->superrowidx, rbase, offdiagsize, &analysis->outputstorage, offss, sstride, &analysis->simdbuf, simdwidth, _state);
    }

    /*
     * Solve for D*tmp_x=rhs.
     * Zero elements of D are skipped (the corresponding solution component is
     * forced to zero) instead of producing a division by zero.
     * NOTE(review): presumably DiagD is filled with units for plain Cholesky
     * (see SPSymmExtract() docs), so this branch handles degenerate LDLT
     * pivots -- confirm against the factorization code.
     */
    for(i=0; i<=n-1; i++)
    {
        if( analysis->diagd.ptr.p_double[i]!=0.0 )
        {
            analysis->tmpx.ptr.p_double[i] = analysis->tmpx.ptr.p_double[i]/analysis->diagd.ptr.p_double[i];
        }
        else
        {
            analysis->tmpx.ptr.p_double[i] = 0.0;
        }
    }

    /*
     * Solve for L'*tmp_x=rhs by processing supernodes right to left
     * (backward substitution).
     */
    for(sidx=analysis->nsuper-1; sidx>=0; sidx--)
    {
        cols0 = analysis->supercolrange.ptr.p_int[sidx];
        cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
        blocksize = cols1-cols0;
        offss = analysis->rowoffsets.ptr.p_int[sidx];
        sstride = analysis->rowstrides.ptr.p_int[sidx];
        rbase = analysis->superrowridx.ptr.p_int[sidx];
        offdiagsize = analysis->superrowridx.ptr.p_int[sidx+1]-rbase;

        /*
         * Subtract already computed variables: contributions of the
         * offdiagonal rows (rows below the supernode, already solved).
         */
        for(k=0; k<=offdiagsize-1; k++)
        {
            baseoffs = offss+(k+blocksize)*sstride;
            v = analysis->tmpx.ptr.p_double[analysis->superrowidx.ptr.p_int[rbase+k]];
            for(j=0; j<=blocksize-1; j++)
            {
                analysis->tmpx.ptr.p_double[cols0+j] = analysis->tmpx.ptr.p_double[cols0+j]-analysis->outputstorage.ptr.p_double[baseoffs+j]*v;
            }
        }

        /*
         * Solve for variables in the supernode, bottom-up within its dense
         * triangular block.
         */
        for(i=blocksize-1; i>=0; i--)
        {
            baseoffs = offss+i*sstride;
            v = analysis->tmpx.ptr.p_double[cols0+i]/analysis->outputstorage.ptr.p_double[baseoffs+i];
            for(j=0; j<=i-1; j++)
            {
                analysis->tmpx.ptr.p_double[cols0+j] = analysis->tmpx.ptr.p_double[cols0+j]-v*analysis->outputstorage.ptr.p_double[baseoffs+j];
            }
            analysis->tmpx.ptr.p_double[cols0+i] = v;
        }
    }

    /*
     * Handle right-hand side permutation: write the permuted solution back
     * into B.
     */
    for(i=0; i<=n-1; i++)
    {
        b->ptr.p_double[i] = analysis->tmpx.ptr.p_double[analysis->effectiveperm.ptr.p_int[i]];
    }
}
46073 
46074 
46075 /*************************************************************************
46076 Compares diag(L*L') with that of the original A and returns  two  metrics:
46077 * SumSq - sum of squares of diag(A)
46078 * ErrSq - sum of squared errors, i.e. Frobenius norm of diag(L*L')-diag(A)
46079 
46080 These metrics can be used to check accuracy of the factorization.
46081 
46082 INPUT PARAMETERS:
46083     Analysis    -   prior factorization performed on some sparse matrix
46084 
46085 OUTPUT PARAMETERS:
46086     SumSq, ErrSq-   diagonal magnitude and absolute diagonal error
46087 
46088   -- ALGLIB routine --
46089      08.09.2021
46090      Bochkanov Sergey
46091 *************************************************************************/
spsymmdiagerr(spcholanalysis * analysis,double * sumsq,double * errsq,ae_state * _state)46092 void spsymmdiagerr(spcholanalysis* analysis,
46093      double* sumsq,
46094      double* errsq,
46095      ae_state *_state)
46096 {
46097     ae_int_t n;
46098     double v;
46099     double vv;
46100     ae_int_t simdwidth;
46101     ae_int_t baseoffs;
46102     ae_int_t cols0;
46103     ae_int_t cols1;
46104     ae_int_t offss;
46105     ae_int_t sstride;
46106     ae_int_t sidx;
46107     ae_int_t blocksize;
46108     ae_int_t rbase;
46109     ae_int_t offdiagsize;
46110     ae_int_t i;
46111     ae_int_t j;
46112     ae_int_t k;
46113 
46114     *sumsq = 0;
46115     *errsq = 0;
46116 
46117     n = analysis->n;
46118     simdwidth = 1;
46119 
46120     /*
46121      * Scan L, compute diag(L*L')
46122      */
46123     rsetallocv(simdwidth*n, 0.0, &analysis->simdbuf, _state);
46124     for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
46125     {
46126         cols0 = analysis->supercolrange.ptr.p_int[sidx];
46127         cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
46128         blocksize = cols1-cols0;
46129         offss = analysis->rowoffsets.ptr.p_int[sidx];
46130         sstride = analysis->rowstrides.ptr.p_int[sidx];
46131         rbase = analysis->superrowridx.ptr.p_int[sidx];
46132         offdiagsize = analysis->superrowridx.ptr.p_int[sidx+1]-rbase;
46133 
46134         /*
46135          * Handle triangular diagonal block
46136          */
46137         for(i=cols0; i<=cols1-1; i++)
46138         {
46139             baseoffs = offss+(i-cols0)*sstride+(-cols0);
46140             v = (double)(0);
46141             for(j=0; j<=simdwidth-1; j++)
46142             {
46143                 v = v+analysis->simdbuf.ptr.p_double[i*simdwidth+j];
46144             }
46145             for(j=cols0; j<=i; j++)
46146             {
46147                 vv = analysis->outputstorage.ptr.p_double[baseoffs+j];
46148                 v = v+vv*vv*analysis->diagd.ptr.p_double[j];
46149             }
46150             *sumsq = *sumsq+ae_sqr(analysis->inputstorage.ptr.p_double[baseoffs+i], _state);
46151             *errsq = *errsq+ae_sqr(analysis->inputstorage.ptr.p_double[baseoffs+i]-v, _state);
46152         }
46153 
46154         /*
46155          * Accumulate entries below triangular diagonal block
46156          */
46157         for(k=0; k<=offdiagsize-1; k++)
46158         {
46159             i = analysis->superrowidx.ptr.p_int[rbase+k];
46160             baseoffs = offss+(k+blocksize)*sstride;
46161             v = analysis->simdbuf.ptr.p_double[i*simdwidth];
46162             for(j=0; j<=blocksize-1; j++)
46163             {
46164                 vv = analysis->outputstorage.ptr.p_double[baseoffs+j];
46165                 v = v+vv*vv*analysis->diagd.ptr.p_double[cols0+j];
46166             }
46167             analysis->simdbuf.ptr.p_double[i*simdwidth] = v;
46168         }
46169     }
46170 }
46171 
46172 
46173 #ifdef ALGLIB_NO_FAST_KERNELS
46174 /*************************************************************************
46175 Informational function, useful for debugging
46176 *************************************************************************/
spchol_spsymmgetmaxsimd(ae_state * _state)46177 static ae_int_t spchol_spsymmgetmaxsimd(ae_state *_state)
46178 {
46179     ae_int_t result;
46180 
46181 
46182     result = 1;
46183     return result;
46184 }
46185 #endif
46186 
46187 
46188 #ifdef ALGLIB_NO_FAST_KERNELS
46189 /*************************************************************************
46190 Solving linear system: propagating computed supernode.
46191 
46192 Propagates computed supernode to the rest of the RHS  using  SIMD-friendly
46193 RHS storage format.
46194 
46195 INPUT PARAMETERS:
46196 
46197 OUTPUT PARAMETERS:
46198 
46199   -- ALGLIB routine --
46200      08.09.2021
46201      Bochkanov Sergey
46202 *************************************************************************/
spchol_propagatefwd(ae_vector * x,ae_int_t cols0,ae_int_t blocksize,ae_vector * superrowidx,ae_int_t rbase,ae_int_t offdiagsize,ae_vector * rowstorage,ae_int_t offss,ae_int_t sstride,ae_vector * simdbuf,ae_int_t simdwidth,ae_state * _state)46203 static void spchol_propagatefwd(/* Real    */ ae_vector* x,
46204      ae_int_t cols0,
46205      ae_int_t blocksize,
46206      /* Integer */ ae_vector* superrowidx,
46207      ae_int_t rbase,
46208      ae_int_t offdiagsize,
46209      /* Real    */ ae_vector* rowstorage,
46210      ae_int_t offss,
46211      ae_int_t sstride,
46212      /* Real    */ ae_vector* simdbuf,
46213      ae_int_t simdwidth,
46214      ae_state *_state)
46215 {
46216     ae_int_t i;
46217     ae_int_t j;
46218     ae_int_t k;
46219     ae_int_t baseoffs;
46220     double v;
46221 
46222 
46223     for(k=0; k<=offdiagsize-1; k++)
46224     {
46225         i = superrowidx->ptr.p_int[rbase+k];
46226         baseoffs = offss+(k+blocksize)*sstride;
46227         v = simdbuf->ptr.p_double[i*simdwidth];
46228         for(j=0; j<=blocksize-1; j++)
46229         {
46230             v = v-rowstorage->ptr.p_double[baseoffs+j]*x->ptr.p_double[cols0+j];
46231         }
46232         simdbuf->ptr.p_double[i*simdwidth] = v;
46233     }
46234 }
46235 #endif
46236 
46237 
46238 /*************************************************************************
46239 This function generates a test reordering used for debug purposes only
46240 
46241 INPUT PARAMETERS
46242     A           -   lower triangular sparse matrix in CRS format
46243     N           -   problem size
46244 
46245 OUTPUT PARAMETERS
46246     Perm        -   array[N], maps original indexes I to permuted indexes
46247     InvPerm     -   array[N], maps permuted indexes I to original indexes
46248 
46249   -- ALGLIB PROJECT --
46250      Copyright 05.10.2020 by Bochkanov Sergey.
46251 *************************************************************************/
spchol_generatedbgpermutation(sparsematrix * a,ae_int_t n,ae_vector * perm,ae_vector * invperm,ae_state * _state)46252 static void spchol_generatedbgpermutation(sparsematrix* a,
46253      ae_int_t n,
46254      /* Integer */ ae_vector* perm,
46255      /* Integer */ ae_vector* invperm,
46256      ae_state *_state)
46257 {
46258     ae_frame _frame_block;
46259     ae_int_t i;
46260     ae_int_t j;
46261     ae_int_t j0;
46262     ae_int_t j1;
46263     ae_int_t jj;
46264     ae_vector d;
46265     ae_vector tmpr;
46266     ae_vector tmpperm;
46267 
46268     ae_frame_make(_state, &_frame_block);
46269     memset(&d, 0, sizeof(d));
46270     memset(&tmpr, 0, sizeof(tmpr));
46271     memset(&tmpperm, 0, sizeof(tmpperm));
46272     ae_vector_init(&d, 0, DT_REAL, _state, ae_true);
46273     ae_vector_init(&tmpr, 0, DT_REAL, _state, ae_true);
46274     ae_vector_init(&tmpperm, 0, DT_INT, _state, ae_true);
46275 
46276 
46277     /*
46278      * Initialize D by vertex degrees
46279      */
46280     rsetallocv(n, (double)(0), &d, _state);
46281     for(i=0; i<=n-1; i++)
46282     {
46283         j0 = a->ridx.ptr.p_int[i];
46284         j1 = a->didx.ptr.p_int[i]-1;
46285         d.ptr.p_double[i] = (double)(j1-j0+1);
46286         for(jj=j0; jj<=j1; jj++)
46287         {
46288             j = a->idx.ptr.p_int[jj];
46289             d.ptr.p_double[j] = d.ptr.p_double[j]+1;
46290         }
46291     }
46292 
46293     /*
46294      * Prepare permutation that orders vertices by degrees
46295      */
46296     iallocv(n, invperm, _state);
46297     for(i=0; i<=n-1; i++)
46298     {
46299         invperm->ptr.p_int[i] = i;
46300     }
46301     tagsortfasti(&d, invperm, &tmpr, &tmpperm, n, _state);
46302     iallocv(n, perm, _state);
46303     for(i=0; i<=n-1; i++)
46304     {
46305         perm->ptr.p_int[invperm->ptr.p_int[i]] = i;
46306     }
46307     ae_frame_leave(_state);
46308 }
46309 
46310 
46311 /*************************************************************************
46312 This function builds elimination tree in the original column order
46313 
46314 INPUT PARAMETERS
46315     A           -   lower triangular sparse matrix in CRS format
46316     N           -   problem size
46317     Parent,
46318     tAbove      -   preallocated temporary array, length at least N+1, no
46319                     meaningful output is provided in these variables
46320 
46321 OUTPUT PARAMETERS
46322     Parent      -   array[N], Parent[I] contains index of parent of I-th
46323                     column. -1 is used to denote column with no parents.
46324 
46325   -- ALGLIB PROJECT --
46326      Copyright 15.08.2021 by Bochkanov Sergey.
46327 *************************************************************************/
static void spchol_buildunorderedetree(sparsematrix* a,
     ae_int_t n,
     /* Integer */ ae_vector* parent,
     /* Integer */ ae_vector* tabove,
     ae_state *_state)
{
    ae_int_t r;
    ae_int_t abover;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t j0;
    ae_int_t j1;
    ae_int_t jj;


    ae_assert(parent->cnt>=n+1, "BuildUnorderedETree: input buffer Parent is too short", _state);
    ae_assert(tabove->cnt>=n+1, "BuildUnorderedETree: input buffer tAbove is too short", _state);

    /*
     * Build elimination tree using Liu's algorithm with path compression
     *
     * Internally the value N is used as a sentinel meaning "no parent /
     * no ancestor assigned yet"; tAbove[R] stores a compressed ancestor
     * link: some column index known to lie on the path from R towards the
     * root (tAbove[R] >= Parent[R]), which shortcuts repeated climbs.
     */
    for(j=0; j<=n-1; j++)
    {
        /* Start column J with no parent and no ancestor link */
        parent->ptr.p_int[j] = n;
        tabove->ptr.p_int[j] = n;
        /* Strictly sub-diagonal entries of row J, i.e. columns R<J with A[J,R]!=0 */
        j0 = a->ridx.ptr.p_int[j];
        j1 = a->didx.ptr.p_int[j]-1;
        for(jj=j0; jj<=j1; jj++)
        {
            r = a->idx.ptr.p_int[jj];
            abover = tabove->ptr.p_int[r];
            /*
             * Climb from R towards the root, rewriting every ancestor link on
             * the way to point at J (path compression; J is the largest
             * column seen so far, so this only tightens the links).
             */
            while(abover<j)
            {
                k = abover;
                tabove->ptr.p_int[r] = j;
                r = k;
                abover = tabove->ptr.p_int[r];
            }
            /*
             * Reached a root (no ancestor assigned): J becomes its parent.
             */
            if( abover==n )
            {
                tabove->ptr.p_int[r] = j;
                parent->ptr.p_int[r] = j;
            }
        }
    }

    /*
     * Convert to external format: replace the internal "no parent"
     * sentinel N by -1.
     */
    for(i=0; i<=n-1; i++)
    {
        if( parent->ptr.p_int[i]==n )
        {
            parent->ptr.p_int[i] = -1;
        }
    }
}
46386 
46387 
46388 /*************************************************************************
46389 This function analyzes  elimination  tree  stored  using  'parent-of-node'
46390 format and converts it to the 'children-of-node' format.
46391 
46392 INPUT PARAMETERS
46393     Parent      -   array[N], supernodal etree
46394     N           -   problem size
46395     ChildrenR,
46396     ChildrenI,
46397     tTmp0       -   preallocated arrays, length at least N+1
46398 
46399 OUTPUT PARAMETERS
46400     ChildrenR   -   array[N+1], children range (see below)
46401     ChildrenI   -   array[N+1], children of K-th node are stored  in  the
46402                     elements ChildrenI[ChildrenR[K]...ChildrenR[K+1]-1]
46403 
46404   -- ALGLIB PROJECT --
46405      Copyright 05.10.2020 by Bochkanov Sergey.
46406 *************************************************************************/
spchol_fromparenttochildren(ae_vector * parent,ae_int_t n,ae_vector * childrenr,ae_vector * childreni,ae_vector * ttmp0,ae_state * _state)46407 static void spchol_fromparenttochildren(/* Integer */ ae_vector* parent,
46408      ae_int_t n,
46409      /* Integer */ ae_vector* childrenr,
46410      /* Integer */ ae_vector* childreni,
46411      /* Integer */ ae_vector* ttmp0,
46412      ae_state *_state)
46413 {
46414     ae_int_t i;
46415     ae_int_t k;
46416     ae_int_t nodeidx;
46417 
46418 
46419     ae_assert(ttmp0->cnt>=n+1, "FromParentToChildren: input buffer tTmp0 is too short", _state);
46420     ae_assert(childrenr->cnt>=n+1, "FromParentToChildren: input buffer ChildrenR is too short", _state);
46421     ae_assert(childreni->cnt>=n+1, "FromParentToChildren: input buffer ChildrenI is too short", _state);
46422 
46423     /*
46424      * Convert etree from per-column parent array to per-column children list
46425      */
46426     isetv(n, 0, ttmp0, _state);
46427     for(i=0; i<=n-1; i++)
46428     {
46429         nodeidx = parent->ptr.p_int[i];
46430         if( nodeidx>=0 )
46431         {
46432             ttmp0->ptr.p_int[nodeidx] = ttmp0->ptr.p_int[nodeidx]+1;
46433         }
46434     }
46435     childrenr->ptr.p_int[0] = 0;
46436     for(i=0; i<=n-1; i++)
46437     {
46438         childrenr->ptr.p_int[i+1] = childrenr->ptr.p_int[i]+ttmp0->ptr.p_int[i];
46439     }
46440     isetv(n, 0, ttmp0, _state);
46441     for(i=0; i<=n-1; i++)
46442     {
46443         k = parent->ptr.p_int[i];
46444         if( k>=0 )
46445         {
46446             childreni->ptr.p_int[childrenr->ptr.p_int[k]+ttmp0->ptr.p_int[k]] = i;
46447             ttmp0->ptr.p_int[k] = ttmp0->ptr.p_int[k]+1;
46448         }
46449     }
46450 }
46451 
46452 
46453 /*************************************************************************
46454 This function builds elimination tree and reorders  it  according  to  the
46455 topological post-ordering.
46456 
46457 INPUT PARAMETERS
46458     A           -   lower triangular sparse matrix in CRS format
46459     N           -   problem size
46460 
46461     tRawParentOfRawNode,
46462     tRawParentOfReorderedNode,
46463     tTmp,
46464     tFlagArray  -   preallocated temporary arrays, length at least N+1, no
46465                     meaningful output is provided in these variables
46466 
46467 OUTPUT PARAMETERS
46468     Parent      -   array[N], Parent[I] contains index of parent of I-th
46469                     column (after topological reordering). -1 is used to
46470                     denote column with no parents.
46471     SupernodalPermutation
46472                 -   array[N], maps original indexes I to permuted indexes
46473     InvSupernodalPermutation
46474                 -   array[N], maps permuted indexes I to original indexes
46475 
46476   -- ALGLIB PROJECT --
46477      Copyright 05.10.2020 by Bochkanov Sergey.
46478 *************************************************************************/
static void spchol_buildorderedetree(sparsematrix* a,
     ae_int_t n,
     /* Integer */ ae_vector* parent,
     /* Integer */ ae_vector* supernodalpermutation,
     /* Integer */ ae_vector* invsupernodalpermutation,
     /* Integer */ ae_vector* trawparentofrawnode,
     /* Integer */ ae_vector* trawparentofreorderednode,
     /* Integer */ ae_vector* ttmp,
     /* Boolean */ ae_vector* tflagarray,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_int_t sidx;
    ae_int_t unprocessedchildrencnt;


    ae_assert(trawparentofrawnode->cnt>=n+1, "BuildOrderedETree: input buffer tRawParentOfRawNode is too short", _state);
    ae_assert(ttmp->cnt>=n+1, "BuildOrderedETree: input buffer tTmp is too short", _state);
    ae_assert(trawparentofreorderednode->cnt>=n+1, "BuildOrderedETree: input buffer tRawParentOfReorderedNode is too short", _state);
    ae_assert(tflagarray->cnt>=n+1, "BuildOrderedETree: input buffer tFlagArray is too short", _state);

    /*
     * Avoid spurious compiler warnings
     */
    unprocessedchildrencnt = 0;

    /*
     * Build elimination tree with original column order
     */
    spchol_buildunorderedetree(a, n, trawparentofrawnode, ttmp, _state);

    /*
     * Compute topological ordering of the elimination tree, produce:
     * * direct and inverse permutations
     * * reordered etree stored in Parent[]
     *
     * After the counting loop below, tTmp[K] holds the number of
     * not-yet-emitted children of raw node K; tFlagArray[I]=True means
     * raw node I has not been assigned a position yet.
     */
    isetallocv(n, -1, invsupernodalpermutation, _state);
    isetallocv(n, -1, supernodalpermutation, _state);
    isetallocv(n, -1, parent, _state);
    isetv(n, -1, trawparentofreorderednode, _state);
    isetv(n, 0, ttmp, _state);
    for(i=0; i<=n-1; i++)
    {
        k = trawparentofrawnode->ptr.p_int[i];
        if( k>=0 )
        {
            ttmp->ptr.p_int[k] = ttmp->ptr.p_int[k]+1;
        }
    }
    bsetv(n, ae_true, tflagarray, _state);
    sidx = 0;
    for(i=0; i<=n-1; i++)
    {
        if( tflagarray->ptr.p_bool[i] )
        {

            /*
             * Move column I to position SIdx, decrease unprocessed children count
             */
            supernodalpermutation->ptr.p_int[i] = sidx;
            invsupernodalpermutation->ptr.p_int[sidx] = i;
            tflagarray->ptr.p_bool[i] = ae_false;
            k = trawparentofrawnode->ptr.p_int[i];
            trawparentofreorderednode->ptr.p_int[sidx] = k;
            if( k>=0 )
            {
                unprocessedchildrencnt = ttmp->ptr.p_int[k]-1;
                ttmp->ptr.p_int[k] = unprocessedchildrencnt;
            }
            sidx = sidx+1;

            /*
             * Add parents (as long as parent has no unprocessed children).
             * This climb guarantees a post-ordering: a parent is emitted
             * immediately after its last child, i.e. every node is placed
             * after all of its descendants.
             */
            while(k>=0&&unprocessedchildrencnt==0)
            {
                supernodalpermutation->ptr.p_int[k] = sidx;
                invsupernodalpermutation->ptr.p_int[sidx] = k;
                tflagarray->ptr.p_bool[k] = ae_false;
                k = trawparentofrawnode->ptr.p_int[k];
                trawparentofreorderednode->ptr.p_int[sidx] = k;
                if( k>=0 )
                {
                    unprocessedchildrencnt = ttmp->ptr.p_int[k]-1;
                    ttmp->ptr.p_int[k] = unprocessedchildrencnt;
                }
                sidx = sidx+1;
            }
        }
    }
    /*
     * Translate the etree into the reordered index space:
     * tRawParentOfReorderedNode still holds raw parent indexes, map
     * them through the permutation to fill Parent[].
     */
    for(i=0; i<=n-1; i++)
    {
        k = trawparentofreorderednode->ptr.p_int[i];
        if( k>=0 )
        {
            parent->ptr.p_int[i] = supernodalpermutation->ptr.p_int[k];
        }
    }
}
46579 
46580 
46581 /*************************************************************************
46582 This function analyzes postordered elimination tree and creates supernodal
46583 structure in Analysis object.
46584 
46585 INPUT PARAMETERS
46586     AT          -   upper triangular CRS matrix, transpose and  reordering
46587                     of the original input matrix A
46588     Parent      -   array[N], supernodal etree
46589     N           -   problem size
46590 
46591     tChildrenR,
46592     tChildrenI,
46593     tParentNodeOfSupernode,
46594     tNode2Supernode,
46595     tTmp0,
46596     tFlagArray  -   temporary arrays, length at least N+1, simply provide
46597                     preallocated place.
46598 
46599 OUTPUT PARAMETERS
46600     Analysis    -   following fields are initialized:
46601                     * Analysis.NSuper
46602                     * Analysis.SuperColRange
46603                     * Analysis.SuperRowRIdx
46604                     * Analysis.SuperRowIdx
46605                     * Analysis.ParentSupernode
46606                     * Analysis.OutRowCounts
46607                     other fields are ignored and not changed.
46608     Node2Supernode- array[N] that maps node indexes to supernode indexes
46609 
46610   -- ALGLIB PROJECT --
46611      Copyright 05.10.2020 by Bochkanov Sergey.
46612 *************************************************************************/
static void spchol_createsupernodalstructure(sparsematrix* at,
     /* Integer */ ae_vector* parent,
     ae_int_t n,
     spcholanalysis* analysis,
     /* Integer */ ae_vector* node2supernode,
     /* Integer */ ae_vector* tchildrenr,
     /* Integer */ ae_vector* tchildreni,
     /* Integer */ ae_vector* tparentnodeofsupernode,
     /* Integer */ ae_vector* tfakenonzeros,
     /* Integer */ ae_vector* ttmp0,
     /* Boolean */ ae_vector* tflagarray,
     ae_state *_state)
{
    ae_int_t nsuper;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t sidx;
    ae_int_t i0;
    ae_int_t ii;
    ae_int_t columnidx;
    ae_int_t nodeidx;
    ae_int_t rfirst;
    ae_int_t rlast;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t blocksize;
    ae_bool createsupernode;
    ae_int_t colcount;
    ae_int_t offdiagcnt;
    ae_int_t childcolcount;
    ae_int_t childoffdiagcnt;
    ae_int_t fakezerosinnewsupernode;
    double mergeinefficiency;
    ae_bool hastheonlychild;


    ae_assert(ttmp0->cnt>=n+1, "CreateSupernodalStructure: input buffer tTmp0 is too short", _state);
    ae_assert(tchildrenr->cnt>=n+1, "CreateSupernodalStructure: input buffer ChildrenR is too short", _state);
    ae_assert(tchildreni->cnt>=n+1, "CreateSupernodalStructure: input buffer ChildrenI is too short", _state);
    ae_assert(tparentnodeofsupernode->cnt>=n+1, "CreateSupernodalStructure: input buffer tParentNodeOfSupernode is too short", _state);
    ae_assert(tfakenonzeros->cnt>=n+1, "CreateSupernodalStructure: input buffer tFakeNonzeros is too short", _state);
    ae_assert(tflagarray->cnt>=n+1, "CreateSupernodalStructure: input buffer tFlagArray is too short", _state);

    /*
     * Trace
     */
    if( analysis->dotracesupernodalstructure )
    {
        ae_trace("=== GENERATING SUPERNODAL STRUCTURE ================================================================\n");
    }

    /*
     * Convert etree from per-column parent array to per-column children list
     */
    spchol_fromparenttochildren(parent, n, tchildrenr, tchildreni, ttmp0, _state);

    /*
     * Analyze supernodal structure:
     * * determine children count for each node
     * * combine chains of children into supernodes
     * * generate direct and inverse supernodal (topological) permutations
     * * generate column structure of supernodes (after supernodal permutation)
     *
     * Note: tParentNodeOfSupernode is filled with the sentinel N+1
     * ("undefined") before the main loop; tFakeNonzeros[S] counts
     * explicitly stored zeros introduced into supernode S by merging.
     */
    isetallocv(n, -1, node2supernode, _state);
    ivectorsetlengthatleast(&analysis->supercolrange, n+1, _state);
    ivectorsetlengthatleast(&analysis->superrowridx, n+1, _state);
    isetv(n, n+1, tparentnodeofsupernode, _state);
    bsetv(n, ae_true, tflagarray, _state);
    nsuper = 0;
    analysis->supercolrange.ptr.p_int[0] = 0;
    analysis->superrowridx.ptr.p_int[0] = 0;
    while(analysis->supercolrange.ptr.p_int[nsuper]<n)
    {
        columnidx = analysis->supercolrange.ptr.p_int[nsuper];

        /*
         * Compute nonzero pattern of the column, create temporary standalone node
         * for possible supernodal merge. Newly created node has just one column
         * and no fake nonzeros.
         */
        rfirst = analysis->superrowridx.ptr.p_int[nsuper];
        rlast = spchol_computenonzeropattern(at, columnidx, n, &analysis->superrowridx, &analysis->superrowidx, nsuper, tchildrenr, tchildreni, node2supernode, tflagarray, ttmp0, _state);
        analysis->supercolrange.ptr.p_int[nsuper+1] = columnidx+1;
        analysis->superrowridx.ptr.p_int[nsuper+1] = rlast;
        node2supernode->ptr.p_int[columnidx] = nsuper;
        tparentnodeofsupernode->ptr.p_int[nsuper] = parent->ptr.p_int[columnidx];
        tfakenonzeros->ptr.p_int[nsuper] = 0;
        offdiagcnt = rlast-rfirst;
        colcount = 1;
        nsuper = nsuper+1;
        if( analysis->dotracesupernodalstructure )
        {
            ae_trace("> incoming column %0d\n",
                (int)(columnidx));
            ae_trace("offdiagnnz = %0d\n",
                (int)(rlast-rfirst));
            ae_trace("children   = [ ");
            for(i=tchildrenr->ptr.p_int[columnidx]; i<=tchildrenr->ptr.p_int[columnidx+1]-1; i++)
            {
                ae_trace("S%0d ",
                    (int)(node2supernode->ptr.p_int[tchildreni->ptr.p_int[i]]));
            }
            ae_trace("]\n");
        }

        /*
         * Decide whether to merge column with previous supernode or not.
         * The candidate for merging is supernode NSuper-2 (the previous
         * one), which must be a child of the current column in the etree.
         */
        childcolcount = 0;
        childoffdiagcnt = 0;
        mergeinefficiency = 0.0;
        fakezerosinnewsupernode = 0;
        createsupernode = ae_false;
        hastheonlychild = ae_false;
        if( nsuper>=2&&tparentnodeofsupernode->ptr.p_int[nsuper-2]==columnidx )
        {
            childcolcount = analysis->supercolrange.ptr.p_int[nsuper-1]-analysis->supercolrange.ptr.p_int[nsuper-2];
            childoffdiagcnt = analysis->superrowridx.ptr.p_int[nsuper-1]-analysis->superrowridx.ptr.p_int[nsuper-2];
            hastheonlychild = tchildrenr->ptr.p_int[columnidx+1]-tchildrenr->ptr.p_int[columnidx]==1;
            if( (hastheonlychild||spchol_relaxedsupernodes)&&colcount+childcolcount<=spchol_maxsupernode )
            {
                /*
                 * K = total element count of the merged supernode (its
                 * triangular part plus off-diagonal rows); the merge
                 * inefficiency is the share of fake (explicitly stored)
                 * zeros in that storage.
                 */
                i = colcount+childcolcount;
                k = i*(i+1)/2+offdiagcnt*i;
                fakezerosinnewsupernode = tfakenonzeros->ptr.p_int[nsuper-2]+tfakenonzeros->ptr.p_int[nsuper-1]+(offdiagcnt-(childoffdiagcnt-1))*childcolcount;
                mergeinefficiency = (double)fakezerosinnewsupernode/(double)k;
                /* Tiny 2-column supernodes are merged with a relaxed absolute tolerance */
                if( colcount+childcolcount==2&&fakezerosinnewsupernode<=spchol_smallfakestolerance )
                {
                    createsupernode = ae_true;
                }
                if( ae_fp_less_eq(mergeinefficiency,spchol_maxmergeinefficiency) )
                {
                    createsupernode = ae_true;
                }
            }
        }

        /*
         * Create supernode if needed
         */
        if( createsupernode )
        {

            /*
             * Create supernode from nodes NSuper-2 and NSuper-1.
             * Because these nodes are in the child-parent relation, we can simply
             * copy nonzero pattern from NSuper-1.
             */
            ae_assert(tparentnodeofsupernode->ptr.p_int[nsuper-2]==columnidx, "CreateSupernodalStructure: integrity check 9472 failed", _state);
            i0 = analysis->superrowridx.ptr.p_int[nsuper-1];
            ii = analysis->superrowridx.ptr.p_int[nsuper]-analysis->superrowridx.ptr.p_int[nsuper-1];
            rfirst = analysis->superrowridx.ptr.p_int[nsuper-2];
            rlast = rfirst+ii;
            for(i=0; i<=ii-1; i++)
            {
                analysis->superrowidx.ptr.p_int[rfirst+i] = analysis->superrowidx.ptr.p_int[i0+i];
            }
            analysis->supercolrange.ptr.p_int[nsuper-1] = columnidx+1;
            analysis->superrowridx.ptr.p_int[nsuper-1] = rlast;
            node2supernode->ptr.p_int[columnidx] = nsuper-2;
            tfakenonzeros->ptr.p_int[nsuper-2] = fakezerosinnewsupernode;
            tparentnodeofsupernode->ptr.p_int[nsuper-2] = parent->ptr.p_int[columnidx];
            nsuper = nsuper-1;

            /*
             * Trace
             */
            if( analysis->dotracesupernodalstructure )
            {
                ae_trace("> merged with supernode S%0d",
                    (int)(nsuper-1));
                if( ae_fp_neq(mergeinefficiency,(double)(0)) )
                {
                    ae_trace(" (%2.0f%% inefficiency)",
                        (double)(mergeinefficiency*100));
                }
                ae_trace("\n*\n");
            }
        }
        else
        {

            /*
             * Trace
             */
            if( analysis->dotracesupernodalstructure )
            {
                ae_trace("> standalone node S%0d created\n*\n",
                    (int)(nsuper-1));
            }
        }
    }
    analysis->nsuper = nsuper;
    ae_assert(analysis->nsuper>=1, "SPSymmAnalyze: integrity check failed (95mgd)", _state);
    ae_assert(analysis->supercolrange.ptr.p_int[0]==0, "SPCholFactorize: integrity check failed (f446s)", _state);
    ae_assert(analysis->supercolrange.ptr.p_int[nsuper]==n, "SPSymmAnalyze: integrity check failed (04ut4)", _state);
    /*
     * Map per-node parent links to per-supernode parent links
     */
    isetallocv(nsuper, -1, &analysis->parentsupernode, _state);
    for(sidx=0; sidx<=nsuper-1; sidx++)
    {
        nodeidx = tparentnodeofsupernode->ptr.p_int[sidx];
        if( nodeidx>=0 )
        {
            nodeidx = node2supernode->ptr.p_int[nodeidx];
            analysis->parentsupernode.ptr.p_int[sidx] = nodeidx;
        }
    }

    /*
     * Allocate supernodal storage: RowOffsets[I] is the offset of supernode
     * I in the flat storage array, computed from per-supernode strides and
     * aligned with spchol_alignpositioninarray().
     */
    ivectorsetlengthatleast(&analysis->rowoffsets, analysis->nsuper+1, _state);
    ivectorsetlengthatleast(&analysis->rowstrides, analysis->nsuper, _state);
    analysis->rowoffsets.ptr.p_int[0] = 0;
    for(i=0; i<=analysis->nsuper-1; i++)
    {
        blocksize = analysis->supercolrange.ptr.p_int[i+1]-analysis->supercolrange.ptr.p_int[i];
        analysis->rowstrides.ptr.p_int[i] = spchol_recommendedstridefor(blocksize, _state);
        analysis->rowoffsets.ptr.p_int[i+1] = analysis->rowoffsets.ptr.p_int[i];
        analysis->rowoffsets.ptr.p_int[i+1] = analysis->rowoffsets.ptr.p_int[i+1]+analysis->rowstrides.ptr.p_int[i]*blocksize;
        analysis->rowoffsets.ptr.p_int[i+1] = analysis->rowoffsets.ptr.p_int[i+1]+analysis->rowstrides.ptr.p_int[i]*(analysis->superrowridx.ptr.p_int[i+1]-analysis->superrowridx.ptr.p_int[i]);
        analysis->rowoffsets.ptr.p_int[i+1] = spchol_alignpositioninarray(analysis->rowoffsets.ptr.p_int[i+1], _state);
    }

    /*
     * Analyze output structure: OutRowCounts[J] accumulates the number of
     * stored elements in output row J (triangular part of its own
     * supernode plus BlockSize elements per off-diagonal occurrence).
     */
    isetallocv(n, 0, &analysis->outrowcounts, _state);
    for(sidx=0; sidx<=nsuper-1; sidx++)
    {
        cols0 = analysis->supercolrange.ptr.p_int[sidx];
        cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
        rfirst = analysis->superrowridx.ptr.p_int[sidx];
        rlast = analysis->superrowridx.ptr.p_int[sidx+1];
        blocksize = cols1-cols0;
        for(j=cols0; j<=cols1-1; j++)
        {
            analysis->outrowcounts.ptr.p_int[j] = analysis->outrowcounts.ptr.p_int[j]+(j-cols0+1);
        }
        for(ii=rfirst; ii<=rlast-1; ii++)
        {
            i0 = analysis->superrowidx.ptr.p_int[ii];
            analysis->outrowcounts.ptr.p_int[i0] = analysis->outrowcounts.ptr.p_int[i0]+blocksize;
        }
    }
}
46858 
46859 
46860 /*************************************************************************
46861 This function analyzes supernodal  structure  and  precomputes  dependency
46862 matrix LAdj+
46863 
46864 INPUT PARAMETERS
46865     Analysis    -   analysis object with completely initialized supernodal
46866                     structure
46867     RawA        -   original (before reordering) input matrix
46868     Node2Supernode- mapping from node to supernode indexes
46869     N           -   problem size
46870 
46871     tTmp0,
46872     tTmp1,
46873     tFlagArray  -   temporary arrays, length at least N+1, simply provide
46874                     preallocated place.
46875 
46876 OUTPUT PARAMETERS
46877     Analysis    -   following fields are initialized:
46878                     * Analysis.LAdjPlus
46879                     * Analysis.LAdjPlusR
46880     Node2Supernode- array[N] that maps node indexes to supernode indexes
46881 
46882   -- ALGLIB PROJECT --
46883      Copyright 05.10.2020 by Bochkanov Sergey.
46884 *************************************************************************/
spchol_analyzesupernodaldependencies(spcholanalysis * analysis,sparsematrix * rawa,ae_vector * node2supernode,ae_int_t n,ae_vector * ttmp0,ae_vector * ttmp1,ae_vector * tflagarray,ae_state * _state)46885 static void spchol_analyzesupernodaldependencies(spcholanalysis* analysis,
46886      sparsematrix* rawa,
46887      /* Integer */ ae_vector* node2supernode,
46888      ae_int_t n,
46889      /* Integer */ ae_vector* ttmp0,
46890      /* Integer */ ae_vector* ttmp1,
46891      /* Boolean */ ae_vector* tflagarray,
46892      ae_state *_state)
46893 {
46894     ae_int_t i;
46895     ae_int_t j;
46896     ae_int_t rowidx;
46897     ae_int_t j0;
46898     ae_int_t j1;
46899     ae_int_t jj;
46900     ae_int_t rfirst;
46901     ae_int_t rlast;
46902     ae_int_t sidx;
46903     ae_int_t uidx;
46904     ae_int_t dbgrank1nodes;
46905     ae_int_t dbgrank2nodes;
46906     ae_int_t dbgrank3nodes;
46907     ae_int_t dbgrank4nodes;
46908     ae_int_t dbgbignodes;
46909     double dbgtotalflop;
46910     double dbgnoscatterflop;
46911     double dbgnorowscatterflop;
46912     double dbgnocolscatterflop;
46913     double dbgcholeskyflop;
46914     double dbgcholesky4flop;
46915     double dbgrank1flop;
46916     double dbgrank4plusflop;
46917     double dbg444flop;
46918     double dbgxx4flop;
46919     double uflop;
46920     ae_int_t wrkrow;
46921     ae_int_t offdiagrow;
46922     ae_int_t lastrow;
46923     ae_int_t uwidth;
46924     ae_int_t uheight;
46925     ae_int_t urank;
46926     ae_int_t theight;
46927     ae_int_t twidth;
46928 
46929 
46930     ae_assert(ttmp0->cnt>=n+1, "AnalyzeSupernodalDependencies: input buffer tTmp0 is too short", _state);
46931     ae_assert(ttmp1->cnt>=n+1, "AnalyzeSupernodalDependencies: input buffer tTmp1 is too short", _state);
46932     ae_assert(tflagarray->cnt>=n+1, "AnalyzeSupernodalDependencies: input buffer tTmp0 is too short", _state);
46933     ae_assert(sparseiscrs(rawa, _state), "AnalyzeSupernodalDependencies: RawA must be CRS matrix", _state);
46934 
46935     /*
46936      * Determine LAdjPlus - supernodes feeding updates to the SIdx-th one.
46937      *
46938      * Without supernodes we have: K-th row of L (also denoted as ladj+(K))
46939      * includes original nonzeros from A (also denoted as ladj(K)) as well
46940      * as all elements on paths in elimination tree from ladj(K) to K.
46941      *
46942      * With supernodes: same principle applied.
46943      */
46944     isetallocv(analysis->nsuper+1, 0, &analysis->ladjplusr, _state);
46945     bsetv(n, ae_true, tflagarray, _state);
46946     analysis->ladjplusr.ptr.p_int[0] = 0;
46947     for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
46948     {
46949 
46950         /*
46951          * Generate list of nodes feeding updates to SIdx-th one
46952          */
46953         ivectorgrowto(&analysis->ladjplus, analysis->ladjplusr.ptr.p_int[sidx]+analysis->nsuper, _state);
46954         rfirst = analysis->ladjplusr.ptr.p_int[sidx];
46955         rlast = rfirst;
46956         for(rowidx=analysis->supercolrange.ptr.p_int[sidx]; rowidx<=analysis->supercolrange.ptr.p_int[sidx+1]-1; rowidx++)
46957         {
46958             i = analysis->invsuperperm.ptr.p_int[rowidx];
46959             j0 = rawa->ridx.ptr.p_int[i];
46960             j1 = rawa->uidx.ptr.p_int[i]-1;
46961             for(jj=j0; jj<=j1; jj++)
46962             {
46963                 j = node2supernode->ptr.p_int[analysis->superperm.ptr.p_int[rawa->idx.ptr.p_int[jj]]];
46964                 if( j<sidx&&tflagarray->ptr.p_bool[j] )
46965                 {
46966                     analysis->ladjplus.ptr.p_int[rlast] = j;
46967                     tflagarray->ptr.p_bool[j] = ae_false;
46968                     rlast = rlast+1;
46969                     j = analysis->parentsupernode.ptr.p_int[j];
46970                     while((j>=0&&j<sidx)&&tflagarray->ptr.p_bool[j])
46971                     {
46972                         analysis->ladjplus.ptr.p_int[rlast] = j;
46973                         tflagarray->ptr.p_bool[j] = ae_false;
46974                         rlast = rlast+1;
46975                         j = analysis->parentsupernode.ptr.p_int[j];
46976                     }
46977                 }
46978             }
46979         }
46980         for(i=rfirst; i<=rlast-1; i++)
46981         {
46982             tflagarray->ptr.p_bool[analysis->ladjplus.ptr.p_int[i]] = ae_true;
46983         }
46984         analysis->ladjplusr.ptr.p_int[sidx+1] = rlast;
46985     }
46986 
46987     /*
46988      * Analyze statistics for trace output
46989      */
46990     if( analysis->dotrace )
46991     {
46992         ae_trace("=== ANALYZING SUPERNODAL DEPENDENCIES ==============================================================\n");
46993         dbgrank1nodes = 0;
46994         dbgrank2nodes = 0;
46995         dbgrank3nodes = 0;
46996         dbgrank4nodes = 0;
46997         dbgbignodes = 0;
46998         dbgtotalflop = (double)(0);
46999         dbgnoscatterflop = (double)(0);
47000         dbgnorowscatterflop = (double)(0);
47001         dbgnocolscatterflop = (double)(0);
47002         dbgrank1flop = (double)(0);
47003         dbgrank4plusflop = (double)(0);
47004         dbg444flop = (double)(0);
47005         dbgxx4flop = (double)(0);
47006         dbgcholeskyflop = (double)(0);
47007         dbgcholesky4flop = (double)(0);
47008         isetv(analysis->nsuper, 0, ttmp0, _state);
47009         for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
47010         {
47011 
47012             /*
47013              * Node sizes
47014              */
47015             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]==1 )
47016             {
47017                 inc(&dbgrank1nodes, _state);
47018             }
47019             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]==2 )
47020             {
47021                 inc(&dbgrank2nodes, _state);
47022             }
47023             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]==3 )
47024             {
47025                 inc(&dbgrank3nodes, _state);
47026             }
47027             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]==4 )
47028             {
47029                 inc(&dbgrank4nodes, _state);
47030             }
47031             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]>4 )
47032             {
47033                 inc(&dbgbignodes, _state);
47034             }
47035 
47036             /*
47037              * FLOP counts
47038              */
47039             twidth = analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx];
47040             theight = twidth+(analysis->superrowridx.ptr.p_int[sidx+1]-analysis->superrowridx.ptr.p_int[sidx]);
47041             for(i=analysis->ladjplusr.ptr.p_int[sidx]; i<=analysis->ladjplusr.ptr.p_int[sidx+1]-1; i++)
47042             {
47043                 uidx = analysis->ladjplus.ptr.p_int[i];
47044 
47045                 /*
47046                  * Determine update width, height, rank
47047                  */
47048                 wrkrow = ttmp0->ptr.p_int[uidx];
47049                 offdiagrow = wrkrow;
47050                 lastrow = analysis->superrowridx.ptr.p_int[uidx+1]-analysis->superrowridx.ptr.p_int[uidx];
47051                 while(offdiagrow<lastrow&&analysis->superrowidx.ptr.p_int[analysis->superrowridx.ptr.p_int[uidx]+offdiagrow]<analysis->supercolrange.ptr.p_int[sidx+1])
47052                 {
47053                     offdiagrow = offdiagrow+1;
47054                 }
47055                 uwidth = offdiagrow-wrkrow;
47056                 uheight = lastrow-wrkrow;
47057                 urank = analysis->supercolrange.ptr.p_int[uidx+1]-analysis->supercolrange.ptr.p_int[uidx];
47058                 ttmp0->ptr.p_int[uidx] = offdiagrow;
47059 
47060                 /*
47061                  * Compute update FLOP cost
47062                  */
47063                 uflop = rmul3((double)(uwidth), (double)(uheight), (double)(urank), _state);
47064                 dbgtotalflop = dbgtotalflop+uflop;
47065                 if( uheight==theight&&uwidth==twidth )
47066                 {
47067                     dbgnoscatterflop = dbgnoscatterflop+uflop;
47068                 }
47069                 if( uheight==theight )
47070                 {
47071                     dbgnorowscatterflop = dbgnorowscatterflop+uflop;
47072                 }
47073                 if( uwidth==twidth )
47074                 {
47075                     dbgnocolscatterflop = dbgnocolscatterflop+uflop;
47076                 }
47077                 if( urank==1 )
47078                 {
47079                     dbgrank1flop = dbgrank1flop+uflop;
47080                 }
47081                 if( urank>=4 )
47082                 {
47083                     dbgrank4plusflop = dbgrank4plusflop+uflop;
47084                 }
47085                 if( (urank==4&&uwidth==4)&&twidth==4 )
47086                 {
47087                     dbg444flop = dbg444flop+uflop;
47088                 }
47089                 if( twidth==4 )
47090                 {
47091                     dbgxx4flop = dbgxx4flop+uflop;
47092                 }
47093             }
47094             uflop = (double)(0);
47095             for(i=0; i<=twidth-1; i++)
47096             {
47097                 uflop = uflop+(theight-i)*i+(theight-i);
47098             }
47099             dbgtotalflop = dbgtotalflop+uflop;
47100             dbgcholeskyflop = dbgcholeskyflop+uflop;
47101             if( twidth==4 )
47102             {
47103                 dbgcholesky4flop = dbgcholesky4flop+uflop;
47104             }
47105         }
47106 
47107         /*
47108          * Output
47109          */
47110         ae_trace("> node size statistics:\n");
47111         ae_trace("rank1        = %6d\n",
47112             (int)(dbgrank1nodes));
47113         ae_trace("rank2        = %6d\n",
47114             (int)(dbgrank2nodes));
47115         ae_trace("rank3        = %6d\n",
47116             (int)(dbgrank3nodes));
47117         ae_trace("rank4        = %6d\n",
47118             (int)(dbgrank4nodes));
47119         ae_trace("big nodes    = %6d\n",
47120             (int)(dbgbignodes));
47121         ae_trace("> Total FLOP count (fused multiply-adds):\n");
47122         ae_trace("total        = %8.2f MFLOP\n",
47123             (double)(1.0E-6*dbgtotalflop));
47124         ae_trace("> FLOP counts for updates:\n");
47125         ae_trace("no-sctr      = %8.2f MFLOP    (no row scatter, no col scatter, best case)\n",
47126             (double)(1.0E-6*dbgnoscatterflop));
47127         ae_trace("M4*44->N4    = %8.2f MFLOP    (no col scatter, big blocks, good case)\n",
47128             (double)(1.0E-6*dbg444flop));
47129         ae_trace("no-row-sctr  = %8.2f MFLOP    (no row scatter, good case for col-wise storage)\n",
47130             (double)(1.0E-6*dbgnorowscatterflop));
47131         ae_trace("no-col-sctr  = %8.2f MFLOP    (no col scatter, good case for row-wise storage)\n",
47132             (double)(1.0E-6*dbgnocolscatterflop));
47133         ae_trace("XX*XX->N4    = %8.2f MFLOP\n",
47134             (double)(1.0E-6*dbgxx4flop));
47135         ae_trace("rank1        = %8.2f MFLOP\n",
47136             (double)(1.0E-6*dbgrank1flop));
47137         ae_trace("rank4+       = %8.2f MFLOP\n",
47138             (double)(1.0E-6*dbgrank4plusflop));
47139         ae_trace("> FLOP counts for Cholesky:\n");
47140         ae_trace("cholesky     = %8.2f MFLOP\n",
47141             (double)(1.0E-6*dbgcholeskyflop));
47142         ae_trace("cholesky4    = %8.2f MFLOP\n",
47143             (double)(1.0E-6*dbgcholesky4flop));
47144     }
47145 }
47146 
47147 
47148 /*************************************************************************
47149 This function loads matrix into the supernodal storage.
47150 
47151   -- ALGLIB PROJECT --
47152      Copyright 05.10.2020 by Bochkanov Sergey.
47153 *************************************************************************/
spchol_loadmatrix(spcholanalysis * analysis,sparsematrix * at,ae_state * _state)47154 static void spchol_loadmatrix(spcholanalysis* analysis,
47155      sparsematrix* at,
47156      ae_state *_state)
47157 {
47158     ae_int_t i;
47159     ae_int_t j;
47160     ae_int_t k;
47161     ae_int_t ii;
47162     ae_int_t i0;
47163     ae_int_t i1;
47164     ae_int_t n;
47165     ae_int_t cols0;
47166     ae_int_t cols1;
47167     ae_int_t offss;
47168     ae_int_t sstride;
47169     ae_int_t blocksize;
47170     ae_int_t sidx;
47171 
47172 
47173     n = analysis->n;
47174     iallocv(n, &analysis->raw2smap, _state);
47175     rsetallocv(analysis->rowoffsets.ptr.p_int[analysis->nsuper], 0.0, &analysis->inputstorage, _state);
47176     for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
47177     {
47178         cols0 = analysis->supercolrange.ptr.p_int[sidx];
47179         cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
47180         blocksize = cols1-cols0;
47181         offss = analysis->rowoffsets.ptr.p_int[sidx];
47182         sstride = analysis->rowstrides.ptr.p_int[sidx];
47183 
47184         /*
47185          * Load supernode #SIdx using Raw2SMap to perform quick transformation between global and local indexing.
47186          */
47187         for(i=cols0; i<=cols1-1; i++)
47188         {
47189             analysis->raw2smap.ptr.p_int[i] = i-cols0;
47190         }
47191         for(k=analysis->superrowridx.ptr.p_int[sidx]; k<=analysis->superrowridx.ptr.p_int[sidx+1]-1; k++)
47192         {
47193             analysis->raw2smap.ptr.p_int[analysis->superrowidx.ptr.p_int[k]] = blocksize+(k-analysis->superrowridx.ptr.p_int[sidx]);
47194         }
47195         for(j=cols0; j<=cols1-1; j++)
47196         {
47197             i0 = at->ridx.ptr.p_int[j];
47198             i1 = at->ridx.ptr.p_int[j+1]-1;
47199             for(ii=i0; ii<=i1; ii++)
47200             {
47201                 analysis->inputstorage.ptr.p_double[offss+analysis->raw2smap.ptr.p_int[at->idx.ptr.p_int[ii]]*sstride+(j-cols0)] = at->vals.ptr.p_double[ii];
47202             }
47203         }
47204     }
47205 }
47206 
47207 
47208 /*************************************************************************
47209 This function extracts computed matrix from the supernodal storage.
47210 Depending on settings, a supernodal permutation can be applied to the matrix.
47211 
47212 INPUT PARAMETERS
47213     Analysis    -   analysis object with completely initialized supernodal
47214                     structure
47215     Offsets     -   offsets for supernodal storage
47216     Strides     -   row strides for supernodal storage
47217     RowStorage  -   supernodal storage
47218     DiagD       -   diagonal factor
47219     N           -   problem size
47220 
47221     TmpP        -   preallocated temporary array[N+1]
47222 
47223 OUTPUT PARAMETERS
47224     A           -   sparse matrix in CRS format:
47225                     * for PermType=0, sparse matrix in the original ordering
47226                       (i.e. the matrix is reordered prior to output that
47227                       may require considerable amount of operations due to
47228                       permutation being applied)
47229                     * for PermType=1, sparse matrix in the topological
47230                       ordering. The least overhead for output.
47231     D           -   array[N], diagonal
47232     P           -   output permutation in product form
47233 
47234   -- ALGLIB PROJECT --
47235      Copyright 05.10.2020 by Bochkanov Sergey.
47236 *************************************************************************/
static void spchol_extractmatrix(spcholanalysis* analysis,
     /* Integer */ ae_vector* offsets,
     /* Integer */ ae_vector* strides,
     /* Real    */ ae_vector* rowstorage,
     /* Real    */ ae_vector* diagd,
     ae_int_t n,
     sparsematrix* a,
     /* Real    */ ae_vector* d,
     /* Integer */ ae_vector* p,
     /* Integer */ ae_vector* tmpp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t sidx;
    ae_int_t i0;
    ae_int_t ii;
    ae_int_t rfirst;
    ae_int_t rlast;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t blocksize;
    ae_int_t rowstride;
    ae_int_t offdiagsize;
    ae_int_t offssdiag;


    ae_assert(tmpp->cnt>=n+1, "ExtractMatrix: preallocated temporary TmpP is too short", _state);

    /*
     * Basic initialization: A is an N*N CRS matrix (MatrixType=1)
     */
    a->matrixtype = 1;
    a->n = n;
    a->m = n;

    /*
     * Various permutation types
     */
    if( analysis->applypermutationtooutput )
    {
        ae_assert(analysis->istopologicalordering, "ExtractMatrix: critical integrity check failed (attempt to merge in nontopological permutation)", _state);

        /*
         * Output matrix is topologically permuted, so we return A=L*L' instead of A=P*L*L'*P'.
         * Somewhat inefficient because we have to apply permutation to L returned by supernodal code.
         */
        ivectorsetlengthatleast(&a->ridx, n+1, _state);
        ivectorsetlengthatleast(&a->didx, n, _state);
        a->ridx.ptr.p_int[0] = 0;
        /*
         * Compute row sizes: row I of the output receives the element count of
         * the corresponding row of the internally permuted factor.
         */
        for(i=0; i<=n-1; i++)
        {
            a->ridx.ptr.p_int[i+1] = a->ridx.ptr.p_int[i]+analysis->outrowcounts.ptr.p_int[analysis->effectiveperm.ptr.p_int[i]];
        }
        /*
         * DIdx is temporarily used as a per-row write cursor (initially equal to
         * the row start); it is rebuilt properly by SparseInitDUIdx() below.
         */
        for(i=0; i<=n-1; i++)
        {
            a->didx.ptr.p_int[i] = a->ridx.ptr.p_int[i];
        }
        a->ninitialized = a->ridx.ptr.p_int[n];
        rvectorsetlengthatleast(&a->vals, a->ninitialized, _state);
        ivectorsetlengthatleast(&a->idx, a->ninitialized, _state);
        /*
         * Scatter supernodal storage into CRS rows, mapping both row and column
         * indexes back to the original ordering with InvEffectivePerm.
         */
        for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
        {
            cols0 = analysis->supercolrange.ptr.p_int[sidx];
            cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
            rfirst = analysis->superrowridx.ptr.p_int[sidx];
            rlast = analysis->superrowridx.ptr.p_int[sidx+1];
            blocksize = cols1-cols0;
            offdiagsize = rlast-rfirst;
            rowstride = strides->ptr.p_int[sidx];
            offssdiag = offsets->ptr.p_int[sidx];
            /*
             * Lower triangle of the dense diagonal block
             */
            for(i=0; i<=blocksize-1; i++)
            {
                i0 = analysis->inveffectiveperm.ptr.p_int[cols0+i];
                ii = a->didx.ptr.p_int[i0];
                for(j=0; j<=i; j++)
                {
                    a->idx.ptr.p_int[ii] = analysis->inveffectiveperm.ptr.p_int[cols0+j];
                    a->vals.ptr.p_double[ii] = rowstorage->ptr.p_double[offssdiag+i*rowstride+j];
                    ii = ii+1;
                }
                a->didx.ptr.p_int[i0] = ii;
            }
            /*
             * Off-diagonal rows below the diagonal block
             */
            for(k=0; k<=offdiagsize-1; k++)
            {
                i0 = analysis->inveffectiveperm.ptr.p_int[analysis->superrowidx.ptr.p_int[k+rfirst]];
                ii = a->didx.ptr.p_int[i0];
                for(j=0; j<=blocksize-1; j++)
                {
                    a->idx.ptr.p_int[ii] = analysis->inveffectiveperm.ptr.p_int[cols0+j];
                    a->vals.ptr.p_double[ii] = rowstorage->ptr.p_double[offssdiag+(blocksize+k)*rowstride+j];
                    ii = ii+1;
                }
                a->didx.ptr.p_int[i0] = ii;
            }
        }
        /*
         * The permutation may emit row elements out of column order, so each row
         * is sorted; integrity checks verify that rows are exactly filled and
         * that the last element of every row is the diagonal one.
         */
        for(i=0; i<=n-1; i++)
        {
            ae_assert(a->didx.ptr.p_int[i]==a->ridx.ptr.p_int[i+1], "ExtractMatrix: integrity check failed (9473t)", _state);
            tagsortmiddleir(&a->idx, &a->vals, a->ridx.ptr.p_int[i], a->ridx.ptr.p_int[i+1]-a->ridx.ptr.p_int[i], _state);
            ae_assert(a->idx.ptr.p_int[a->ridx.ptr.p_int[i+1]-1]==i, "ExtractMatrix: integrity check failed (e4tfd)", _state);
        }
        sparseinitduidx(a, _state);

        /*
         * Prepare D[] and P[]: diagonal is permuted back, permutation is identity
         * (it was already merged into the output matrix)
         */
        rvectorsetlengthatleast(d, n, _state);
        ivectorsetlengthatleast(p, n, _state);
        for(i=0; i<=n-1; i++)
        {
            d->ptr.p_double[i] = diagd->ptr.p_double[analysis->effectiveperm.ptr.p_int[i]];
            p->ptr.p_int[i] = i;
        }
    }
    else
    {

        /*
         * The permutation is NOT applied to L prior to extraction,
         * we return both L and P: A=P*L*L'*P'.
         */
        ivectorsetlengthatleast(&a->ridx, n+1, _state);
        ivectorsetlengthatleast(&a->didx, n, _state);
        a->ridx.ptr.p_int[0] = 0;
        for(i=0; i<=n-1; i++)
        {
            a->ridx.ptr.p_int[i+1] = a->ridx.ptr.p_int[i]+analysis->outrowcounts.ptr.p_int[i];
        }
        /*
         * As above, DIdx serves as a per-row write cursor during extraction
         */
        for(i=0; i<=n-1; i++)
        {
            a->didx.ptr.p_int[i] = a->ridx.ptr.p_int[i];
        }
        a->ninitialized = a->ridx.ptr.p_int[n];
        rvectorsetlengthatleast(&a->vals, a->ninitialized, _state);
        ivectorsetlengthatleast(&a->idx, a->ninitialized, _state);
        for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
        {
            cols0 = analysis->supercolrange.ptr.p_int[sidx];
            cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
            rfirst = analysis->superrowridx.ptr.p_int[sidx];
            rlast = analysis->superrowridx.ptr.p_int[sidx+1];
            blocksize = cols1-cols0;
            offdiagsize = rlast-rfirst;
            rowstride = strides->ptr.p_int[sidx];
            offssdiag = offsets->ptr.p_int[sidx];
            /*
             * Lower triangle of the dense diagonal block (no index remapping here)
             */
            for(i=0; i<=blocksize-1; i++)
            {
                i0 = cols0+i;
                ii = a->didx.ptr.p_int[i0];
                for(j=0; j<=i; j++)
                {
                    a->idx.ptr.p_int[ii] = cols0+j;
                    a->vals.ptr.p_double[ii] = rowstorage->ptr.p_double[offssdiag+i*rowstride+j];
                    ii = ii+1;
                }
                a->didx.ptr.p_int[i0] = ii;
            }
            /*
             * Off-diagonal rows below the diagonal block
             */
            for(k=0; k<=offdiagsize-1; k++)
            {
                i0 = analysis->superrowidx.ptr.p_int[k+rfirst];
                ii = a->didx.ptr.p_int[i0];
                for(j=0; j<=blocksize-1; j++)
                {
                    a->idx.ptr.p_int[ii] = cols0+j;
                    a->vals.ptr.p_double[ii] = rowstorage->ptr.p_double[offssdiag+(blocksize+k)*rowstride+j];
                    ii = ii+1;
                }
                a->didx.ptr.p_int[i0] = ii;
            }
        }
        /*
         * NOTE: no per-row sorting in this branch - elements appear to be emitted
         * in increasing column order already; the integrity checks below verify
         * row fill and the trailing diagonal element.
         */
        for(i=0; i<=n-1; i++)
        {
            ae_assert(a->didx.ptr.p_int[i]==a->ridx.ptr.p_int[i+1], "ExtractMatrix: integrity check failed (34e43)", _state);
            ae_assert(a->idx.ptr.p_int[a->ridx.ptr.p_int[i+1]-1]==i, "ExtractMatrix: integrity check failed (k4df5)", _state);
        }
        sparseinitduidx(a, _state);

        /*
         * Extract diagonal
         */
        rvectorsetlengthatleast(d, n, _state);
        for(i=0; i<=n-1; i++)
        {
            d->ptr.p_double[i] = diagd->ptr.p_double[i];
        }

        /*
         * Convert permutation table into product form.
         *
         * NOTE(review): P[] tracks current element locations during the conversion
         * and is finally overwritten with pivot indexes; the exact interplay of
         * P/TmpP below is order-sensitive - confirm against sequential-pivot
         * consumers before modifying.
         */
        ivectorsetlengthatleast(p, n, _state);
        for(i=0; i<=n-1; i++)
        {
            p->ptr.p_int[i] = i;
            tmpp->ptr.p_int[i] = i;
        }
        for(i=0; i<=n-1; i++)
        {

            /*
             * We need to move element K to position I.
             * J is where K actually stored
             */
            k = analysis->inveffectiveperm.ptr.p_int[i];
            j = tmpp->ptr.p_int[k];

            /*
             * Swap elements of P[I:N-1] that is used to store current locations of elements in different way
             */
            i0 = p->ptr.p_int[i];
            p->ptr.p_int[i] = p->ptr.p_int[j];
            p->ptr.p_int[j] = i0;

            /*
             * record pivoting of positions I and J
             */
            p->ptr.p_int[i] = j;
            tmpp->ptr.p_int[i0] = j;
        }
    }
}
47459 
47460 
47461 /*************************************************************************
47462 Sparsity pattern of partial Cholesky.
47463 
47464 This function splits lower triangular L into two parts: leading HEAD  cols
47465 and trailing TAIL*TAIL submatrix. Then it computes sparsity pattern of the
47466 Cholesky decomposition of the HEAD, extracts bottom TAIL*HEAD update matrix
47467 U and applies it to the tail:
47468 
47469     pattern(TAIL) += pattern(U*U')
47470 
47471 The pattern(TAIL) is returned. It is important that pattern(TAIL)  is  not
47472 the sparsity pattern of trailing Cholesky factor, it is the pattern of the
47473 temporary matrix that will be factorized.
47474 
47475 The sparsity pattern of HEAD is NOT returned.
47476 
47477 INPUT PARAMETERS:
47478     A       -   lower triangular  matrix  A whose partial sparsity pattern
47479                 is  needed.  Only  sparsity  structure  matters,  specific
47480                 element values are ignored.
47481     Head,Tail-  sizes of the leading/trailing submatrices
47482 
47483     tmpParent,
47484     tmpChildrenR,
47485     tmpChildrenI,
47486     tmp1,
47487     FlagArray
47488             -   preallocated temporary arrays, length at least Head+Tail
47489     tmpBottomT,
47490     tmpUpdateT,
47491     tmpUpdate-  temporary sparsematrix instances; previously allocated
47492                 space will be reused.
47493 
47494 OUTPUT PARAMETERS:
47495     ATail   -   sparsity pattern of the lower triangular temporary  matrix
47496                 computed prior to Cholesky factorization. Matrix  elements
47497                 are initialized by placeholder values.
47498 
47499   -- ALGLIB PROJECT --
47500      Copyright 21.08.2021 by Bochkanov Sergey.
47501 *************************************************************************/
spchol_partialcholeskypattern(sparsematrix * a,ae_int_t head,ae_int_t tail,sparsematrix * atail,ae_vector * tmpparent,ae_vector * tmpchildrenr,ae_vector * tmpchildreni,ae_vector * tmp1,ae_vector * flagarray,sparsematrix * tmpbottomt,sparsematrix * tmpupdatet,sparsematrix * tmpupdate,sparsematrix * tmpnewtailt,ae_state * _state)47502 static void spchol_partialcholeskypattern(sparsematrix* a,
47503      ae_int_t head,
47504      ae_int_t tail,
47505      sparsematrix* atail,
47506      /* Integer */ ae_vector* tmpparent,
47507      /* Integer */ ae_vector* tmpchildrenr,
47508      /* Integer */ ae_vector* tmpchildreni,
47509      /* Integer */ ae_vector* tmp1,
47510      /* Boolean */ ae_vector* flagarray,
47511      sparsematrix* tmpbottomt,
47512      sparsematrix* tmpupdatet,
47513      sparsematrix* tmpupdate,
47514      sparsematrix* tmpnewtailt,
47515      ae_state *_state)
47516 {
47517     ae_int_t i;
47518     ae_int_t j;
47519     ae_int_t k;
47520     ae_int_t i1;
47521     ae_int_t ii;
47522     ae_int_t j1;
47523     ae_int_t jj;
47524     ae_int_t kb;
47525     ae_int_t cursize;
47526     double v;
47527 
47528 
47529     ae_assert(a->m==head+tail, "PartialCholeskyPattern: rows(A)!=Head+Tail", _state);
47530     ae_assert(a->n==head+tail, "PartialCholeskyPattern: cols(A)!=Head+Tail", _state);
47531     ae_assert(tmpparent->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmpParent)<Head+Tail+1", _state);
47532     ae_assert(tmpchildrenr->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmpChildrenR)<Head+Tail+1", _state);
47533     ae_assert(tmpchildreni->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmpChildrenI)<Head+Tail+1", _state);
47534     ae_assert(tmp1->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmp1)<Head+Tail+1", _state);
47535     ae_assert(flagarray->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmp1)<Head+Tail+1", _state);
47536     cursize = head+tail;
47537     v = (double)1/(double)cursize;
47538 
47539     /*
47540      * Compute leading Head columns of the Cholesky decomposition of A.
47541      * These columns will be used later to update sparsity pattern of the trailing
47542      * Tail*Tail matrix.
47543      *
47544      * Actually, we need just bottom Tail rows of these columns whose transpose (a
47545      * Head*Tail matrix) is stored in the tmpBottomT matrix. In order to do so in
47546      * the most efficient way we analyze elimination tree of the reordered matrix.
47547      *
47548      * In addition to BOTTOM matrix B we also compute an UPDATE matrix U which does
47549      * not include rows with duplicating sparsity patterns (only parents in the
47550      * elimination tree are included). Using update matrix to compute the sparsity
47551      * pattern is much more efficient because we do not spend time on children columns.
47552      *
47553      * NOTE: because Cholesky decomposition deals with matrix columns, we transpose
47554      *       A, store it into ATail, and work with transposed matrix.
47555      */
47556     sparsecopytransposecrsbuf(a, atail, _state);
47557     spchol_buildunorderedetree(a, cursize, tmpparent, tmp1, _state);
47558     spchol_fromparenttochildren(tmpparent, cursize, tmpchildrenr, tmpchildreni, tmp1, _state);
47559     tmpbottomt->m = head;
47560     tmpbottomt->n = tail;
47561     iallocv(head+1, &tmpbottomt->ridx, _state);
47562     tmpbottomt->ridx.ptr.p_int[0] = 0;
47563     tmpupdatet->m = head;
47564     tmpupdatet->n = tail;
47565     iallocv(head+1, &tmpupdatet->ridx, _state);
47566     tmpupdatet->ridx.ptr.p_int[0] = 0;
47567     bsetv(tail, ae_false, flagarray, _state);
47568     for(j=0; j<=head-1; j++)
47569     {
47570 
47571         /*
47572          * Start J-th row of the tmpBottomT
47573          */
47574         kb = tmpbottomt->ridx.ptr.p_int[j];
47575         igrowv(kb+tail, &tmpbottomt->idx, _state);
47576         rgrowv(kb+tail, &tmpbottomt->vals, _state);
47577 
47578         /*
47579          * copy sparsity pattern J-th column of the reordered matrix
47580          */
47581         jj = atail->didx.ptr.p_int[j];
47582         j1 = atail->ridx.ptr.p_int[j+1]-1;
47583         while(jj<=j1&&atail->idx.ptr.p_int[jj]<head)
47584         {
47585             jj = jj+1;
47586         }
47587         while(jj<=j1)
47588         {
47589             i = atail->idx.ptr.p_int[jj]-head;
47590             tmpbottomt->idx.ptr.p_int[kb] = i;
47591             tmpbottomt->vals.ptr.p_double[kb] = v;
47592             flagarray->ptr.p_bool[i] = ae_true;
47593             kb = kb+1;
47594             jj = jj+1;
47595         }
47596 
47597         /*
47598          * Fetch sparsity pattern from the immediate children in the elimination tree
47599          */
47600         for(jj=tmpchildrenr->ptr.p_int[j]; jj<=tmpchildrenr->ptr.p_int[j+1]-1; jj++)
47601         {
47602             j1 = tmpchildreni->ptr.p_int[jj];
47603             ii = tmpbottomt->ridx.ptr.p_int[j1];
47604             i1 = tmpbottomt->ridx.ptr.p_int[j1+1]-1;
47605             while(ii<=i1)
47606             {
47607                 i = tmpbottomt->idx.ptr.p_int[ii];
47608                 if( !flagarray->ptr.p_bool[i] )
47609                 {
47610                     tmpbottomt->idx.ptr.p_int[kb] = i;
47611                     tmpbottomt->vals.ptr.p_double[kb] = v;
47612                     flagarray->ptr.p_bool[i] = ae_true;
47613                     kb = kb+1;
47614                 }
47615                 ii = ii+1;
47616             }
47617         }
47618 
47619         /*
47620          * Finalize row of tmpBottomT
47621          */
47622         for(ii=tmpbottomt->ridx.ptr.p_int[j]; ii<=kb-1; ii++)
47623         {
47624             flagarray->ptr.p_bool[tmpbottomt->idx.ptr.p_int[ii]] = ae_false;
47625         }
47626         tmpbottomt->ridx.ptr.p_int[j+1] = kb;
47627 
47628         /*
47629          * Only columns that forward their sparsity pattern directly into the tail are added to tmpUpdateT
47630          */
47631         if( tmpparent->ptr.p_int[j]>=head )
47632         {
47633 
47634             /*
47635              * J-th column of the head forwards its sparsity pattern directly into the tail, save it to tmpUpdateT
47636              */
47637             k = tmpupdatet->ridx.ptr.p_int[j];
47638             igrowv(k+tail, &tmpupdatet->idx, _state);
47639             rgrowv(k+tail, &tmpupdatet->vals, _state);
47640             jj = tmpbottomt->ridx.ptr.p_int[j];
47641             j1 = tmpbottomt->ridx.ptr.p_int[j+1]-1;
47642             while(jj<=j1)
47643             {
47644                 i = tmpbottomt->idx.ptr.p_int[jj];
47645                 tmpupdatet->idx.ptr.p_int[k] = i;
47646                 tmpupdatet->vals.ptr.p_double[k] = v;
47647                 k = k+1;
47648                 jj = jj+1;
47649             }
47650             tmpupdatet->ridx.ptr.p_int[j+1] = k;
47651         }
47652         else
47653         {
47654 
47655             /*
47656              * J-th column of the head forwards its sparsity pattern to another column in the head,
47657              * no need to save it to tmpUpdateT. Save empty row.
47658              */
47659             k = tmpupdatet->ridx.ptr.p_int[j];
47660             tmpupdatet->ridx.ptr.p_int[j+1] = k;
47661         }
47662     }
47663     sparsecreatecrsinplace(tmpupdatet, _state);
47664     sparsecopytransposecrsbuf(tmpupdatet, tmpupdate, _state);
47665 
47666     /*
47667      * Apply update U*U' to the trailing Tail*Tail matrix and generate new
47668      * residual matrix in tmpNewTailT. Then transpose/copy it to TmpA[].
47669      */
47670     bsetv(tail, ae_false, flagarray, _state);
47671     tmpnewtailt->m = tail;
47672     tmpnewtailt->n = tail;
47673     iallocv(tail+1, &tmpnewtailt->ridx, _state);
47674     tmpnewtailt->ridx.ptr.p_int[0] = 0;
47675     for(j=0; j<=tail-1; j++)
47676     {
47677         k = tmpnewtailt->ridx.ptr.p_int[j];
47678         igrowv(k+tail, &tmpnewtailt->idx, _state);
47679         rgrowv(k+tail, &tmpnewtailt->vals, _state);
47680 
47681         /*
47682          * Copy row from the reordered/transposed matrix stored in TmpA
47683          */
47684         tmpnewtailt->idx.ptr.p_int[k] = j;
47685         tmpnewtailt->vals.ptr.p_double[k] = (double)(1);
47686         flagarray->ptr.p_bool[j] = ae_true;
47687         k = k+1;
47688         jj = atail->didx.ptr.p_int[head+j]+1;
47689         j1 = atail->ridx.ptr.p_int[head+j+1]-1;
47690         while(jj<=j1)
47691         {
47692             i = atail->idx.ptr.p_int[jj]-head;
47693             tmpnewtailt->idx.ptr.p_int[k] = i;
47694             tmpnewtailt->vals.ptr.p_double[k] = v;
47695             flagarray->ptr.p_bool[i] = ae_true;
47696             k = k+1;
47697             jj = jj+1;
47698         }
47699 
47700         /*
47701          * Apply update U*U' to J-th column of new tail (J-th row of tmpNewTailT):
47702          * * scan J-th row of U
47703          * * for each nonzero element, append corresponding row of U' (elements from J+1-th) to tmpNewTailT
47704          * * FlagArray[] is used to avoid duplication of nonzero elements
47705          */
47706         jj = tmpupdate->ridx.ptr.p_int[j];
47707         j1 = tmpupdate->ridx.ptr.p_int[j+1]-1;
47708         while(jj<=j1)
47709         {
47710 
47711             /*
47712              * Get row of U', skip leading elements up to J-th
47713              */
47714             ii = tmpupdatet->ridx.ptr.p_int[tmpupdate->idx.ptr.p_int[jj]];
47715             i1 = tmpupdatet->ridx.ptr.p_int[tmpupdate->idx.ptr.p_int[jj]+1]-1;
47716             while(ii<=i1&&tmpupdatet->idx.ptr.p_int[ii]<=j)
47717             {
47718                 ii = ii+1;
47719             }
47720 
47721             /*
47722              * Append the rest of the row to tmpNewTailT
47723              */
47724             while(ii<=i1)
47725             {
47726                 i = tmpupdatet->idx.ptr.p_int[ii];
47727                 if( !flagarray->ptr.p_bool[i] )
47728                 {
47729                     tmpnewtailt->idx.ptr.p_int[k] = i;
47730                     tmpnewtailt->vals.ptr.p_double[k] = v;
47731                     flagarray->ptr.p_bool[i] = ae_true;
47732                     k = k+1;
47733                 }
47734                 ii = ii+1;
47735             }
47736 
47737             /*
47738              * Continue or stop early (if we completely filled output buffer)
47739              */
47740             if( k-tmpnewtailt->ridx.ptr.p_int[j]==tail-j )
47741             {
47742                 break;
47743             }
47744             jj = jj+1;
47745         }
47746 
47747         /*
47748          * Finalize:
47749          * * clean up FlagArray[]
47750          * * save K to RIdx[]
47751          */
47752         for(ii=tmpnewtailt->ridx.ptr.p_int[j]; ii<=k-1; ii++)
47753         {
47754             flagarray->ptr.p_bool[tmpnewtailt->idx.ptr.p_int[ii]] = ae_false;
47755         }
47756         tmpnewtailt->ridx.ptr.p_int[j+1] = k;
47757     }
47758     sparsecreatecrsinplace(tmpnewtailt, _state);
47759     sparsecopytransposecrsbuf(tmpnewtailt, atail, _state);
47760 }
47761 
47762 
47763 /*************************************************************************
47764 This function is a specialized version of SparseSymmPermTbl()  that  takes
47765 into   account specifics of topological reorderings (improves performance)
47766 and additionally transposes its output.
47767 
47768 INPUT PARAMETERS
47769     A           -   sparse lower triangular matrix in CRS format.
47770     P           -   array[N] which stores permutation table;  P[I]=J means
47771                     that I-th row/column of matrix  A  is  moved  to  J-th
47772                     position. For performance reasons we do NOT check that
47773                     P[] is  a   correct   permutation  (that there  is  no
47774                     repetitions, just that all its elements  are  in [0,N)
47775                     range.
47776     B           -   sparse matrix object that will hold output.
47777                     Previously allocated memory will be reused as much  as
47778                     possible.
47779 
47780 OUTPUT PARAMETERS
47781     B           -   permuted and transposed upper triangular matrix in the
47782                     special internal CRS-like matrix format (MatrixType=-10082).
47783 
47784   -- ALGLIB PROJECT --
47785      Copyright 05.10.2020 by Bochkanov Sergey.
47786 *************************************************************************/
spchol_topologicalpermutation(sparsematrix * a,ae_vector * p,sparsematrix * b,ae_state * _state)47787 static void spchol_topologicalpermutation(sparsematrix* a,
47788      /* Integer */ ae_vector* p,
47789      sparsematrix* b,
47790      ae_state *_state)
47791 {
47792     ae_int_t i;
47793     ae_int_t j;
47794     ae_int_t jj;
47795     ae_int_t j0;
47796     ae_int_t j1;
47797     ae_int_t k;
47798     ae_int_t k0;
47799     ae_int_t n;
47800     ae_bool bflag;
47801 
47802 
47803     ae_assert(a->matrixtype==1, "TopologicalPermutation: incorrect matrix type (convert your matrix to CRS)", _state);
47804     ae_assert(p->cnt>=a->n, "TopologicalPermutation: Length(P)<N", _state);
47805     ae_assert(a->m==a->n, "TopologicalPermutation: matrix is non-square", _state);
47806     ae_assert(a->ninitialized==a->ridx.ptr.p_int[a->n], "TopologicalPermutation: integrity check failed", _state);
47807     bflag = ae_true;
47808     n = a->n;
47809     for(i=0; i<=n-1; i++)
47810     {
47811         j = p->ptr.p_int[i];
47812         bflag = (bflag&&j>=0)&&j<n;
47813     }
47814     ae_assert(bflag, "TopologicalPermutation: P[] contains values outside of [0,N) range", _state);
47815 
47816     /*
47817      * Prepare output
47818      */
47819     b->matrixtype = -10082;
47820     b->n = n;
47821     b->m = n;
47822     ivectorsetlengthatleast(&b->didx, n, _state);
47823     ivectorsetlengthatleast(&b->uidx, n, _state);
47824 
47825     /*
47826      * Determine row sizes (temporary stored in DIdx) and ranges
47827      */
47828     isetv(n, 0, &b->uidx, _state);
47829     for(i=0; i<=n-1; i++)
47830     {
47831         j0 = a->ridx.ptr.p_int[i];
47832         j1 = a->uidx.ptr.p_int[i]-1;
47833         for(jj=j0; jj<=j1; jj++)
47834         {
47835             j = a->idx.ptr.p_int[jj];
47836             b->uidx.ptr.p_int[j] = b->uidx.ptr.p_int[j]+1;
47837         }
47838     }
47839     for(i=0; i<=n-1; i++)
47840     {
47841         b->didx.ptr.p_int[p->ptr.p_int[i]] = b->uidx.ptr.p_int[i];
47842     }
47843     ivectorsetlengthatleast(&b->ridx, n+1, _state);
47844     b->ridx.ptr.p_int[0] = 0;
47845     for(i=0; i<=n-1; i++)
47846     {
47847         b->ridx.ptr.p_int[i+1] = b->ridx.ptr.p_int[i]+b->didx.ptr.p_int[i];
47848         b->uidx.ptr.p_int[i] = b->ridx.ptr.p_int[i];
47849     }
47850     b->ninitialized = b->ridx.ptr.p_int[n];
47851     ivectorsetlengthatleast(&b->idx, b->ninitialized, _state);
47852     rvectorsetlengthatleast(&b->vals, b->ninitialized, _state);
47853 
47854     /*
47855      * Process matrix
47856      */
47857     for(i=0; i<=n-1; i++)
47858     {
47859         j0 = a->ridx.ptr.p_int[i];
47860         j1 = a->uidx.ptr.p_int[i]-1;
47861         k = p->ptr.p_int[i];
47862         for(jj=j0; jj<=j1; jj++)
47863         {
47864             j = p->ptr.p_int[a->idx.ptr.p_int[jj]];
47865             k0 = b->uidx.ptr.p_int[j];
47866             b->idx.ptr.p_int[k0] = k;
47867             b->vals.ptr.p_double[k0] = a->vals.ptr.p_double[jj];
47868             b->uidx.ptr.p_int[j] = k0+1;
47869         }
47870     }
47871 }
47872 
47873 
/*************************************************************************
Determine nonzero pattern of the column.

This function takes as input:
* A^T - transpose of original input matrix
* index of column of L being computed
* SuperRowRIdx[] and SuperRowIdx[] - arrays that store row structure of
  supernodes, and NSuper - supernodes count
* ChildrenNodesR[], ChildrenNodesI[] - arrays that store children nodes
  for each node
* Node2Supernode[] - array that maps node indexes to supernodes
* TrueArray[] - array[N] that has all of its elements set to True (this
  invariant is preserved on output)
* Tmp0[] - array[N], temporary array

As output, it constructs nonzero pattern (diagonal element  not  included)
of  the  column #ColumnIdx on top  of  SuperRowIdx[]  array,  starting  at
location    SuperRowIdx[SuperRowRIdx[NSuper]]     and    till     location
SuperRowIdx[Result-1], where Result is a function result.

The SuperRowIdx[] array is automatically resized as needed.

It is important that this function computes nonzero pattern, but  it  does
NOT change other supernodal structures. The caller still has  to  finalize
the column (setup supernode ranges, mappings, etc).

  -- ALGLIB routine --
     20.09.2020
     Bochkanov Sergey
*************************************************************************/
static ae_int_t spchol_computenonzeropattern(sparsematrix* wrkat,
     ae_int_t columnidx,
     ae_int_t n,
     /* Integer */ ae_vector* superrowridx,
     /* Integer */ ae_vector* superrowidx,
     ae_int_t nsuper,
     /* Integer */ ae_vector* childrennodesr,
     /* Integer */ ae_vector* childrennodesi,
     /* Integer */ ae_vector* node2supernode,
     /* Boolean */ ae_vector* truearray,
     /* Integer */ ae_vector* tmp0,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t ii;
    ae_int_t jj;
    ae_int_t i0;
    ae_int_t i1;
    ae_int_t j0;
    ae_int_t j1;
    ae_int_t cidx;
    ae_int_t rfirst;
    ae_int_t rlast;
    ae_int_t tfirst;
    ae_int_t tlast;
    ae_int_t supernodalchildrencount;
    ae_int_t result;


    ae_assert(truearray->cnt>=n, "ComputeNonzeroPattern: input temporary is too short", _state);
    ae_assert(tmp0->cnt>=n, "ComputeNonzeroPattern: input temporary is too short", _state);

    /*
     * Determine supernodal children in Tmp0: map every child NODE of
     * ColumnIdx to its SUPERNODE and deduplicate using TrueArray[] as a
     * "not seen yet" flag set. TrueArray[] is restored to all-True right
     * after the scan (invariant promised by the function contract).
     */
    supernodalchildrencount = 0;
    i0 = childrennodesr->ptr.p_int[columnidx];
    i1 = childrennodesr->ptr.p_int[columnidx+1]-1;
    for(ii=i0; ii<=i1; ii++)
    {
        i = node2supernode->ptr.p_int[childrennodesi->ptr.p_int[ii]];
        if( truearray->ptr.p_bool[i] )
        {
            tmp0->ptr.p_int[supernodalchildrencount] = i;
            truearray->ptr.p_bool[i] = ae_false;
            supernodalchildrencount = supernodalchildrencount+1;
        }
    }
    for(i=0; i<=supernodalchildrencount-1; i++)
    {
        truearray->ptr.p_bool[tmp0->ptr.p_int[i]] = ae_true;
    }

    /*
     * Initialize column with the nonzero pattern coming directly from A:
     * copy subdiagonal indexes of ColumnIdx-th row of A^T (the leading +1
     * skips the diagonal element) to SuperRowIdx[RFirst:RLast).
     * The extra N elements at [TFirst,TFirst+N) are scratch space used as
     * a merge buffer by the ordered-merge branch below.
     */
    rfirst = superrowridx->ptr.p_int[nsuper];
    tfirst = rfirst+n;
    igrowv(rfirst+2*n, superrowidx, _state);
    i0 = wrkat->ridx.ptr.p_int[columnidx]+1;
    i1 = wrkat->ridx.ptr.p_int[columnidx+1];
    icopyvx(i1-i0, &wrkat->idx, i0, superrowidx, rfirst, _state);
    rlast = rfirst+(i1-i0);

    /*
     * For column with small number of children use ordered merge algorithm.
     * For column with many children it is better to perform unsorted merge,
     * and then sort the sequence.
     */
    if( supernodalchildrencount<=4 )
    {

        /*
         * Ordered merge. The best approach for small number of children,
         * but may have O(N^2) running time when O(N) children are present.
         */
        for(cidx=0; cidx<=supernodalchildrencount-1; cidx++)
        {

            /*
             * Skip initial elements that do not contribute to subdiagonal nonzero pattern
             */
            i0 = superrowridx->ptr.p_int[tmp0->ptr.p_int[cidx]];
            i1 = superrowridx->ptr.p_int[tmp0->ptr.p_int[cidx]+1]-1;
            while(i0<=i1&&superrowidx->ptr.p_int[i0]<=columnidx)
            {
                i0 = i0+1;
            }
            j0 = rfirst;
            j1 = rlast-1;

            /*
             * Handle degenerate cases: empty merge target or empty merge source.
             */
            if( j1<j0 )
            {
                /* Current pattern is empty: copy the child's pattern verbatim */
                icopyvx(i1-i0+1, superrowidx, i0, superrowidx, rlast, _state);
                rlast = rlast+(i1-i0+1);
                continue;
            }
            if( i1<i0 )
            {
                /* Child contributes nothing below the diagonal block */
                continue;
            }

            /*
             * General case: two non-empty sorted sequences given by [I0,I1] and [J0,J1],
             * have to be merged and stored into [RFirst,RLast).
             *
             * NOTE: the three IF's below intentionally re-check II/JJ that may
             *       have been advanced by a preceding IF within the same pass:
             *       first the smaller head is emitted, then equal heads are
             *       emitted once (deduplication). The loop exits as soon as
             *       either sequence is exhausted; leftovers are appended after
             *       the loop. The merged result is built in the scratch area
             *       [TFirst,TLast) and copied back to [RFirst,RLast) at the end.
             */
            ii = superrowidx->ptr.p_int[i0];
            jj = superrowidx->ptr.p_int[j0];
            tlast = tfirst;
            for(;;)
            {
                if( ii<jj )
                {
                    superrowidx->ptr.p_int[tlast] = ii;
                    tlast = tlast+1;
                    i0 = i0+1;
                    if( i0>i1 )
                    {
                        break;
                    }
                    ii = superrowidx->ptr.p_int[i0];
                }
                if( jj<ii )
                {
                    superrowidx->ptr.p_int[tlast] = jj;
                    tlast = tlast+1;
                    j0 = j0+1;
                    if( j0>j1 )
                    {
                        break;
                    }
                    jj = superrowidx->ptr.p_int[j0];
                }
                if( jj==ii )
                {
                    superrowidx->ptr.p_int[tlast] = ii;
                    tlast = tlast+1;
                    i0 = i0+1;
                    j0 = j0+1;
                    if( i0>i1 )
                    {
                        break;
                    }
                    if( j0>j1 )
                    {
                        break;
                    }
                    ii = superrowidx->ptr.p_int[i0];
                    jj = superrowidx->ptr.p_int[j0];
                }
            }
            /* Append leftovers of whichever sequence was not exhausted */
            for(ii=i0; ii<=i1; ii++)
            {
                superrowidx->ptr.p_int[tlast] = superrowidx->ptr.p_int[ii];
                tlast = tlast+1;
            }
            for(jj=j0; jj<=j1; jj++)
            {
                superrowidx->ptr.p_int[tlast] = superrowidx->ptr.p_int[jj];
                tlast = tlast+1;
            }
            icopyvx(tlast-tfirst, superrowidx, tfirst, superrowidx, rfirst, _state);
            rlast = rfirst+(tlast-tfirst);
        }
        result = rlast;
    }
    else
    {

        /*
         * Unordered merge followed by sort. Guaranteed N*logN worst case.
         * TrueArray[] temporarily marks indexes already present (False =
         * present) so each index is appended at most once; the invariant
         * "all True" is restored before sorting.
         */
        for(ii=rfirst; ii<=rlast-1; ii++)
        {
            truearray->ptr.p_bool[superrowidx->ptr.p_int[ii]] = ae_false;
        }
        for(cidx=0; cidx<=supernodalchildrencount-1; cidx++)
        {

            /*
             * Skip initial elements that do not contribute to subdiagonal nonzero pattern
             */
            i0 = superrowridx->ptr.p_int[tmp0->ptr.p_int[cidx]];
            i1 = superrowridx->ptr.p_int[tmp0->ptr.p_int[cidx]+1]-1;
            while(i0<=i1&&superrowidx->ptr.p_int[i0]<=columnidx)
            {
                i0 = i0+1;
            }

            /*
             * Append elements not present in the sequence
             */
            for(ii=i0; ii<=i1; ii++)
            {
                i = superrowidx->ptr.p_int[ii];
                if( truearray->ptr.p_bool[i] )
                {
                    superrowidx->ptr.p_int[rlast] = i;
                    rlast = rlast+1;
                    truearray->ptr.p_bool[i] = ae_false;
                }
            }
        }
        for(ii=rfirst; ii<=rlast-1; ii++)
        {
            truearray->ptr.p_bool[superrowidx->ptr.p_int[ii]] = ae_true;
        }
        tagsortmiddlei(superrowidx, rfirst, rlast-rfirst, _state);
        result = rlast;
    }
    return result;
}
48119 
48120 
/*************************************************************************
Update target supernode with data from one of its children. This operation
is a supernodal equivalent  of  the  column  update  in  the  left-looking
Cholesky.

The generic update has following form:

    S := S - scatter(U*D*Uc')

where
* S is an tHeight*tWidth rectangular target matrix that is:
  * stored with tStride>=tWidth in RowStorage[OffsS:OffsS+tHeight*tStride-1]
  * lower trapezoidal i.e. its leading tWidth*tWidth  submatrix  is  lower
    triangular. One may update either entire  tWidth*tWidth  submatrix  or
    just its lower part, because upper triangle is not referenced anyway.
  * the height of S is not given because it is not actually needed
* U is an uHeight*uRank rectangular update matrix that is:
  * stored with row stride uStride>=uRank in RowStorage[OffsU:OffsU+uHeight*uStride-1].
* Uc is the leading uWidth*uRank submatrix of U
* D is uRank*uRank diagonal matrix that is:
  * stored in DiagD[OffsD:OffsD+uRank-1]
  * unit, when Analysis.UnitD=True. In this case it can be ignored, although
    DiagD still contains 1's in all of its entries
* uHeight<=tHeight, uWidth<=tWidth, so scatter operation is needed to update
  S with smaller update.
* scatter() is an operation  that  extends  smaller  uHeight*uWidth update
  matrix U*Uc' into larger tHeight*tWidth target matrix by adding zero rows
  and columns into U*Uc':
  * I-th row of update modifies Raw2SMap[SuperRowIdx[URBase+I]]-th row  of
    the matrix S
  * J-th column of update modifies Raw2SMap[SuperRowIdx[URBase+J]]-th  col
    of the matrix S

Returns the index of the first row of the updating supernode that was  NOT
consumed by this update (the new WrkRow for subsequent updates).

  -- ALGLIB routine --
     20.09.2020
     Bochkanov Sergey
*************************************************************************/
static ae_int_t spchol_updatesupernode(spcholanalysis* analysis,
     ae_int_t sidx,
     ae_int_t cols0,
     ae_int_t cols1,
     ae_int_t offss,
     /* Integer */ ae_vector* raw2smap,
     ae_int_t uidx,
     ae_int_t wrkrow,
     /* Real    */ ae_vector* diagd,
     ae_int_t offsd,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t colu0;
    ae_int_t colu1;
    ae_int_t urbase;
    ae_int_t urlast;
    ae_int_t urank;
    ae_int_t uwidth;
    ae_int_t uheight;
    ae_int_t urowstride;
    ae_int_t twidth;
    ae_int_t theight;
    ae_int_t trowstride;
    ae_int_t targetrow;
    ae_int_t targetcol;
    ae_int_t offsu;
    ae_int_t offdiagrow;
    ae_int_t lastrow;
    ae_int_t offs0;
    ae_int_t offsj;
    ae_int_t offsk;
    double v;
    ae_int_t result;


    /*
     * Fetch geometry of the target supernode SIdx and the updating
     * supernode UIdx from the analysis structure.
     */
    twidth = cols1-cols0;
    theight = twidth+(analysis->superrowridx.ptr.p_int[sidx+1]-analysis->superrowridx.ptr.p_int[sidx]);
    offsu = analysis->rowoffsets.ptr.p_int[uidx];
    colu0 = analysis->supercolrange.ptr.p_int[uidx];
    colu1 = analysis->supercolrange.ptr.p_int[uidx+1];
    urbase = analysis->superrowridx.ptr.p_int[uidx];
    urlast = analysis->superrowridx.ptr.p_int[uidx+1];
    urank = colu1-colu0;
    trowstride = analysis->rowstrides.ptr.p_int[sidx];
    urowstride = analysis->rowstrides.ptr.p_int[uidx];

    /*
     * Skip leading uRank+WrkRow rows of U because they are not used.
     */
    offsu = offsu+(colu1-colu0+wrkrow)*urowstride;

    /*
     * Analyze range of rows in supernode LAdjPlus[II] and determine two subranges:
     * * one with indexes stored at SuperRowIdx[WrkRow:OffdiagRow);
     *   these indexes are the ones that intersect with range of rows/columns [ColS0,ColS1)
     *   occupied by diagonal block of the supernode SIdx
     * * one with indexes stored at SuperRowIdx[OffdiagRow:LastRow);
     *   these indexes are ones that intersect with range of rows occupied by
     *   offdiagonal block of the supernode SIdx
     */
    if( analysis->extendeddebug )
    {
        ae_assert(analysis->superrowidx.ptr.p_int[urbase+wrkrow]>=cols0, "SPCholFactorize: integrity check 6378 failed", _state);
        ae_assert(analysis->superrowidx.ptr.p_int[urbase+wrkrow]<cols1, "SPCholFactorize: integrity check 6729 failed", _state);
    }
    offdiagrow = wrkrow;
    lastrow = urlast-urbase;
    while(offdiagrow<lastrow&&analysis->superrowidx.ptr.p_int[offdiagrow+urbase]<cols1)
    {
        offdiagrow = offdiagrow+1;
    }
    uwidth = offdiagrow-wrkrow;
    uheight = lastrow-wrkrow;
    result = offdiagrow;
    if( analysis->extendeddebug )
    {

        /*
         * Extended integrity check (if requested)
         */
        ae_assert(wrkrow<offdiagrow&&analysis->superrowidx.ptr.p_int[wrkrow+urbase]>=cols0, "SPCholFactorize: integrity check failed (44trg6)", _state);
        for(i=wrkrow; i<=lastrow-1; i++)
        {
            ae_assert(raw2smap->ptr.p_int[analysis->superrowidx.ptr.p_int[i+urbase]]>=0, "SPCholFactorize: integrity check failed (43t63)", _state);
        }
    }

    /*
     * Handle special cases: try specialized update kernels first; each
     * kernel returns True when it was able to perform the update, in which
     * case we are done. Otherwise fall through to the reference code below.
     */
    if( trowstride==4 )
    {

        /*
         * Target is stride-4 column, try several kernels that may work with tWidth=3 and tWidth=4
         */
        if( ((uwidth==4&&twidth==4)&&urank==4)&&urowstride==4 )
        {
            if( spchol_updatekernel4444(&analysis->outputstorage, offss, theight, offsu, uheight, &analysis->diagd, colu0, raw2smap, &analysis->superrowidx, urbase+wrkrow, _state) )
            {
                return result;
            }
        }
        if( spchol_updatekernelabc4(&analysis->outputstorage, offss, twidth, offsu, uheight, urank, urowstride, uwidth, &analysis->diagd, colu0, raw2smap, &analysis->superrowidx, urbase+wrkrow, _state) )
        {
            return result;
        }
    }
    if( urank==1&&urowstride==1 )
    {
        if( spchol_updatekernelrank1(&analysis->outputstorage, offss, twidth, trowstride, offsu, uheight, uwidth, &analysis->diagd, colu0, raw2smap, &analysis->superrowidx, urbase+wrkrow, _state) )
        {
            return result;
        }
    }
    if( urank==2&&urowstride==2 )
    {
        if( spchol_updatekernelrank2(&analysis->outputstorage, offss, twidth, trowstride, offsu, uheight, uwidth, &analysis->diagd, colu0, raw2smap, &analysis->superrowidx, urbase+wrkrow, _state) )
        {
            return result;
        }
    }

    /*
     * Handle general update, reference code.
     * U2SMap[I] maps I-th row of the (trimmed) update matrix to the
     * corresponding row of the target supernode S.
     */
    ivectorsetlengthatleast(&analysis->u2smap, uheight, _state);
    for(i=0; i<=uheight-1; i++)
    {
        analysis->u2smap.ptr.p_int[i] = raw2smap->ptr.p_int[analysis->superrowidx.ptr.p_int[urbase+wrkrow+i]];
    }
    if( analysis->unitd )
    {

        /*
         * Unit D, vanilla Cholesky: S[k,j] -= sum_i U[j,i]*U[k,i]
         */
        for(k=0; k<=uheight-1; k++)
        {
            targetrow = offss+analysis->u2smap.ptr.p_int[k]*trowstride;
            for(j=0; j<=uwidth-1; j++)
            {
                targetcol = analysis->u2smap.ptr.p_int[j];
                offsj = offsu+j*urowstride;
                offsk = offsu+k*urowstride;
                offs0 = targetrow+targetcol;
                v = analysis->outputstorage.ptr.p_double[offs0];
                for(i=0; i<=urank-1; i++)
                {
                    v = v-analysis->outputstorage.ptr.p_double[offsj+i]*analysis->outputstorage.ptr.p_double[offsk+i];
                }
                analysis->outputstorage.ptr.p_double[offs0] = v;
            }
        }
    }
    else
    {

        /*
         * Non-unit D, LDLT decomposition: S[k,j] -= sum_i U[j,i]*D[i]*U[k,i]
         */
        for(k=0; k<=uheight-1; k++)
        {
            targetrow = offss+analysis->u2smap.ptr.p_int[k]*trowstride;
            for(j=0; j<=uwidth-1; j++)
            {
                targetcol = analysis->u2smap.ptr.p_int[j];
                offsj = offsu+j*urowstride;
                offsk = offsu+k*urowstride;
                offs0 = targetrow+targetcol;
                v = analysis->outputstorage.ptr.p_double[offs0];
                for(i=0; i<=urank-1; i++)
                {
                    v = v-analysis->outputstorage.ptr.p_double[offsj+i]*diagd->ptr.p_double[offsd+i]*analysis->outputstorage.ptr.p_double[offsk+i];
                }
                analysis->outputstorage.ptr.p_double[offs0] = v;
            }
        }
    }
    return result;
}
48342 
48343 
48344 /*************************************************************************
48345 Factorizes target supernode, returns True on success, False on failure.
48346 
48347   -- ALGLIB routine --
48348      20.09.2020
48349      Bochkanov Sergey
48350 *************************************************************************/
static ae_bool spchol_factorizesupernode(spcholanalysis* analysis,
     ae_int_t sidx,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t offss;
    ae_int_t blocksize;
    ae_int_t offdiagsize;
    ae_int_t sstride;
    double v;
    double vs;
    double possignvraw;
    ae_bool controlpivot;
    ae_bool controloverflow;
    ae_bool result;


    /*
     * Load supernode geometry:
     * * [Cols0,Cols1)  - column range covered by supernode SIdx
     * * OffSS          - offset of supernode storage in OutputStorage
     * * BlockSize      - number of columns in the supernode
     * * OffDiagSize    - number of off-diagonal rows stored below the
     *                    dense diagonal block
     * * SStride        - row stride of the supernode storage
     */
    cols0 = analysis->supercolrange.ptr.p_int[sidx];
    cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
    offss = analysis->rowoffsets.ptr.p_int[sidx];
    blocksize = cols1-cols0;
    offdiagsize = analysis->superrowridx.ptr.p_int[sidx+1]-analysis->superrowridx.ptr.p_int[sidx];
    sstride = analysis->rowstrides.ptr.p_int[sidx];

    /*
     * Modified factorization controls, activated only for ModType=1:
     * * ControlPivot    - ModParam0>0 activates pivot modification
     * * ControlOverflow - ModParam1>0 activates growth/overflow check
     */
    controlpivot = analysis->modtype==1&&ae_fp_greater(analysis->modparam0,(double)(0));
    controloverflow = analysis->modtype==1&&ae_fp_greater(analysis->modparam1,(double)(0));
    if( analysis->unitd )
    {

        /*
         * Classic Cholesky (unit D): factorize dense block column by column,
         * updating the off-diagonal rows along the way.
         */
        for(j=0; j<=blocksize-1; j++)
        {

            /*
             * Compute J-th column: subtract contributions of previously
             * factorized columns I<J. VS accumulates sum of magnitudes
             * of the column, used below by the overflow check.
             */
            vs = (double)(0);
            for(k=j; k<=blocksize+offdiagsize-1; k++)
            {
                v = analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                for(i=0; i<=j-1; i++)
                {
                    v = v-analysis->outputstorage.ptr.p_double[offss+k*sstride+i]*analysis->outputstorage.ptr.p_double[offss+j*sstride+i];
                }
                analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v;
                vs = vs+ae_fabs(v, _state);
            }
            if( controloverflow&&ae_fp_greater(vs,analysis->modparam1) )
            {

                /*
                 * Possible failure due to accumulation of numerical errors
                 */
                result = ae_false;
                return result;
            }

            /*
             * Handle pivot element
             */
            v = analysis->outputstorage.ptr.p_double[offss+j*sstride+j];
            if( controlpivot&&ae_fp_less_eq(v,analysis->modparam0) )
            {

                /*
                 * Basic modified Cholesky: pivot is too small (or negative),
                 * replace it by sqrt(ModParam0) and rescale the column.
                 */
                v = ae_sqrt(analysis->modparam0, _state);
                analysis->diagd.ptr.p_double[cols0+j] = 1.0;
                analysis->outputstorage.ptr.p_double[offss+j*sstride+j] = v;
                v = 1/v;
                for(k=j+1; k<=blocksize+offdiagsize-1; k++)
                {
                    analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v*analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                }
            }
            else
            {

                /*
                 * Default case: non-positive pivot without modification
                 * means the matrix is not positive definite - fail.
                 */
                if( ae_fp_less_eq(v,(double)(0)) )
                {
                    result = ae_false;
                    return result;
                }
                analysis->diagd.ptr.p_double[cols0+j] = 1.0;
                v = 1/ae_sqrt(v, _state);
                for(k=j; k<=blocksize+offdiagsize-1; k++)
                {
                    analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v*analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                }
            }
        }
    }
    else
    {

        /*
         * LDLT with diagonal D (allows indefinite pivots)
         */
        for(j=0; j<=blocksize-1; j++)
        {

            /*
             * Compute J-th column, scaling each subtracted term by the
             * corresponding entry of D. VS accumulates sum of magnitudes
             * for the overflow check.
             */
            vs = (double)(0);
            for(k=j; k<=blocksize+offdiagsize-1; k++)
            {
                v = analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                for(i=0; i<=j-1; i++)
                {
                    v = v-analysis->outputstorage.ptr.p_double[offss+k*sstride+i]*analysis->diagd.ptr.p_double[cols0+i]*analysis->outputstorage.ptr.p_double[offss+j*sstride+i];
                }
                analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v;
                vs = vs+ae_fabs(v, _state);
            }
            if( controloverflow&&ae_fp_greater(vs,analysis->modparam1) )
            {

                /*
                 * Possible failure due to accumulation of numerical errors
                 */
                result = ae_false;
                return result;
            }

            /*
             * Handle pivot element. PosSignVRaw is the sign (+1/-1) of the
             * ORIGINAL (unfactorized) diagonal element; it is used so that
             * the modified pivot keeps the sign of the raw input.
             */
            possignvraw = possign(analysis->inputstorage.ptr.p_double[offss+j*sstride+j], _state);
            v = analysis->outputstorage.ptr.p_double[offss+j*sstride+j];
            if( controlpivot&&ae_fp_less_eq(v/possignvraw,analysis->modparam0) )
            {

                /*
                 * Basic modified LDLT: pivot magnitude (relative to raw
                 * sign) is too small, clamp it to +/-ModParam0.
                 */
                v = possignvraw*analysis->modparam0;
                analysis->diagd.ptr.p_double[cols0+j] = v;
                analysis->outputstorage.ptr.p_double[offss+j*sstride+j] = 1.0;
                v = 1/v;
                for(k=j+1; k<=blocksize+offdiagsize-1; k++)
                {
                    analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v*analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                }
            }
            else
            {

                /*
                 * Unmodified LDLT: exactly zero pivot is a hard failure.
                 */
                if( ae_fp_eq(v,(double)(0)) )
                {
                    result = ae_false;
                    return result;
                }
                analysis->diagd.ptr.p_double[cols0+j] = v;
                v = 1/v;
                for(k=j; k<=blocksize+offdiagsize-1; k++)
                {
                    analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v*analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                }
            }
        }
    }
    result = ae_true;
    return result;
}
48528 
48529 
48530 /*************************************************************************
48531 This function returns recommended stride for given row size
48532 
48533   -- ALGLIB routine --
48534      20.10.2020
48535      Bochkanov Sergey
48536 *************************************************************************/
spchol_recommendedstridefor(ae_int_t rowsize,ae_state * _state)48537 static ae_int_t spchol_recommendedstridefor(ae_int_t rowsize,
48538      ae_state *_state)
48539 {
48540     ae_int_t result;
48541 
48542 
48543     result = rowsize;
48544     if( rowsize==3 )
48545     {
48546         result = 4;
48547     }
48548     return result;
48549 }
48550 
48551 
48552 /*************************************************************************
48553 This function aligns position in array in order to  better  accommodate to
48554 SIMD specifics.
48555 
48556 NOTE: this function aligns position measured in double precision  numbers,
48557       not in bits or bytes. If you want to have 256-bit aligned  position,
48558       round Offs to nearest multiple of 4 that is not less than Offs.
48559 
48560   -- ALGLIB routine --
48561      20.10.2020
48562      Bochkanov Sergey
48563 *************************************************************************/
spchol_alignpositioninarray(ae_int_t offs,ae_state * _state)48564 static ae_int_t spchol_alignpositioninarray(ae_int_t offs,
48565      ae_state *_state)
48566 {
48567     ae_int_t result;
48568 
48569 
48570     result = offs;
48571     if( offs%4!=0 )
48572     {
48573         result = result+(4-offs%4);
48574     }
48575     return result;
48576 }
48577 
48578 
48579 #ifdef ALGLIB_NO_FAST_KERNELS
48580 /*************************************************************************
48581 Fast kernels for small supernodal updates: special 4x4x4x4 function.
48582 
48583 ! See comments on UpdateSupernode() for information  on generic supernodal
48584 ! updates, including notation used below.
48585 
48586 The generic update has following form:
48587 
48588     S := S - scatter(U*D*Uc')
48589 
48590 This specialized function performs 4x4x4x4 update, i.e.:
48591 * S is a tHeight*4 matrix
48592 * U is a uHeight*4 matrix
48593 * Uc' is a 4*4 matrix
48594 * scatter() scatters rows of U*Uc', but does not scatter columns (they are
48595   densely packed).
48596 
48597 Return value:
48598 * True if update was applied
48599 * False if kernel refused to perform an update.
48600 
48601   -- ALGLIB routine --
48602      20.09.2020
48603      Bochkanov Sergey
48604 *************************************************************************/
spchol_updatekernel4444(ae_vector * rowstorage,ae_int_t offss,ae_int_t sheight,ae_int_t offsu,ae_int_t uheight,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)48605 static ae_bool spchol_updatekernel4444(/* Real    */ ae_vector* rowstorage,
48606      ae_int_t offss,
48607      ae_int_t sheight,
48608      ae_int_t offsu,
48609      ae_int_t uheight,
48610      /* Real    */ ae_vector* diagd,
48611      ae_int_t offsd,
48612      /* Integer */ ae_vector* raw2smap,
48613      /* Integer */ ae_vector* superrowidx,
48614      ae_int_t urbase,
48615      ae_state *_state)
48616 {
48617     ae_int_t k;
48618     ae_int_t targetrow;
48619     ae_int_t offsk;
48620     double d0;
48621     double d1;
48622     double d2;
48623     double d3;
48624     double u00;
48625     double u01;
48626     double u02;
48627     double u03;
48628     double u10;
48629     double u11;
48630     double u12;
48631     double u13;
48632     double u20;
48633     double u21;
48634     double u22;
48635     double u23;
48636     double u30;
48637     double u31;
48638     double u32;
48639     double u33;
48640     double uk0;
48641     double uk1;
48642     double uk2;
48643     double uk3;
48644     ae_bool result;
48645 
48646 
48647     d0 = diagd->ptr.p_double[offsd+0];
48648     d1 = diagd->ptr.p_double[offsd+1];
48649     d2 = diagd->ptr.p_double[offsd+2];
48650     d3 = diagd->ptr.p_double[offsd+3];
48651     u00 = d0*rowstorage->ptr.p_double[offsu+0*4+0];
48652     u01 = d1*rowstorage->ptr.p_double[offsu+0*4+1];
48653     u02 = d2*rowstorage->ptr.p_double[offsu+0*4+2];
48654     u03 = d3*rowstorage->ptr.p_double[offsu+0*4+3];
48655     u10 = d0*rowstorage->ptr.p_double[offsu+1*4+0];
48656     u11 = d1*rowstorage->ptr.p_double[offsu+1*4+1];
48657     u12 = d2*rowstorage->ptr.p_double[offsu+1*4+2];
48658     u13 = d3*rowstorage->ptr.p_double[offsu+1*4+3];
48659     u20 = d0*rowstorage->ptr.p_double[offsu+2*4+0];
48660     u21 = d1*rowstorage->ptr.p_double[offsu+2*4+1];
48661     u22 = d2*rowstorage->ptr.p_double[offsu+2*4+2];
48662     u23 = d3*rowstorage->ptr.p_double[offsu+2*4+3];
48663     u30 = d0*rowstorage->ptr.p_double[offsu+3*4+0];
48664     u31 = d1*rowstorage->ptr.p_double[offsu+3*4+1];
48665     u32 = d2*rowstorage->ptr.p_double[offsu+3*4+2];
48666     u33 = d3*rowstorage->ptr.p_double[offsu+3*4+3];
48667     for(k=0; k<=uheight-1; k++)
48668     {
48669         targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48670         offsk = offsu+k*4;
48671         uk0 = rowstorage->ptr.p_double[offsk+0];
48672         uk1 = rowstorage->ptr.p_double[offsk+1];
48673         uk2 = rowstorage->ptr.p_double[offsk+2];
48674         uk3 = rowstorage->ptr.p_double[offsk+3];
48675         rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
48676         rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
48677         rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
48678         rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
48679     }
48680     result = ae_true;
48681     return result;
48682 }
48683 #endif
48684 
48685 
48686 #ifdef ALGLIB_NO_FAST_KERNELS
48687 /*************************************************************************
Fast kernels for small supernodal updates: special AxBxCx4 function.
48689 
48690 ! See comments on UpdateSupernode() for information  on generic supernodal
48691 ! updates, including notation used below.
48692 
48693 The generic update has following form:
48694 
48695     S := S - scatter(U*D*Uc')
48696 
48697 This specialized function performs AxBxCx4 update, i.e.:
48698 * S is a tHeight*A matrix with row stride equal to 4 (usually it means that
48699   it has 3 or 4 columns)
48700 * U is a uHeight*B matrix
48701 * Uc' is a B*C matrix, with C<=A
48702 * scatter() scatters rows and columns of U*Uc'
48703 
48704 Return value:
48705 * True if update was applied
48706 * False if kernel refused to perform an update (quick exit for unsupported
48707   combinations of input sizes)
48708 
48709   -- ALGLIB routine --
48710      20.09.2020
48711      Bochkanov Sergey
48712 *************************************************************************/
spchol_updatekernelabc4(ae_vector * rowstorage,ae_int_t offss,ae_int_t twidth,ae_int_t offsu,ae_int_t uheight,ae_int_t urank,ae_int_t urowstride,ae_int_t uwidth,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)48713 static ae_bool spchol_updatekernelabc4(/* Real    */ ae_vector* rowstorage,
48714      ae_int_t offss,
48715      ae_int_t twidth,
48716      ae_int_t offsu,
48717      ae_int_t uheight,
48718      ae_int_t urank,
48719      ae_int_t urowstride,
48720      ae_int_t uwidth,
48721      /* Real    */ ae_vector* diagd,
48722      ae_int_t offsd,
48723      /* Integer */ ae_vector* raw2smap,
48724      /* Integer */ ae_vector* superrowidx,
48725      ae_int_t urbase,
48726      ae_state *_state)
48727 {
48728     ae_int_t k;
48729     ae_int_t targetrow;
48730     ae_int_t targetcol;
48731     ae_int_t offsk;
48732     double d0;
48733     double d1;
48734     double d2;
48735     double d3;
48736     double u00;
48737     double u01;
48738     double u02;
48739     double u03;
48740     double u10;
48741     double u11;
48742     double u12;
48743     double u13;
48744     double u20;
48745     double u21;
48746     double u22;
48747     double u23;
48748     double u30;
48749     double u31;
48750     double u32;
48751     double u33;
48752     double uk0;
48753     double uk1;
48754     double uk2;
48755     double uk3;
48756     ae_int_t srccol0;
48757     ae_int_t srccol1;
48758     ae_int_t srccol2;
48759     ae_int_t srccol3;
48760     ae_bool result;
48761 
48762 
48763 
48764     /*
48765      * Filter out unsupported combinations (ones that are too sparse for the non-SIMD code)
48766      */
48767     result = ae_false;
48768     if( twidth<3||twidth>4 )
48769     {
48770         return result;
48771     }
48772     if( uwidth<3||uwidth>4 )
48773     {
48774         return result;
48775     }
48776     if( urank>4 )
48777     {
48778         return result;
48779     }
48780 
48781     /*
48782      * Determine source columns for target columns, -1 if target column
48783      * is not updated.
48784      */
48785     srccol0 = -1;
48786     srccol1 = -1;
48787     srccol2 = -1;
48788     srccol3 = -1;
48789     for(k=0; k<=uwidth-1; k++)
48790     {
48791         targetcol = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]];
48792         if( targetcol==0 )
48793         {
48794             srccol0 = k;
48795         }
48796         if( targetcol==1 )
48797         {
48798             srccol1 = k;
48799         }
48800         if( targetcol==2 )
48801         {
48802             srccol2 = k;
48803         }
48804         if( targetcol==3 )
48805         {
48806             srccol3 = k;
48807         }
48808     }
48809 
48810     /*
48811      * Load update matrix into aligned/rearranged 4x4 storage
48812      */
48813     d0 = (double)(0);
48814     d1 = (double)(0);
48815     d2 = (double)(0);
48816     d3 = (double)(0);
48817     u00 = (double)(0);
48818     u01 = (double)(0);
48819     u02 = (double)(0);
48820     u03 = (double)(0);
48821     u10 = (double)(0);
48822     u11 = (double)(0);
48823     u12 = (double)(0);
48824     u13 = (double)(0);
48825     u20 = (double)(0);
48826     u21 = (double)(0);
48827     u22 = (double)(0);
48828     u23 = (double)(0);
48829     u30 = (double)(0);
48830     u31 = (double)(0);
48831     u32 = (double)(0);
48832     u33 = (double)(0);
48833     if( urank>=1 )
48834     {
48835         d0 = diagd->ptr.p_double[offsd+0];
48836     }
48837     if( urank>=2 )
48838     {
48839         d1 = diagd->ptr.p_double[offsd+1];
48840     }
48841     if( urank>=3 )
48842     {
48843         d2 = diagd->ptr.p_double[offsd+2];
48844     }
48845     if( urank>=4 )
48846     {
48847         d3 = diagd->ptr.p_double[offsd+3];
48848     }
48849     if( srccol0>=0 )
48850     {
48851         if( urank>=1 )
48852         {
48853             u00 = d0*rowstorage->ptr.p_double[offsu+srccol0*urowstride+0];
48854         }
48855         if( urank>=2 )
48856         {
48857             u01 = d1*rowstorage->ptr.p_double[offsu+srccol0*urowstride+1];
48858         }
48859         if( urank>=3 )
48860         {
48861             u02 = d2*rowstorage->ptr.p_double[offsu+srccol0*urowstride+2];
48862         }
48863         if( urank>=4 )
48864         {
48865             u03 = d3*rowstorage->ptr.p_double[offsu+srccol0*urowstride+3];
48866         }
48867     }
48868     if( srccol1>=0 )
48869     {
48870         if( urank>=1 )
48871         {
48872             u10 = d0*rowstorage->ptr.p_double[offsu+srccol1*urowstride+0];
48873         }
48874         if( urank>=2 )
48875         {
48876             u11 = d1*rowstorage->ptr.p_double[offsu+srccol1*urowstride+1];
48877         }
48878         if( urank>=3 )
48879         {
48880             u12 = d2*rowstorage->ptr.p_double[offsu+srccol1*urowstride+2];
48881         }
48882         if( urank>=4 )
48883         {
48884             u13 = d3*rowstorage->ptr.p_double[offsu+srccol1*urowstride+3];
48885         }
48886     }
48887     if( srccol2>=0 )
48888     {
48889         if( urank>=1 )
48890         {
48891             u20 = d0*rowstorage->ptr.p_double[offsu+srccol2*urowstride+0];
48892         }
48893         if( urank>=2 )
48894         {
48895             u21 = d1*rowstorage->ptr.p_double[offsu+srccol2*urowstride+1];
48896         }
48897         if( urank>=3 )
48898         {
48899             u22 = d2*rowstorage->ptr.p_double[offsu+srccol2*urowstride+2];
48900         }
48901         if( urank>=4 )
48902         {
48903             u23 = d3*rowstorage->ptr.p_double[offsu+srccol2*urowstride+3];
48904         }
48905     }
48906     if( srccol3>=0 )
48907     {
48908         if( urank>=1 )
48909         {
48910             u30 = d0*rowstorage->ptr.p_double[offsu+srccol3*urowstride+0];
48911         }
48912         if( urank>=2 )
48913         {
48914             u31 = d1*rowstorage->ptr.p_double[offsu+srccol3*urowstride+1];
48915         }
48916         if( urank>=3 )
48917         {
48918             u32 = d2*rowstorage->ptr.p_double[offsu+srccol3*urowstride+2];
48919         }
48920         if( urank>=4 )
48921         {
48922             u33 = d3*rowstorage->ptr.p_double[offsu+srccol3*urowstride+3];
48923         }
48924     }
48925 
48926     /*
48927      * Run update
48928      */
48929     if( urank==1 )
48930     {
48931         for(k=0; k<=uheight-1; k++)
48932         {
48933             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48934             offsk = offsu+k*urowstride;
48935             uk0 = rowstorage->ptr.p_double[offsk+0];
48936             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0;
48937             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0;
48938             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0;
48939             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0;
48940         }
48941     }
48942     if( urank==2 )
48943     {
48944         for(k=0; k<=uheight-1; k++)
48945         {
48946             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48947             offsk = offsu+k*urowstride;
48948             uk0 = rowstorage->ptr.p_double[offsk+0];
48949             uk1 = rowstorage->ptr.p_double[offsk+1];
48950             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1;
48951             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1;
48952             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1;
48953             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1;
48954         }
48955     }
48956     if( urank==3 )
48957     {
48958         for(k=0; k<=uheight-1; k++)
48959         {
48960             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48961             offsk = offsu+k*urowstride;
48962             uk0 = rowstorage->ptr.p_double[offsk+0];
48963             uk1 = rowstorage->ptr.p_double[offsk+1];
48964             uk2 = rowstorage->ptr.p_double[offsk+2];
48965             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2;
48966             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2;
48967             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2;
48968             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2;
48969         }
48970     }
48971     if( urank==4 )
48972     {
48973         for(k=0; k<=uheight-1; k++)
48974         {
48975             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48976             offsk = offsu+k*urowstride;
48977             uk0 = rowstorage->ptr.p_double[offsk+0];
48978             uk1 = rowstorage->ptr.p_double[offsk+1];
48979             uk2 = rowstorage->ptr.p_double[offsk+2];
48980             uk3 = rowstorage->ptr.p_double[offsk+3];
48981             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
48982             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
48983             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
48984             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
48985         }
48986     }
48987     result = ae_true;
48988     return result;
48989 }
48990 #endif
48991 
48992 
48993 /*************************************************************************
48994 Fast kernels for small supernodal updates: special rank-1 function.
48995 
48996 ! See comments on UpdateSupernode() for information  on generic supernodal
48997 ! updates, including notation used below.
48998 
48999 The generic update has following form:
49000 
49001     S := S - scatter(U*D*Uc')
49002 
49003 This specialized function performs rank-1 update, i.e.:
49004 * S is a tHeight*A matrix, with A<=4
49005 * U is a uHeight*1 matrix with unit stride
49006 * Uc' is a 1*B matrix, with B<=A
49007 * scatter() scatters rows and columns of U*Uc'
49008 
49009 Return value:
49010 * True if update was applied
49011 * False if kernel refused to perform an update (quick exit for unsupported
49012   combinations of input sizes)
49013 
49014   -- ALGLIB routine --
49015      20.09.2020
49016      Bochkanov Sergey
49017 *************************************************************************/
spchol_updatekernelrank1(ae_vector * rowstorage,ae_int_t offss,ae_int_t twidth,ae_int_t trowstride,ae_int_t offsu,ae_int_t uheight,ae_int_t uwidth,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)49018 static ae_bool spchol_updatekernelrank1(/* Real    */ ae_vector* rowstorage,
49019      ae_int_t offss,
49020      ae_int_t twidth,
49021      ae_int_t trowstride,
49022      ae_int_t offsu,
49023      ae_int_t uheight,
49024      ae_int_t uwidth,
49025      /* Real    */ ae_vector* diagd,
49026      ae_int_t offsd,
49027      /* Integer */ ae_vector* raw2smap,
49028      /* Integer */ ae_vector* superrowidx,
49029      ae_int_t urbase,
49030      ae_state *_state)
49031 {
49032     ae_int_t k;
49033     ae_int_t targetrow;
49034     double d0;
49035     double u00;
49036     double u10;
49037     double u20;
49038     double u30;
49039     double uk;
49040     ae_int_t col0;
49041     ae_int_t col1;
49042     ae_int_t col2;
49043     ae_int_t col3;
49044     ae_bool result;
49045 
49046 
49047 
49048     /*
49049      * Filter out unsupported combinations (ones that are too sparse for the non-SIMD code)
49050      */
49051     result = ae_false;
49052     if( twidth>4 )
49053     {
49054         return result;
49055     }
49056     if( uwidth>4 )
49057     {
49058         return result;
49059     }
49060 
49061     /*
49062      * Determine target columns, load update matrix
49063      */
49064     d0 = diagd->ptr.p_double[offsd];
49065     col0 = 0;
49066     col1 = 0;
49067     col2 = 0;
49068     col3 = 0;
49069     u00 = (double)(0);
49070     u10 = (double)(0);
49071     u20 = (double)(0);
49072     u30 = (double)(0);
49073     if( uwidth>=1 )
49074     {
49075         col0 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+0]];
49076         u00 = d0*rowstorage->ptr.p_double[offsu+0];
49077     }
49078     if( uwidth>=2 )
49079     {
49080         col1 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+1]];
49081         u10 = d0*rowstorage->ptr.p_double[offsu+1];
49082     }
49083     if( uwidth>=3 )
49084     {
49085         col2 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+2]];
49086         u20 = d0*rowstorage->ptr.p_double[offsu+2];
49087     }
49088     if( uwidth>=4 )
49089     {
49090         col3 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+3]];
49091         u30 = d0*rowstorage->ptr.p_double[offsu+3];
49092     }
49093 
49094     /*
49095      * Run update
49096      */
49097     if( uwidth==1 )
49098     {
49099         for(k=0; k<=uheight-1; k++)
49100         {
49101             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49102             uk = rowstorage->ptr.p_double[offsu+k];
49103             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk;
49104         }
49105     }
49106     if( uwidth==2 )
49107     {
49108         for(k=0; k<=uheight-1; k++)
49109         {
49110             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49111             uk = rowstorage->ptr.p_double[offsu+k];
49112             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk;
49113             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk;
49114         }
49115     }
49116     if( uwidth==3 )
49117     {
49118         for(k=0; k<=uheight-1; k++)
49119         {
49120             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49121             uk = rowstorage->ptr.p_double[offsu+k];
49122             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk;
49123             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk;
49124             rowstorage->ptr.p_double[targetrow+col2] = rowstorage->ptr.p_double[targetrow+col2]-u20*uk;
49125         }
49126     }
49127     if( uwidth==4 )
49128     {
49129         for(k=0; k<=uheight-1; k++)
49130         {
49131             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49132             uk = rowstorage->ptr.p_double[offsu+k];
49133             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk;
49134             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk;
49135             rowstorage->ptr.p_double[targetrow+col2] = rowstorage->ptr.p_double[targetrow+col2]-u20*uk;
49136             rowstorage->ptr.p_double[targetrow+col3] = rowstorage->ptr.p_double[targetrow+col3]-u30*uk;
49137         }
49138     }
49139     result = ae_true;
49140     return result;
49141 }
49142 
49143 
49144 /*************************************************************************
49145 Fast kernels for small supernodal updates: special rank-2 function.
49146 
49147 ! See comments on UpdateSupernode() for information  on generic supernodal
49148 ! updates, including notation used below.
49149 
49150 The generic update has following form:
49151 
49152     S := S - scatter(U*D*Uc')
49153 
49154 This specialized function performs rank-2 update, i.e.:
49155 * S is a tHeight*A matrix, with A<=4
49156 * U is a uHeight*2 matrix with row stride equal to 2
49157 * Uc' is a 2*B matrix, with B<=A
* scatter() scatters rows and columns of U*Uc'
49159 
49160 Return value:
49161 * True if update was applied
49162 * False if kernel refused to perform an update (quick exit for unsupported
49163   combinations of input sizes)
49164 
49165   -- ALGLIB routine --
49166      20.09.2020
49167      Bochkanov Sergey
49168 *************************************************************************/
spchol_updatekernelrank2(ae_vector * rowstorage,ae_int_t offss,ae_int_t twidth,ae_int_t trowstride,ae_int_t offsu,ae_int_t uheight,ae_int_t uwidth,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)49169 static ae_bool spchol_updatekernelrank2(/* Real    */ ae_vector* rowstorage,
49170      ae_int_t offss,
49171      ae_int_t twidth,
49172      ae_int_t trowstride,
49173      ae_int_t offsu,
49174      ae_int_t uheight,
49175      ae_int_t uwidth,
49176      /* Real    */ ae_vector* diagd,
49177      ae_int_t offsd,
49178      /* Integer */ ae_vector* raw2smap,
49179      /* Integer */ ae_vector* superrowidx,
49180      ae_int_t urbase,
49181      ae_state *_state)
49182 {
49183     ae_int_t k;
49184     ae_int_t targetrow;
49185     double d0;
49186     double d1;
49187     double u00;
49188     double u10;
49189     double u20;
49190     double u30;
49191     double u01;
49192     double u11;
49193     double u21;
49194     double u31;
49195     double uk0;
49196     double uk1;
49197     ae_int_t col0;
49198     ae_int_t col1;
49199     ae_int_t col2;
49200     ae_int_t col3;
49201     ae_bool result;
49202 
49203 
49204 
49205     /*
49206      * Filter out unsupported combinations (ones that are too sparse for the non-SIMD code)
49207      */
49208     result = ae_false;
49209     if( twidth>4 )
49210     {
49211         return result;
49212     }
49213     if( uwidth>4 )
49214     {
49215         return result;
49216     }
49217 
49218     /*
49219      * Determine target columns, load update matrix
49220      */
49221     d0 = diagd->ptr.p_double[offsd];
49222     d1 = diagd->ptr.p_double[offsd+1];
49223     col0 = 0;
49224     col1 = 0;
49225     col2 = 0;
49226     col3 = 0;
49227     u00 = (double)(0);
49228     u01 = (double)(0);
49229     u10 = (double)(0);
49230     u11 = (double)(0);
49231     u20 = (double)(0);
49232     u21 = (double)(0);
49233     u30 = (double)(0);
49234     u31 = (double)(0);
49235     if( uwidth>=1 )
49236     {
49237         col0 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+0]];
49238         u00 = d0*rowstorage->ptr.p_double[offsu+0];
49239         u01 = d1*rowstorage->ptr.p_double[offsu+1];
49240     }
49241     if( uwidth>=2 )
49242     {
49243         col1 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+1]];
49244         u10 = d0*rowstorage->ptr.p_double[offsu+1*2+0];
49245         u11 = d1*rowstorage->ptr.p_double[offsu+1*2+1];
49246     }
49247     if( uwidth>=3 )
49248     {
49249         col2 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+2]];
49250         u20 = d0*rowstorage->ptr.p_double[offsu+2*2+0];
49251         u21 = d1*rowstorage->ptr.p_double[offsu+2*2+1];
49252     }
49253     if( uwidth>=4 )
49254     {
49255         col3 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+3]];
49256         u30 = d0*rowstorage->ptr.p_double[offsu+3*2+0];
49257         u31 = d1*rowstorage->ptr.p_double[offsu+3*2+1];
49258     }
49259 
49260     /*
49261      * Run update
49262      */
49263     if( uwidth==1 )
49264     {
49265         for(k=0; k<=uheight-1; k++)
49266         {
49267             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49268             uk0 = rowstorage->ptr.p_double[offsu+2*k+0];
49269             uk1 = rowstorage->ptr.p_double[offsu+2*k+1];
49270             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk0-u01*uk1;
49271         }
49272     }
49273     if( uwidth==2 )
49274     {
49275         for(k=0; k<=uheight-1; k++)
49276         {
49277             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49278             uk0 = rowstorage->ptr.p_double[offsu+2*k+0];
49279             uk1 = rowstorage->ptr.p_double[offsu+2*k+1];
49280             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk0-u01*uk1;
49281             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk0-u11*uk1;
49282         }
49283     }
49284     if( uwidth==3 )
49285     {
49286         for(k=0; k<=uheight-1; k++)
49287         {
49288             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49289             uk0 = rowstorage->ptr.p_double[offsu+2*k+0];
49290             uk1 = rowstorage->ptr.p_double[offsu+2*k+1];
49291             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk0-u01*uk1;
49292             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk0-u11*uk1;
49293             rowstorage->ptr.p_double[targetrow+col2] = rowstorage->ptr.p_double[targetrow+col2]-u20*uk0-u21*uk1;
49294         }
49295     }
49296     if( uwidth==4 )
49297     {
49298         for(k=0; k<=uheight-1; k++)
49299         {
49300             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49301             uk0 = rowstorage->ptr.p_double[offsu+2*k+0];
49302             uk1 = rowstorage->ptr.p_double[offsu+2*k+1];
49303             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk0-u01*uk1;
49304             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk0-u11*uk1;
49305             rowstorage->ptr.p_double[targetrow+col2] = rowstorage->ptr.p_double[targetrow+col2]-u20*uk0-u21*uk1;
49306             rowstorage->ptr.p_double[targetrow+col3] = rowstorage->ptr.p_double[targetrow+col3]-u30*uk0-u31*uk1;
49307         }
49308     }
49309     result = ae_true;
49310     return result;
49311 }
49312 
49313 
49314 /*************************************************************************
49315 Debug checks for sparsity structure
49316 
49317   -- ALGLIB routine --
49318      22.08.2021
49319      Bochkanov Sergey
49320 *************************************************************************/
spchol_slowdebugchecks(sparsematrix * a,ae_vector * fillinperm,ae_int_t n,ae_int_t tail,sparsematrix * referencetaila,ae_state * _state)49321 static void spchol_slowdebugchecks(sparsematrix* a,
49322      /* Integer */ ae_vector* fillinperm,
49323      ae_int_t n,
49324      ae_int_t tail,
49325      sparsematrix* referencetaila,
49326      ae_state *_state)
49327 {
49328     ae_frame _frame_block;
49329     ae_int_t i;
49330     ae_int_t j;
49331     sparsematrix perma;
49332     ae_matrix densea;
49333 
49334     ae_frame_make(_state, &_frame_block);
49335     memset(&perma, 0, sizeof(perma));
49336     memset(&densea, 0, sizeof(densea));
49337     _sparsematrix_init(&perma, _state, ae_true);
49338     ae_matrix_init(&densea, 0, 0, DT_REAL, _state, ae_true);
49339 
49340     sparsesymmpermtblbuf(a, ae_false, fillinperm, &perma, _state);
49341     ae_matrix_set_length(&densea, n, n, _state);
49342     for(i=0; i<=n-1; i++)
49343     {
49344         for(j=0; j<=i; j++)
49345         {
49346             if( !sparseexists(&perma, i, j, _state) )
49347             {
49348                 densea.ptr.pp_double[i][j] = (double)(0);
49349                 continue;
49350             }
49351             if( i==j )
49352             {
49353                 densea.ptr.pp_double[i][j] = (double)(1);
49354             }
49355             else
49356             {
49357                 densea.ptr.pp_double[i][j] = 0.01*(ae_cos((double)(i+1), _state)+1.23*ae_sin((double)(j+1), _state))/n;
49358             }
49359         }
49360     }
49361     ae_assert(spchol_dbgmatrixcholesky2(&densea, 0, n-tail, ae_false, _state), "densechol failed", _state);
49362     rmatrixrighttrsm(tail, n-tail, &densea, 0, 0, ae_false, ae_false, 1, &densea, n-tail, 0, _state);
49363     rmatrixsyrk(tail, n-tail, -1.0, &densea, n-tail, 0, 0, 1.0, &densea, n-tail, n-tail, ae_false, _state);
49364     for(i=n-tail; i<=n-1; i++)
49365     {
49366         for(j=n-tail; j<=i; j++)
49367         {
49368             ae_assert(!(ae_fp_eq(densea.ptr.pp_double[i][j],(double)(0))&&sparseexists(referencetaila, i-(n-tail), j-(n-tail), _state)), "SPSymmAnalyze: structure check 1 failed", _state);
49369             ae_assert(!(ae_fp_neq(densea.ptr.pp_double[i][j],(double)(0))&&!sparseexists(referencetaila, i-(n-tail), j-(n-tail), _state)), "SPSymmAnalyze: structure check 2 failed", _state);
49370         }
49371     }
49372     ae_frame_leave(_state);
49373 }
49374 
49375 
49376 /*************************************************************************
49377 Dense Cholesky driver for internal integrity checks
49378 
49379   -- ALGLIB routine --
49380      22.08.2021
49381      Bochkanov Sergey
49382 *************************************************************************/
spchol_dbgmatrixcholesky2(ae_matrix * aaa,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_state * _state)49383 static ae_bool spchol_dbgmatrixcholesky2(/* Real    */ ae_matrix* aaa,
49384      ae_int_t offs,
49385      ae_int_t n,
49386      ae_bool isupper,
49387      ae_state *_state)
49388 {
49389     ae_frame _frame_block;
49390     ae_int_t i;
49391     ae_int_t j;
49392     double ajj;
49393     double v;
49394     double r;
49395     ae_vector tmp;
49396     ae_bool result;
49397 
49398     ae_frame_make(_state, &_frame_block);
49399     memset(&tmp, 0, sizeof(tmp));
49400     ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
49401 
49402     ae_vector_set_length(&tmp, 2*n, _state);
49403     result = ae_true;
49404     if( n<0 )
49405     {
49406         result = ae_false;
49407         ae_frame_leave(_state);
49408         return result;
49409     }
49410 
49411     /*
49412      * Quick return if possible
49413      */
49414     if( n==0 )
49415     {
49416         ae_frame_leave(_state);
49417         return result;
49418     }
49419     if( isupper )
49420     {
49421 
49422         /*
49423          * Compute the Cholesky factorization A = U'*U.
49424          */
49425         for(j=0; j<=n-1; j++)
49426         {
49427 
49428             /*
49429              * Compute U(J,J) and test for non-positive-definiteness.
49430              */
49431             v = ae_v_dotproduct(&aaa->ptr.pp_double[offs][offs+j], aaa->stride, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(offs,offs+j-1));
49432             ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
49433             if( ae_fp_less_eq(ajj,(double)(0)) )
49434             {
49435                 aaa->ptr.pp_double[offs+j][offs+j] = ajj;
49436                 result = ae_false;
49437                 ae_frame_leave(_state);
49438                 return result;
49439             }
49440             ajj = ae_sqrt(ajj, _state);
49441             aaa->ptr.pp_double[offs+j][offs+j] = ajj;
49442 
49443             /*
49444              * Compute elements J+1:N-1 of row J.
49445              */
49446             if( j<n-1 )
49447             {
49448                 if( j>0 )
49449                 {
49450                     ae_v_moveneg(&tmp.ptr.p_double[0], 1, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(0,j-1));
49451                     rmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, &tmp, 0, &tmp, n, _state);
49452                     ae_v_add(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, &tmp.ptr.p_double[n], 1, ae_v_len(offs+j+1,offs+n-1));
49453                 }
49454                 r = 1/ajj;
49455                 ae_v_muld(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r);
49456             }
49457         }
49458     }
49459     else
49460     {
49461 
49462         /*
49463          * Compute the Cholesky factorization A = L*L'.
49464          */
49465         for(j=0; j<=n-1; j++)
49466         {
49467 
49468             /*
49469              * Compute L(J+1,J+1) and test for non-positive-definiteness.
49470              */
49471             v = ae_v_dotproduct(&aaa->ptr.pp_double[offs+j][offs], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(offs,offs+j-1));
49472             ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
49473             if( ae_fp_less_eq(ajj,(double)(0)) )
49474             {
49475                 aaa->ptr.pp_double[offs+j][offs+j] = ajj;
49476                 result = ae_false;
49477                 ae_frame_leave(_state);
49478                 return result;
49479             }
49480             ajj = ae_sqrt(ajj, _state);
49481             aaa->ptr.pp_double[offs+j][offs+j] = ajj;
49482 
49483             /*
49484              * Compute elements J+1:N of column J.
49485              */
49486             if( j<n-1 )
49487             {
49488                 r = 1/ajj;
49489                 if( j>0 )
49490                 {
49491                     ae_v_move(&tmp.ptr.p_double[0], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(0,j-1));
49492                     rmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, &tmp, 0, &tmp, n, _state);
49493                     for(i=0; i<=n-j-2; i++)
49494                     {
49495                         aaa->ptr.pp_double[offs+j+1+i][offs+j] = (aaa->ptr.pp_double[offs+j+1+i][offs+j]-tmp.ptr.p_double[n+i])*r;
49496                     }
49497                 }
49498                 else
49499                 {
49500                     for(i=0; i<=n-j-2; i++)
49501                     {
49502                         aaa->ptr.pp_double[offs+j+1+i][offs+j] = aaa->ptr.pp_double[offs+j+1+i][offs+j]*r;
49503                     }
49504                 }
49505             }
49506         }
49507     }
49508     ae_frame_leave(_state);
49509     return result;
49510 }
49511 
49512 
_spcholanalysis_init(void * _p,ae_state * _state,ae_bool make_automatic)49513 void _spcholanalysis_init(void* _p, ae_state *_state, ae_bool make_automatic)
49514 {
49515     spcholanalysis *p = (spcholanalysis*)_p;
49516     ae_touch_ptr((void*)p);
49517     ae_vector_init(&p->parentsupernode, 0, DT_INT, _state, make_automatic);
49518     ae_vector_init(&p->supercolrange, 0, DT_INT, _state, make_automatic);
49519     ae_vector_init(&p->superrowridx, 0, DT_INT, _state, make_automatic);
49520     ae_vector_init(&p->superrowidx, 0, DT_INT, _state, make_automatic);
49521     ae_vector_init(&p->fillinperm, 0, DT_INT, _state, make_automatic);
49522     ae_vector_init(&p->invfillinperm, 0, DT_INT, _state, make_automatic);
49523     ae_vector_init(&p->superperm, 0, DT_INT, _state, make_automatic);
49524     ae_vector_init(&p->invsuperperm, 0, DT_INT, _state, make_automatic);
49525     ae_vector_init(&p->effectiveperm, 0, DT_INT, _state, make_automatic);
49526     ae_vector_init(&p->inveffectiveperm, 0, DT_INT, _state, make_automatic);
49527     ae_vector_init(&p->ladjplusr, 0, DT_INT, _state, make_automatic);
49528     ae_vector_init(&p->ladjplus, 0, DT_INT, _state, make_automatic);
49529     ae_vector_init(&p->outrowcounts, 0, DT_INT, _state, make_automatic);
49530     ae_vector_init(&p->inputstorage, 0, DT_REAL, _state, make_automatic);
49531     ae_vector_init(&p->outputstorage, 0, DT_REAL, _state, make_automatic);
49532     ae_vector_init(&p->rowstrides, 0, DT_INT, _state, make_automatic);
49533     ae_vector_init(&p->rowoffsets, 0, DT_INT, _state, make_automatic);
49534     ae_vector_init(&p->diagd, 0, DT_REAL, _state, make_automatic);
49535     ae_vector_init(&p->wrkrows, 0, DT_INT, _state, make_automatic);
49536     ae_vector_init(&p->flagarray, 0, DT_BOOL, _state, make_automatic);
49537     ae_vector_init(&p->tmpparent, 0, DT_INT, _state, make_automatic);
49538     ae_vector_init(&p->node2supernode, 0, DT_INT, _state, make_automatic);
49539     ae_vector_init(&p->u2smap, 0, DT_INT, _state, make_automatic);
49540     ae_vector_init(&p->raw2smap, 0, DT_INT, _state, make_automatic);
49541     _amdbuffer_init(&p->amdtmp, _state, make_automatic);
49542     ae_vector_init(&p->tmp0, 0, DT_INT, _state, make_automatic);
49543     ae_vector_init(&p->tmp1, 0, DT_INT, _state, make_automatic);
49544     ae_vector_init(&p->tmp2, 0, DT_INT, _state, make_automatic);
49545     ae_vector_init(&p->tmp3, 0, DT_INT, _state, make_automatic);
49546     ae_vector_init(&p->tmp4, 0, DT_INT, _state, make_automatic);
49547     _sparsematrix_init(&p->tmpa, _state, make_automatic);
49548     _sparsematrix_init(&p->tmpat, _state, make_automatic);
49549     _sparsematrix_init(&p->tmpa2, _state, make_automatic);
49550     _sparsematrix_init(&p->tmpbottomt, _state, make_automatic);
49551     _sparsematrix_init(&p->tmpupdate, _state, make_automatic);
49552     _sparsematrix_init(&p->tmpupdatet, _state, make_automatic);
49553     _sparsematrix_init(&p->tmpnewtailt, _state, make_automatic);
49554     ae_vector_init(&p->tmpperm, 0, DT_INT, _state, make_automatic);
49555     ae_vector_init(&p->invtmpperm, 0, DT_INT, _state, make_automatic);
49556     ae_vector_init(&p->tmpx, 0, DT_REAL, _state, make_automatic);
49557     ae_vector_init(&p->simdbuf, 0, DT_REAL, _state, make_automatic);
49558 }
49559 
49560 
_spcholanalysis_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)49561 void _spcholanalysis_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
49562 {
49563     spcholanalysis *dst = (spcholanalysis*)_dst;
49564     spcholanalysis *src = (spcholanalysis*)_src;
49565     dst->tasktype = src->tasktype;
49566     dst->n = src->n;
49567     dst->permtype = src->permtype;
49568     dst->unitd = src->unitd;
49569     dst->modtype = src->modtype;
49570     dst->modparam0 = src->modparam0;
49571     dst->modparam1 = src->modparam1;
49572     dst->modparam2 = src->modparam2;
49573     dst->modparam3 = src->modparam3;
49574     dst->extendeddebug = src->extendeddebug;
49575     dst->dotrace = src->dotrace;
49576     dst->dotracesupernodalstructure = src->dotracesupernodalstructure;
49577     dst->nsuper = src->nsuper;
49578     ae_vector_init_copy(&dst->parentsupernode, &src->parentsupernode, _state, make_automatic);
49579     ae_vector_init_copy(&dst->supercolrange, &src->supercolrange, _state, make_automatic);
49580     ae_vector_init_copy(&dst->superrowridx, &src->superrowridx, _state, make_automatic);
49581     ae_vector_init_copy(&dst->superrowidx, &src->superrowidx, _state, make_automatic);
49582     ae_vector_init_copy(&dst->fillinperm, &src->fillinperm, _state, make_automatic);
49583     ae_vector_init_copy(&dst->invfillinperm, &src->invfillinperm, _state, make_automatic);
49584     ae_vector_init_copy(&dst->superperm, &src->superperm, _state, make_automatic);
49585     ae_vector_init_copy(&dst->invsuperperm, &src->invsuperperm, _state, make_automatic);
49586     ae_vector_init_copy(&dst->effectiveperm, &src->effectiveperm, _state, make_automatic);
49587     ae_vector_init_copy(&dst->inveffectiveperm, &src->inveffectiveperm, _state, make_automatic);
49588     dst->istopologicalordering = src->istopologicalordering;
49589     dst->applypermutationtooutput = src->applypermutationtooutput;
49590     ae_vector_init_copy(&dst->ladjplusr, &src->ladjplusr, _state, make_automatic);
49591     ae_vector_init_copy(&dst->ladjplus, &src->ladjplus, _state, make_automatic);
49592     ae_vector_init_copy(&dst->outrowcounts, &src->outrowcounts, _state, make_automatic);
49593     ae_vector_init_copy(&dst->inputstorage, &src->inputstorage, _state, make_automatic);
49594     ae_vector_init_copy(&dst->outputstorage, &src->outputstorage, _state, make_automatic);
49595     ae_vector_init_copy(&dst->rowstrides, &src->rowstrides, _state, make_automatic);
49596     ae_vector_init_copy(&dst->rowoffsets, &src->rowoffsets, _state, make_automatic);
49597     ae_vector_init_copy(&dst->diagd, &src->diagd, _state, make_automatic);
49598     ae_vector_init_copy(&dst->wrkrows, &src->wrkrows, _state, make_automatic);
49599     ae_vector_init_copy(&dst->flagarray, &src->flagarray, _state, make_automatic);
49600     ae_vector_init_copy(&dst->tmpparent, &src->tmpparent, _state, make_automatic);
49601     ae_vector_init_copy(&dst->node2supernode, &src->node2supernode, _state, make_automatic);
49602     ae_vector_init_copy(&dst->u2smap, &src->u2smap, _state, make_automatic);
49603     ae_vector_init_copy(&dst->raw2smap, &src->raw2smap, _state, make_automatic);
49604     _amdbuffer_init_copy(&dst->amdtmp, &src->amdtmp, _state, make_automatic);
49605     ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
49606     ae_vector_init_copy(&dst->tmp1, &src->tmp1, _state, make_automatic);
49607     ae_vector_init_copy(&dst->tmp2, &src->tmp2, _state, make_automatic);
49608     ae_vector_init_copy(&dst->tmp3, &src->tmp3, _state, make_automatic);
49609     ae_vector_init_copy(&dst->tmp4, &src->tmp4, _state, make_automatic);
49610     _sparsematrix_init_copy(&dst->tmpa, &src->tmpa, _state, make_automatic);
49611     _sparsematrix_init_copy(&dst->tmpat, &src->tmpat, _state, make_automatic);
49612     _sparsematrix_init_copy(&dst->tmpa2, &src->tmpa2, _state, make_automatic);
49613     _sparsematrix_init_copy(&dst->tmpbottomt, &src->tmpbottomt, _state, make_automatic);
49614     _sparsematrix_init_copy(&dst->tmpupdate, &src->tmpupdate, _state, make_automatic);
49615     _sparsematrix_init_copy(&dst->tmpupdatet, &src->tmpupdatet, _state, make_automatic);
49616     _sparsematrix_init_copy(&dst->tmpnewtailt, &src->tmpnewtailt, _state, make_automatic);
49617     ae_vector_init_copy(&dst->tmpperm, &src->tmpperm, _state, make_automatic);
49618     ae_vector_init_copy(&dst->invtmpperm, &src->invtmpperm, _state, make_automatic);
49619     ae_vector_init_copy(&dst->tmpx, &src->tmpx, _state, make_automatic);
49620     ae_vector_init_copy(&dst->simdbuf, &src->simdbuf, _state, make_automatic);
49621 }
49622 
49623 
_spcholanalysis_clear(void * _p)49624 void _spcholanalysis_clear(void* _p)
49625 {
49626     spcholanalysis *p = (spcholanalysis*)_p;
49627     ae_touch_ptr((void*)p);
49628     ae_vector_clear(&p->parentsupernode);
49629     ae_vector_clear(&p->supercolrange);
49630     ae_vector_clear(&p->superrowridx);
49631     ae_vector_clear(&p->superrowidx);
49632     ae_vector_clear(&p->fillinperm);
49633     ae_vector_clear(&p->invfillinperm);
49634     ae_vector_clear(&p->superperm);
49635     ae_vector_clear(&p->invsuperperm);
49636     ae_vector_clear(&p->effectiveperm);
49637     ae_vector_clear(&p->inveffectiveperm);
49638     ae_vector_clear(&p->ladjplusr);
49639     ae_vector_clear(&p->ladjplus);
49640     ae_vector_clear(&p->outrowcounts);
49641     ae_vector_clear(&p->inputstorage);
49642     ae_vector_clear(&p->outputstorage);
49643     ae_vector_clear(&p->rowstrides);
49644     ae_vector_clear(&p->rowoffsets);
49645     ae_vector_clear(&p->diagd);
49646     ae_vector_clear(&p->wrkrows);
49647     ae_vector_clear(&p->flagarray);
49648     ae_vector_clear(&p->tmpparent);
49649     ae_vector_clear(&p->node2supernode);
49650     ae_vector_clear(&p->u2smap);
49651     ae_vector_clear(&p->raw2smap);
49652     _amdbuffer_clear(&p->amdtmp);
49653     ae_vector_clear(&p->tmp0);
49654     ae_vector_clear(&p->tmp1);
49655     ae_vector_clear(&p->tmp2);
49656     ae_vector_clear(&p->tmp3);
49657     ae_vector_clear(&p->tmp4);
49658     _sparsematrix_clear(&p->tmpa);
49659     _sparsematrix_clear(&p->tmpat);
49660     _sparsematrix_clear(&p->tmpa2);
49661     _sparsematrix_clear(&p->tmpbottomt);
49662     _sparsematrix_clear(&p->tmpupdate);
49663     _sparsematrix_clear(&p->tmpupdatet);
49664     _sparsematrix_clear(&p->tmpnewtailt);
49665     ae_vector_clear(&p->tmpperm);
49666     ae_vector_clear(&p->invtmpperm);
49667     ae_vector_clear(&p->tmpx);
49668     ae_vector_clear(&p->simdbuf);
49669 }
49670 
49671 
_spcholanalysis_destroy(void * _p)49672 void _spcholanalysis_destroy(void* _p)
49673 {
49674     spcholanalysis *p = (spcholanalysis*)_p;
49675     ae_touch_ptr((void*)p);
49676     ae_vector_destroy(&p->parentsupernode);
49677     ae_vector_destroy(&p->supercolrange);
49678     ae_vector_destroy(&p->superrowridx);
49679     ae_vector_destroy(&p->superrowidx);
49680     ae_vector_destroy(&p->fillinperm);
49681     ae_vector_destroy(&p->invfillinperm);
49682     ae_vector_destroy(&p->superperm);
49683     ae_vector_destroy(&p->invsuperperm);
49684     ae_vector_destroy(&p->effectiveperm);
49685     ae_vector_destroy(&p->inveffectiveperm);
49686     ae_vector_destroy(&p->ladjplusr);
49687     ae_vector_destroy(&p->ladjplus);
49688     ae_vector_destroy(&p->outrowcounts);
49689     ae_vector_destroy(&p->inputstorage);
49690     ae_vector_destroy(&p->outputstorage);
49691     ae_vector_destroy(&p->rowstrides);
49692     ae_vector_destroy(&p->rowoffsets);
49693     ae_vector_destroy(&p->diagd);
49694     ae_vector_destroy(&p->wrkrows);
49695     ae_vector_destroy(&p->flagarray);
49696     ae_vector_destroy(&p->tmpparent);
49697     ae_vector_destroy(&p->node2supernode);
49698     ae_vector_destroy(&p->u2smap);
49699     ae_vector_destroy(&p->raw2smap);
49700     _amdbuffer_destroy(&p->amdtmp);
49701     ae_vector_destroy(&p->tmp0);
49702     ae_vector_destroy(&p->tmp1);
49703     ae_vector_destroy(&p->tmp2);
49704     ae_vector_destroy(&p->tmp3);
49705     ae_vector_destroy(&p->tmp4);
49706     _sparsematrix_destroy(&p->tmpa);
49707     _sparsematrix_destroy(&p->tmpat);
49708     _sparsematrix_destroy(&p->tmpa2);
49709     _sparsematrix_destroy(&p->tmpbottomt);
49710     _sparsematrix_destroy(&p->tmpupdate);
49711     _sparsematrix_destroy(&p->tmpupdatet);
49712     _sparsematrix_destroy(&p->tmpnewtailt);
49713     ae_vector_destroy(&p->tmpperm);
49714     ae_vector_destroy(&p->invtmpperm);
49715     ae_vector_destroy(&p->tmpx);
49716     ae_vector_destroy(&p->simdbuf);
49717 }
49718 
49719 
49720 #endif
49721 #if defined(AE_COMPILE_TRFAC) || !defined(AE_PARTIAL_BUILD)
49722 
49723 
49724 /*************************************************************************
49725 LU decomposition of a general real matrix with row pivoting
49726 
49727 A is represented as A = P*L*U, where:
49728 * L is lower unitriangular matrix
49729 * U is upper triangular matrix
49730 * P = P0*P1*...*PK, K=min(M,N)-1,
49731   Pi - permutation matrix for I and Pivots[I]
49732 
49733 INPUT PARAMETERS:
49734     A       -   array[0..M-1, 0..N-1].
49735     M       -   number of rows in matrix A.
49736     N       -   number of columns in matrix A.
49737 
49738 
49739 OUTPUT PARAMETERS:
49740     A       -   matrices L and U in compact form:
49741                 * L is stored under main diagonal
49742                 * U is stored on and above main diagonal
49743     Pivots  -   permutation matrix in compact form.
49744                 array[0..Min(M-1,N-1)].
49745 
49746   ! FREE EDITION OF ALGLIB:
49747   !
49748   ! Free Edition of ALGLIB supports following important features for  this
49749   ! function:
49750   ! * C++ version: x64 SIMD support using C++ intrinsics
49751   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
49752   !
49753   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
49754   ! Reference Manual in order  to  find  out  how to activate SIMD support
49755   ! in ALGLIB.
49756 
49757   ! COMMERCIAL EDITION OF ALGLIB:
49758   !
49759   ! Commercial Edition of ALGLIB includes following important improvements
49760   ! of this function:
49761   ! * high-performance native backend with same C# interface (C# version)
49762   ! * multithreading support (C++ and C# versions)
49763   ! * hardware vendor (Intel) implementations of linear algebra primitives
49764   !   (C++ and C# versions, x86/x64 platform)
49765   !
49766   ! We recommend you to read 'Working with commercial version' section  of
49767   ! ALGLIB Reference Manual in order to find out how to  use  performance-
49768   ! related features provided by commercial edition of ALGLIB.
49769 
49770   -- ALGLIB routine --
49771      10.01.2010
49772      Bochkanov Sergey
49773 *************************************************************************/
rmatrixlu(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)49774 void rmatrixlu(/* Real    */ ae_matrix* a,
49775      ae_int_t m,
49776      ae_int_t n,
49777      /* Integer */ ae_vector* pivots,
49778      ae_state *_state)
49779 {
49780 
49781     ae_vector_clear(pivots);
49782 
49783     ae_assert(m>0, "RMatrixLU: incorrect M!", _state);
49784     ae_assert(n>0, "RMatrixLU: incorrect N!", _state);
49785     rmatrixplu(a, m, n, pivots, _state);
49786 }
49787 
49788 
49789 /*************************************************************************
49790 LU decomposition of a general complex matrix with row pivoting
49791 
49792 A is represented as A = P*L*U, where:
49793 * L is lower unitriangular matrix
49794 * U is upper triangular matrix
49795 * P = P0*P1*...*PK, K=min(M,N)-1,
49796   Pi - permutation matrix for I and Pivots[I]
49797 
49798 INPUT PARAMETERS:
49799     A       -   array[0..M-1, 0..N-1].
49800     M       -   number of rows in matrix A.
49801     N       -   number of columns in matrix A.
49802 
49803 
49804 OUTPUT PARAMETERS:
49805     A       -   matrices L and U in compact form:
49806                 * L is stored under main diagonal
49807                 * U is stored on and above main diagonal
49808     Pivots  -   permutation matrix in compact form.
49809                 array[0..Min(M-1,N-1)].
49810 
49811   ! FREE EDITION OF ALGLIB:
49812   !
49813   ! Free Edition of ALGLIB supports following important features for  this
49814   ! function:
49815   ! * C++ version: x64 SIMD support using C++ intrinsics
49816   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
49817   !
49818   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
49819   ! Reference Manual in order  to  find  out  how to activate SIMD support
49820   ! in ALGLIB.
49821 
49822   ! COMMERCIAL EDITION OF ALGLIB:
49823   !
49824   ! Commercial Edition of ALGLIB includes following important improvements
49825   ! of this function:
49826   ! * high-performance native backend with same C# interface (C# version)
49827   ! * multithreading support (C++ and C# versions)
49828   ! * hardware vendor (Intel) implementations of linear algebra primitives
49829   !   (C++ and C# versions, x86/x64 platform)
49830   !
49831   ! We recommend you to read 'Working with commercial version' section  of
49832   ! ALGLIB Reference Manual in order to find out how to  use  performance-
49833   ! related features provided by commercial edition of ALGLIB.
49834 
49835   -- ALGLIB routine --
49836      10.01.2010
49837      Bochkanov Sergey
49838 *************************************************************************/
cmatrixlu(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)49839 void cmatrixlu(/* Complex */ ae_matrix* a,
49840      ae_int_t m,
49841      ae_int_t n,
49842      /* Integer */ ae_vector* pivots,
49843      ae_state *_state)
49844 {
49845 
49846     ae_vector_clear(pivots);
49847 
49848     ae_assert(m>0, "CMatrixLU: incorrect M!", _state);
49849     ae_assert(n>0, "CMatrixLU: incorrect N!", _state);
49850     cmatrixplu(a, m, n, pivots, _state);
49851 }
49852 
49853 
49854 /*************************************************************************
49855 Cache-oblivious Cholesky decomposition
49856 
49857 The algorithm computes Cholesky decomposition  of  a  Hermitian  positive-
49858 definite matrix. The result of an algorithm is a representation  of  A  as
49859 A=U'*U  or A=L*L' (here X' denotes conj(X^T)).
49860 
49861 INPUT PARAMETERS:
49862     A       -   upper or lower triangle of a factorized matrix.
49863                 array with elements [0..N-1, 0..N-1].
49864     N       -   size of matrix A.
49865     IsUpper -   if IsUpper=True, then A contains an upper triangle of
49866                 a symmetric matrix, otherwise A contains a lower one.
49867 
49868 OUTPUT PARAMETERS:
49869     A       -   the result of factorization. If IsUpper=True, then
49870                 the upper triangle contains matrix U, so that A = U'*U,
49871                 and the elements below the main diagonal are not modified.
49872                 Similarly, if IsUpper = False.
49873 
49874 RESULT:
49875     If  the  matrix  is  positive-definite,  the  function  returns  True.
49876     Otherwise, the function returns False. Contents of A is not determined
49877     in such case.
49878 
49879   ! FREE EDITION OF ALGLIB:
49880   !
49881   ! Free Edition of ALGLIB supports following important features for  this
49882   ! function:
49883   ! * C++ version: x64 SIMD support using C++ intrinsics
49884   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
49885   !
49886   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
49887   ! Reference Manual in order  to  find  out  how to activate SIMD support
49888   ! in ALGLIB.
49889 
49890   ! COMMERCIAL EDITION OF ALGLIB:
49891   !
49892   ! Commercial Edition of ALGLIB includes following important improvements
49893   ! of this function:
49894   ! * high-performance native backend with same C# interface (C# version)
49895   ! * multithreading support (C++ and C# versions)
49896   ! * hardware vendor (Intel) implementations of linear algebra primitives
49897   !   (C++ and C# versions, x86/x64 platform)
49898   !
49899   ! We recommend you to read 'Working with commercial version' section  of
49900   ! ALGLIB Reference Manual in order to find out how to  use  performance-
49901   ! related features provided by commercial edition of ALGLIB.
49902 
49903   -- ALGLIB routine --
49904      15.12.2009-22.01.2018
49905      Bochkanov Sergey
49906 *************************************************************************/
hpdmatrixcholesky(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)49907 ae_bool hpdmatrixcholesky(/* Complex */ ae_matrix* a,
49908      ae_int_t n,
49909      ae_bool isupper,
49910      ae_state *_state)
49911 {
49912     ae_frame _frame_block;
49913     ae_vector tmp;
49914     ae_bool result;
49915 
49916     ae_frame_make(_state, &_frame_block);
49917     memset(&tmp, 0, sizeof(tmp));
49918     ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
49919 
49920     if( n<1 )
49921     {
49922         result = ae_false;
49923         ae_frame_leave(_state);
49924         return result;
49925     }
49926     result = trfac_hpdmatrixcholeskyrec(a, 0, n, isupper, &tmp, _state);
49927     ae_frame_leave(_state);
49928     return result;
49929 }
49930 
49931 
49932 /*************************************************************************
49933 Cache-oblivious Cholesky decomposition
49934 
49935 The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
49936 definite matrix. The result of an algorithm is a representation  of  A  as
49937 A=U^T*U  or A=L*L^T
49938 
49939 INPUT PARAMETERS:
49940     A       -   upper or lower triangle of a factorized matrix.
49941                 array with elements [0..N-1, 0..N-1].
49942     N       -   size of matrix A.
49943     IsUpper -   if IsUpper=True, then A contains an upper triangle of
49944                 a symmetric matrix, otherwise A contains a lower one.
49945 
49946 OUTPUT PARAMETERS:
49947     A       -   the result of factorization. If IsUpper=True, then
49948                 the upper triangle contains matrix U, so that A = U^T*U,
49949                 and the elements below the main diagonal are not modified.
49950                 Similarly, if IsUpper = False.
49951 
49952 RESULT:
49953     If  the  matrix  is  positive-definite,  the  function  returns  True.
49954     Otherwise, the function returns False. Contents of A is not determined
49955     in such case.
49956 
49957   ! FREE EDITION OF ALGLIB:
49958   !
49959   ! Free Edition of ALGLIB supports following important features for  this
49960   ! function:
49961   ! * C++ version: x64 SIMD support using C++ intrinsics
49962   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
49963   !
49964   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
49965   ! Reference Manual in order  to  find  out  how to activate SIMD support
49966   ! in ALGLIB.
49967 
49968   ! COMMERCIAL EDITION OF ALGLIB:
49969   !
49970   ! Commercial Edition of ALGLIB includes following important improvements
49971   ! of this function:
49972   ! * high-performance native backend with same C# interface (C# version)
49973   ! * multithreading support (C++ and C# versions)
49974   ! * hardware vendor (Intel) implementations of linear algebra primitives
49975   !   (C++ and C# versions, x86/x64 platform)
49976   !
49977   ! We recommend you to read 'Working with commercial version' section  of
49978   ! ALGLIB Reference Manual in order to find out how to  use  performance-
49979   ! related features provided by commercial edition of ALGLIB.
49980 
49981   -- ALGLIB routine --
49982      15.12.2009
49983      Bochkanov Sergey
49984 *************************************************************************/
spdmatrixcholesky(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)49985 ae_bool spdmatrixcholesky(/* Real    */ ae_matrix* a,
49986      ae_int_t n,
49987      ae_bool isupper,
49988      ae_state *_state)
49989 {
49990     ae_frame _frame_block;
49991     ae_vector tmp;
49992     ae_bool result;
49993 
49994     ae_frame_make(_state, &_frame_block);
49995     memset(&tmp, 0, sizeof(tmp));
49996     ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
49997 
49998     if( n<1 )
49999     {
50000         result = ae_false;
50001         ae_frame_leave(_state);
50002         return result;
50003     }
50004     result = spdmatrixcholeskyrec(a, 0, n, isupper, &tmp, _state);
50005     ae_frame_leave(_state);
50006     return result;
50007 }
50008 
50009 
50010 /*************************************************************************
50011 Update of Cholesky decomposition: rank-1 update to original A.  "Buffered"
50012 version which uses preallocated buffer which is saved  between  subsequent
50013 function calls.
50014 
50015 This function uses internally allocated buffer which is not saved  between
50016 subsequent  calls.  So,  if  you  perform  a lot  of  subsequent  updates,
50017 we  recommend   you   to   use   "buffered"   version   of  this function:
50018 SPDMatrixCholeskyUpdateAdd1Buf().
50019 
50020 INPUT PARAMETERS:
50021     A       -   upper or lower Cholesky factor.
50022                 array with elements [0..N-1, 0..N-1].
50023                 Exception is thrown if array size is too small.
50024     N       -   size of matrix A, N>0
50025     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
50026                 otherwise A contains a lower one.
50027     U       -   array[N], rank-1 update to A: A_mod = A + u*u'
50028                 Exception is thrown if array size is too small.
50029     BufR    -   possibly preallocated  buffer;  automatically  resized  if
50030                 needed. It is recommended to  reuse  this  buffer  if  you
50031                 perform a lot of subsequent decompositions.
50032 
50033 OUTPUT PARAMETERS:
50034     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
50035                 triangle contains matrix U, and the elements below the main
50036                 diagonal are not modified. Similarly, if IsUpper = False.
50037 
50038 NOTE: this function always succeeds, so it does not return completion code
50039 
50040 NOTE: this function checks sizes of input arrays, but it does  NOT  checks
50041       for presence of infinities or NAN's.
50042 
50043   -- ALGLIB --
50044      03.02.2014
50045      Sergey Bochkanov
50046 *************************************************************************/
spdmatrixcholeskyupdateadd1(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * u,ae_state * _state)50047 void spdmatrixcholeskyupdateadd1(/* Real    */ ae_matrix* a,
50048      ae_int_t n,
50049      ae_bool isupper,
50050      /* Real    */ ae_vector* u,
50051      ae_state *_state)
50052 {
50053     ae_frame _frame_block;
50054     ae_vector bufr;
50055 
50056     ae_frame_make(_state, &_frame_block);
50057     memset(&bufr, 0, sizeof(bufr));
50058     ae_vector_init(&bufr, 0, DT_REAL, _state, ae_true);
50059 
50060     ae_assert(n>0, "SPDMatrixCholeskyUpdateAdd1: N<=0", _state);
50061     ae_assert(a->rows>=n, "SPDMatrixCholeskyUpdateAdd1: Rows(A)<N", _state);
50062     ae_assert(a->cols>=n, "SPDMatrixCholeskyUpdateAdd1: Cols(A)<N", _state);
50063     ae_assert(u->cnt>=n, "SPDMatrixCholeskyUpdateAdd1: Length(U)<N", _state);
50064     spdmatrixcholeskyupdateadd1buf(a, n, isupper, u, &bufr, _state);
50065     ae_frame_leave(_state);
50066 }
50067 
50068 
50069 /*************************************************************************
50070 Update of Cholesky decomposition: "fixing" some variables.
50071 
50072 This function uses internally allocated buffer which is not saved  between
50073 subsequent  calls.  So,  if  you  perform  a lot  of  subsequent  updates,
50074 we  recommend   you   to   use   "buffered"   version   of  this function:
50075 SPDMatrixCholeskyUpdateFixBuf().
50076 
50077 "FIXING" EXPLAINED:
50078 
50079     Suppose we have N*N positive definite matrix A. "Fixing" some variable
50080     means filling corresponding row/column of  A  by  zeros,  and  setting
50081     diagonal element to 1.
50082 
50083     For example, if we fix 2nd variable in 4*4 matrix A, it becomes Af:
50084 
50085         ( A00  A01  A02  A03 )      ( Af00  0   Af02 Af03 )
50086         ( A10  A11  A12  A13 )      (  0    1    0    0   )
50087         ( A20  A21  A22  A23 )  =>  ( Af20  0   Af22 Af23 )
50088         ( A30  A31  A32  A33 )      ( Af30  0   Af32 Af33 )
50089 
50090     If we have Cholesky decomposition of A, it must be recalculated  after
50091     variables were  fixed.  However,  it  is  possible  to  use  efficient
50092     algorithm, which needs O(K*N^2)  time  to  "fix"  K  variables,  given
50093     Cholesky decomposition of original, "unfixed" A.
50094 
50095 INPUT PARAMETERS:
50096     A       -   upper or lower Cholesky factor.
50097                 array with elements [0..N-1, 0..N-1].
50098                 Exception is thrown if array size is too small.
50099     N       -   size of matrix A, N>0
50100     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
50101                 otherwise A contains a lower one.
50102     Fix     -   array[N], I-th element is True if I-th  variable  must  be
50103                 fixed. Exception is thrown if array size is too small.
50104     BufR    -   possibly preallocated  buffer;  automatically  resized  if
50105                 needed. It is recommended to  reuse  this  buffer  if  you
50106                 perform a lot of subsequent decompositions.
50107 
50108 OUTPUT PARAMETERS:
50109     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
50110                 triangle contains matrix U, and the elements below the main
50111                 diagonal are not modified. Similarly, if IsUpper = False.
50112 
50113 NOTE: this function always succeeds, so it does not return completion code
50114 
50115 NOTE: this function checks sizes of input arrays, but it does  NOT  checks
50116       for presence of infinities or NAN's.
50117 
50118 NOTE: this  function  is  efficient  only  for  moderate amount of updated
50119       variables - say, 0.1*N or 0.3*N. For larger amount of  variables  it
50120       will  still  work,  but  you  may  get   better   performance   with
50121       straightforward Cholesky.
50122 
50123   -- ALGLIB --
50124      03.02.2014
50125      Sergey Bochkanov
50126 *************************************************************************/
spdmatrixcholeskyupdatefix(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * fix,ae_state * _state)50127 void spdmatrixcholeskyupdatefix(/* Real    */ ae_matrix* a,
50128      ae_int_t n,
50129      ae_bool isupper,
50130      /* Boolean */ ae_vector* fix,
50131      ae_state *_state)
50132 {
50133     ae_frame _frame_block;
50134     ae_vector bufr;
50135 
50136     ae_frame_make(_state, &_frame_block);
50137     memset(&bufr, 0, sizeof(bufr));
50138     ae_vector_init(&bufr, 0, DT_REAL, _state, ae_true);
50139 
50140     ae_assert(n>0, "SPDMatrixCholeskyUpdateFix: N<=0", _state);
50141     ae_assert(a->rows>=n, "SPDMatrixCholeskyUpdateFix: Rows(A)<N", _state);
50142     ae_assert(a->cols>=n, "SPDMatrixCholeskyUpdateFix: Cols(A)<N", _state);
50143     ae_assert(fix->cnt>=n, "SPDMatrixCholeskyUpdateFix: Length(Fix)<N", _state);
50144     spdmatrixcholeskyupdatefixbuf(a, n, isupper, fix, &bufr, _state);
50145     ae_frame_leave(_state);
50146 }
50147 
50148 
50149 /*************************************************************************
50150 Update of Cholesky decomposition: rank-1 update to original A.  "Buffered"
50151 version which uses preallocated buffer which is saved  between  subsequent
50152 function calls.
50153 
50154 See comments for SPDMatrixCholeskyUpdateAdd1() for more information.
50155 
50156 INPUT PARAMETERS:
50157     A       -   upper or lower Cholesky factor.
50158                 array with elements [0..N-1, 0..N-1].
50159                 Exception is thrown if array size is too small.
50160     N       -   size of matrix A, N>0
50161     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
50162                 otherwise A contains a lower one.
50163     U       -   array[N], rank-1 update to A: A_mod = A + u*u'
50164                 Exception is thrown if array size is too small.
50165     BufR    -   possibly preallocated  buffer;  automatically  resized  if
50166                 needed. It is recommended to  reuse  this  buffer  if  you
50167                 perform a lot of subsequent decompositions.
50168 
50169 OUTPUT PARAMETERS:
50170     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
50171                 triangle contains matrix U, and the elements below the main
50172                 diagonal are not modified. Similarly, if IsUpper = False.
50173 
50174   -- ALGLIB --
50175      03.02.2014
50176      Sergey Bochkanov
50177 *************************************************************************/
void spdmatrixcholeskyupdateadd1buf(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     /* Real    */ ae_vector* u,
     /* Real    */ ae_vector* bufr,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t nz;           /* index of first non-zero entry of U (N if U is all-zero) */
    double cs;             /* cosine of the current Givens rotation */
    double sn;             /* sine of the current Givens rotation */
    double v;
    double vv;


    ae_assert(n>0, "SPDMatrixCholeskyUpdateAdd1Buf: N<=0", _state);
    ae_assert(a->rows>=n, "SPDMatrixCholeskyUpdateAdd1Buf: Rows(A)<N", _state);
    ae_assert(a->cols>=n, "SPDMatrixCholeskyUpdateAdd1Buf: Cols(A)<N", _state);
    ae_assert(u->cnt>=n, "SPDMatrixCholeskyUpdateAdd1Buf: Length(U)<N", _state);

    /*
     * Find index of first non-zero entry in U.
     * Leading zeros in U leave the first NZ rows/columns of the factor
     * untouched, so the update can start at row NZ.
     */
    nz = n;
    for(i=0; i<=n-1; i++)
    {
        if( ae_fp_neq(u->ptr.p_double[i],(double)(0)) )
        {
            nz = i;
            break;
        }
    }
    if( nz==n )
    {

        /*
         * Nothing to update
         */
        return;
    }

    /*
     * If working with upper triangular matrix
     */
    if( isupper )
    {

        /*
         * Perform a sequence of updates which fix variables one by one.
         * This approach is different from one which is used when we work
         * with lower triangular matrix.
         *
         * BufR[NZ..N-1] holds the not-yet-absorbed remainder of the update
         * vector; each non-zero entry is annihilated by a Givens rotation
         * applied to row I of the factor and to the remainder itself.
         */
        rvectorsetlengthatleast(bufr, n, _state);
        for(j=nz; j<=n-1; j++)
        {
            bufr->ptr.p_double[j] = u->ptr.p_double[j];
        }
        for(i=nz; i<=n-1; i++)
        {
            if( ae_fp_neq(bufr->ptr.p_double[i],(double)(0)) )
            {
                /* rotation zeroes BufR[I] against the diagonal element A[I][I] */
                generaterotation(a->ptr.pp_double[i][i], bufr->ptr.p_double[i], &cs, &sn, &v, _state);
                a->ptr.pp_double[i][i] = v;
                bufr->ptr.p_double[i] = 0.0;
                for(j=i+1; j<=n-1; j++)
                {
                    v = a->ptr.pp_double[i][j];
                    vv = bufr->ptr.p_double[j];
                    a->ptr.pp_double[i][j] = cs*v+sn*vv;
                    bufr->ptr.p_double[j] = -sn*v+cs*vv;
                }
            }
        }
    }
    else
    {

        /*
         * Calculate rows of modified Cholesky factor, row-by-row
         * (updates performed during variable fixing are applied
         * simultaneously to each row)
         *
         * Buffer layout: BufR[0..N-1] is the update vector remainder,
         * BufR[N+2*J+0]/BufR[N+2*J+1] store the (cos,sin) pair of the
         * rotation generated at row J, replayed on every later row.
         */
        rvectorsetlengthatleast(bufr, 3*n, _state);
        for(j=nz; j<=n-1; j++)
        {
            bufr->ptr.p_double[j] = u->ptr.p_double[j];
        }
        for(i=nz; i<=n-1; i++)
        {

            /*
             * Update all previous updates [Idx+1...I-1] to I-th row
             */
            vv = bufr->ptr.p_double[i];
            for(j=nz; j<=i-1; j++)
            {
                cs = bufr->ptr.p_double[n+2*j+0];
                sn = bufr->ptr.p_double[n+2*j+1];
                v = a->ptr.pp_double[i][j];
                a->ptr.pp_double[i][j] = cs*v+sn*vv;
                vv = -sn*v+cs*vv;
            }

            /*
             * generate rotation applied to I-th element of update vector
             */
            generaterotation(a->ptr.pp_double[i][i], vv, &cs, &sn, &v, _state);
            a->ptr.pp_double[i][i] = v;
            bufr->ptr.p_double[n+2*i+0] = cs;
            bufr->ptr.p_double[n+2*i+1] = sn;
        }
    }
}
50292 
50293 
50294 /*************************************************************************
50295 Update of Cholesky  decomposition:  "fixing"  some  variables.  "Buffered"
50296 version which uses preallocated buffer which is saved  between  subsequent
50297 function calls.
50298 
50299 See comments for SPDMatrixCholeskyUpdateFix() for more information.
50300 
50301 INPUT PARAMETERS:
50302     A       -   upper or lower Cholesky factor.
50303                 array with elements [0..N-1, 0..N-1].
50304                 Exception is thrown if array size is too small.
50305     N       -   size of matrix A, N>0
50306     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
50307                 otherwise A contains a lower one.
50308     Fix     -   array[N], I-th element is True if I-th  variable  must  be
50309                 fixed. Exception is thrown if array size is too small.
50310     BufR    -   possibly preallocated  buffer;  automatically  resized  if
50311                 needed. It is recommended to  reuse  this  buffer  if  you
50312                 perform a lot of subsequent decompositions.
50313 
50314 OUTPUT PARAMETERS:
50315     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
50316                 triangle contains matrix U, and the elements below the main
50317                 diagonal are not modified. Similarly, if IsUpper = False.
50318 
50319   -- ALGLIB --
50320      03.02.2014
50321      Sergey Bochkanov
50322 *************************************************************************/
void spdmatrixcholeskyupdatefixbuf(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     /* Boolean */ ae_vector* fix,
     /* Real    */ ae_vector* bufr,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t nfix;         /* number of variables flagged for fixing */
    ae_int_t idx;          /* index of the variable currently being fixed */
    double cs;             /* cosine of the current Givens rotation */
    double sn;             /* sine of the current Givens rotation */
    double v;
    double vv;


    ae_assert(n>0, "SPDMatrixCholeskyUpdateFixBuf: N<=0", _state);
    ae_assert(a->rows>=n, "SPDMatrixCholeskyUpdateFixBuf: Rows(A)<N", _state);
    ae_assert(a->cols>=n, "SPDMatrixCholeskyUpdateFixBuf: Cols(A)<N", _state);
    ae_assert(fix->cnt>=n, "SPDMatrixCholeskyUpdateFixBuf: Length(Fix)<N", _state);

    /*
     * Count number of variables to fix.
     * Quick exit if NFix=0 or NFix=N
     */
    nfix = 0;
    for(i=0; i<=n-1; i++)
    {
        if( fix->ptr.p_bool[i] )
        {
            inc(&nfix, _state);
        }
    }
    if( nfix==0 )
    {

        /*
         * Nothing to fix
         */
        return;
    }
    if( nfix==n )
    {

        /*
         * All variables are fixed.
         * Set A to identity and exit.
         * (Only the selected triangle is written; the other one is untouched.)
         */
        if( isupper )
        {
            for(i=0; i<=n-1; i++)
            {
                a->ptr.pp_double[i][i] = (double)(1);
                for(j=i+1; j<=n-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        else
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
                a->ptr.pp_double[i][i] = (double)(1);
            }
        }
        return;
    }

    /*
     * If working with upper triangular matrix
     */
    if( isupper )
    {

        /*
         * Perform a sequence of updates which fix variables one by one.
         * This approach is different from one which is used when we work
         * with lower triangular matrix.
         */
        rvectorsetlengthatleast(bufr, n, _state);
        for(k=0; k<=n-1; k++)
        {
            if( fix->ptr.p_bool[k] )
            {
                idx = k;

                /*
                 * Quick exit if it is last variable
                 */
                if( idx==n-1 )
                {
                    for(i=0; i<=idx-1; i++)
                    {
                        a->ptr.pp_double[i][idx] = 0.0;
                    }
                    a->ptr.pp_double[idx][idx] = 1.0;
                    continue;
                }

                /*
                 * We have Cholesky decomposition of quadratic term in A,
                 * with upper triangle being stored as given below:
                 *
                 *         ( U00 u01 U02 )
                 *     U = (     u11 u12 )
                 *         (         U22 )
                 *
                 * Here u11 is diagonal element corresponding to variable K. We
                 * want to fix this variable, and we do so by modifying U as follows:
                 *
                 *             ( U00  0  U02 )
                 *     U_mod = (      1   0  )
                 *             (         U_m )
                 *
                 * with U_m = CHOLESKY [ (U22^T)*U22 + (u12^T)*u12 ]
                 *
                 * Of course, we can calculate U_m by calculating (U22^T)*U22 explicitly,
                 * modifying it and performing Cholesky decomposition of modified matrix.
                 * However, we can treat it as follows:
                 * * we already have CHOLESKY[(U22^T)*U22], which is equal to U22
                 * * we have rank-1 update (u12^T)*u12 applied to (U22^T)*U22
                 * * thus, we can calculate updated Cholesky with O(N^2) algorithm
                 *   instead of O(N^3) one
                 */
                for(j=idx+1; j<=n-1; j++)
                {
                    /* save u12 before the row/column of A is zeroed out */
                    bufr->ptr.p_double[j] = a->ptr.pp_double[idx][j];
                }
                for(i=0; i<=idx-1; i++)
                {
                    a->ptr.pp_double[i][idx] = 0.0;
                }
                a->ptr.pp_double[idx][idx] = 1.0;
                for(i=idx+1; i<=n-1; i++)
                {
                    a->ptr.pp_double[idx][i] = 0.0;
                }
                /* rank-1 update of trailing block via Givens rotations */
                for(i=idx+1; i<=n-1; i++)
                {
                    if( ae_fp_neq(bufr->ptr.p_double[i],(double)(0)) )
                    {
                        generaterotation(a->ptr.pp_double[i][i], bufr->ptr.p_double[i], &cs, &sn, &v, _state);
                        a->ptr.pp_double[i][i] = v;
                        bufr->ptr.p_double[i] = 0.0;
                        for(j=i+1; j<=n-1; j++)
                        {
                            v = a->ptr.pp_double[i][j];
                            vv = bufr->ptr.p_double[j];
                            a->ptr.pp_double[i][j] = cs*v+sn*vv;
                            bufr->ptr.p_double[j] = -sn*v+cs*vv;
                        }
                    }
                }
            }
        }
    }
    else
    {

        /*
         * Calculate rows of modified Cholesky factor, row-by-row
         * (updates performed during variable fixing are applied
         * simultaneously to each row)
         *
         * Buffer layout: BufR[0..N-1] is the saved column of A,
         * BufR[N+2*J+0]/BufR[N+2*J+1] store the (cos,sin) pair of the
         * rotation generated at row J, replayed on every later row.
         */
        rvectorsetlengthatleast(bufr, 3*n, _state);
        for(k=0; k<=n-1; k++)
        {
            if( fix->ptr.p_bool[k] )
            {
                idx = k;

                /*
                 * Quick exit if it is last variable
                 */
                if( idx==n-1 )
                {
                    for(i=0; i<=idx-1; i++)
                    {
                        a->ptr.pp_double[idx][i] = 0.0;
                    }
                    a->ptr.pp_double[idx][idx] = 1.0;
                    continue;
                }

                /*
                 * store column to buffer and clear row/column of A
                 */
                for(j=idx+1; j<=n-1; j++)
                {
                    bufr->ptr.p_double[j] = a->ptr.pp_double[j][idx];
                }
                for(i=0; i<=idx-1; i++)
                {
                    a->ptr.pp_double[idx][i] = 0.0;
                }
                a->ptr.pp_double[idx][idx] = 1.0;
                for(i=idx+1; i<=n-1; i++)
                {
                    a->ptr.pp_double[i][idx] = 0.0;
                }

                /*
                 * Apply update to rows of A
                 */
                for(i=idx+1; i<=n-1; i++)
                {

                    /*
                     * Update all previous updates [Idx+1...I-1] to I-th row
                     */
                    vv = bufr->ptr.p_double[i];
                    for(j=idx+1; j<=i-1; j++)
                    {
                        cs = bufr->ptr.p_double[n+2*j+0];
                        sn = bufr->ptr.p_double[n+2*j+1];
                        v = a->ptr.pp_double[i][j];
                        a->ptr.pp_double[i][j] = cs*v+sn*vv;
                        vv = -sn*v+cs*vv;
                    }

                    /*
                     * generate rotation applied to I-th element of update vector
                     */
                    generaterotation(a->ptr.pp_double[i][i], vv, &cs, &sn, &v, _state);
                    a->ptr.pp_double[i][i] = v;
                    bufr->ptr.p_double[n+2*i+0] = cs;
                    bufr->ptr.p_double[n+2*i+1] = sn;
                }
            }
        }
    }
}
50562 
50563 
50564 /*************************************************************************
50565 Sparse LU decomposition with column pivoting for sparsity and row pivoting
50566 for stability. Input must be square sparse matrix stored in CRS format.
50567 
50568 The algorithm  computes  LU  decomposition  of  a  general  square  matrix
50569 (rectangular ones are not supported). The result  of  an  algorithm  is  a
50570 representation of A as A = P*L*U*Q, where:
50571 * L is lower unitriangular matrix
50572 * U is upper triangular matrix
50573 * P = P0*P1*...*PK, K=N-1, Pi - permutation matrix for I and P[I]
50574 * Q = QK*...*Q1*Q0, K=N-1, Qi - permutation matrix for I and Q[I]
50575 
50576 This function pivots columns for higher sparsity, and then pivots rows for
50577 stability (larger element at the diagonal).
50578 
50579 INPUT PARAMETERS:
50580     A       -   sparse NxN matrix in CRS format. An exception is generated
50581                 if matrix is non-CRS or non-square.
50582     PivotType-  pivoting strategy:
50583                 * 0 for best pivoting available (2 in current version)
50584                 * 1 for row-only pivoting (NOT RECOMMENDED)
50585                 * 2 for complete pivoting which produces most sparse outputs
50586 
50587 OUTPUT PARAMETERS:
50588     A       -   the result of factorization, matrices L and U stored in
50589                 compact form using CRS sparse storage format:
50590                 * lower unitriangular L is stored strictly under main diagonal
                * upper triangular U is stored ON and ABOVE main diagonal
50592     P       -   row permutation matrix in compact form, array[N]
50593     Q       -   col permutation matrix in compact form, array[N]
50594 
50595 This function always succeeds, i.e. it ALWAYS returns valid factorization,
50596 but for your convenience it also returns  boolean  value  which  helps  to
50597 detect symbolically degenerate matrices:
50598 * function returns TRUE, if the matrix was factorized AND symbolically
50599   non-degenerate
50600 * function returns FALSE, if the matrix was factorized but U has strictly
50601   zero elements at the diagonal (the factorization is returned anyway).
50602 
50603 
50604   -- ALGLIB routine --
50605      03.09.2018
50606      Bochkanov Sergey
50607 *************************************************************************/
sparselu(sparsematrix * a,ae_int_t pivottype,ae_vector * p,ae_vector * q,ae_state * _state)50608 ae_bool sparselu(sparsematrix* a,
50609      ae_int_t pivottype,
50610      /* Integer */ ae_vector* p,
50611      /* Integer */ ae_vector* q,
50612      ae_state *_state)
50613 {
50614     ae_frame _frame_block;
50615     sluv2buffer buf2;
50616     ae_bool result;
50617 
50618     ae_frame_make(_state, &_frame_block);
50619     memset(&buf2, 0, sizeof(buf2));
50620     ae_vector_clear(p);
50621     ae_vector_clear(q);
50622     _sluv2buffer_init(&buf2, _state, ae_true);
50623 
50624     ae_assert((pivottype==0||pivottype==1)||pivottype==2, "SparseLU: unexpected pivot type", _state);
50625     ae_assert(sparseiscrs(a, _state), "SparseLU: A is not stored in CRS format", _state);
50626     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseLU: non-square A", _state);
50627     result = sptrflu(a, pivottype, p, q, &buf2, _state);
50628     ae_frame_leave(_state);
50629     return result;
50630 }
50631 
50632 
50633 /*************************************************************************
50634 Sparse Cholesky decomposition for skyline matrix using in-place algorithm
50635 without allocating additional storage.
50636 
50637 The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
50638 definite sparse matrix. The result of an algorithm is a representation  of
50639 A as A=U^T*U or A=L*L^T
50640 
50641 This function allows to perform very efficient decomposition of low-profile
50642 matrices (average bandwidth is ~5-10 elements). For larger matrices it  is
50643 recommended to use supernodal Cholesky decomposition: SparseCholeskyP() or
50644 SparseCholeskyAnalyze()/SparseCholeskyFactorize().
50645 
50646 INPUT PARAMETERS:
50647     A       -   sparse matrix in skyline storage (SKS) format.
50648     N       -   size of matrix A (can be smaller than actual size of A)
50649     IsUpper -   if IsUpper=True, then factorization is performed on  upper
50650                 triangle. Another triangle is ignored (it may contain some
50651                 data, but it is not changed).
50652 
50653 
50654 OUTPUT PARAMETERS:
50655     A       -   the result of factorization, stored in SKS. If IsUpper=True,
50656                 then the upper  triangle  contains  matrix  U,  such  that
50657                 A = U^T*U. Lower triangle is not changed.
50658                 Similarly, if IsUpper = False. In this case L is returned,
50659                 and we have A = L*(L^T).
50660                 Note that THIS function does not  perform  permutation  of
50661                 rows to reduce bandwidth.
50662 
50663 RESULT:
50664     If  the  matrix  is  positive-definite,  the  function  returns  True.
50665     Otherwise, the function returns False. Contents of A is not determined
50666     in such case.
50667 
50668 NOTE: for  performance  reasons  this  function  does NOT check that input
50669       matrix  includes  only  finite  values. It is your responsibility to
50670       make sure that there are no infinite or NAN values in the matrix.
50671 
50672   -- ALGLIB routine --
50673      16.01.2014
50674      Bochkanov Sergey
50675 *************************************************************************/
ae_bool sparsecholeskyskyline(sparsematrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t jnz;           /* first column where both A and L are nonzero   */
    ae_int_t jnza;          /* first nonzero column of the row of A          */
    ae_int_t jnzl;          /* first nonzero column of the row of L          */
    double v;
    double vv;
    double a12;             /* squared norm of the freshly computed A1 row   */
    ae_int_t nready;        /* size of the already-factorized leading block  */
    ae_int_t nadd;          /* rows added per iteration (always 1 here)      */
    ae_int_t banda;         /* skyline bandwidth of the current row          */
    ae_int_t offsa;         /* offset of A[JNZ] in A.Vals                    */
    ae_int_t offsl;         /* offset of L[I,JNZ] in A.Vals                  */
    ae_bool result;


    ae_assert(n>=0, "SparseCholeskySkyline: N<0", _state);
    ae_assert(sparsegetnrows(a, _state)>=n, "SparseCholeskySkyline: rows(A)<N", _state);
    ae_assert(sparsegetncols(a, _state)>=n, "SparseCholeskySkyline: cols(A)<N", _state);
    ae_assert(sparseissks(a, _state), "SparseCholeskySkyline: A is not stored in SKS format", _state);
    result = ae_false;

    /*
     * transpose if needed
     *
     * The core loop below works with the lower triangle; when the caller
     * requested the upper one, transpose in place first (and once more on
     * the way out).
     */
    if( isupper )
    {
        sparsetransposesks(a, _state);
    }

    /*
     * Perform Cholesky decomposition:
     * * we assume that leading NReady*NReady submatrix is done
     * * having Cholesky decomposition of NReady*NReady submatrix we
     *   obtain decomposition of larger (NReady+NAdd)*(NReady+NAdd) one.
     *
     * Here is algorithm. At the start we have
     *
     *     (      |   )
     *     (  L   |   )
     * S = (      |   )
     *     (----------)
     *     (  A   | B )
     *
     * with L being already computed Cholesky factor, A and B being
     * unprocessed parts of the matrix. Of course, L/A/B are stored
     * in SKS format.
     *
     * Then, we calculate A1:=(inv(L)*A')' and replace A with A1.
     * Then, we calculate B1:=B-A1*A1'     and replace B with B1
     *
     * Finally, we calculate small NAdd*NAdd Cholesky of B1 with
     * dense solver. Now, L/A1/B1 are Cholesky decomposition of the
     * larger (NReady+NAdd)*(NReady+NAdd) matrix.
     */
    nready = 0;
    nadd = 1;
    while(nready<n)
    {
        ae_assert(nadd==1, "SkylineCholesky: internal error", _state);

        /*
         * Calculate A1:=(inv(L)*A')'
         *
         * Elements are calculated row by row (example below is given
         * for NAdd=1):
         * * first, we solve L[0,0]*A1[0]=A[0]
         * * then, we solve  L[1,0]*A1[0]+L[1,1]*A1[1]=A[1]
         * * then, we move to next row and so on
         * * during calculation of A1 we update A12 - squared norm of A1
         *
         * We extensively use sparsity of both A/A1 and L:
         * * first, equations from 0 to BANDWIDTH(A1)-1 are completely zero
         * * second, for I>=BANDWIDTH(A1), I-th equation is reduced from
         *     L[I,0]*A1[0] + L[I,1]*A1[1] + ... + L[I,I]*A1[I] = A[I]
         *   to
         *     L[I,JNZ]*A1[JNZ] + ... + L[I,I]*A1[I] = A[I]
         *   where JNZ = max(NReady-BANDWIDTH(A1),I-BANDWIDTH(L[i]))
         *   (JNZ is an index of the first column where both A and L become
         *   nonzero).
         *
         * NOTE: we rely on details of SparseMatrix internal storage format.
         *       This is allowed by SparseMatrix specification.
         */
        a12 = 0.0;
        if( a->didx.ptr.p_int[nready]>0 )
        {
            banda = a->didx.ptr.p_int[nready];
            for(i=nready-banda; i<=nready-1; i++)
            {

                /*
                 * Elements of A1[0:I-1] were computed:
                 * * A1[0:NReady-BandA-1] are zero (sparse)
                 * * A1[NReady-BandA:I-1] replaced corresponding elements of A
                 *
                 * Now it is time to get I-th one.
                 *
                 * First, we calculate:
                 * * JNZA  - index of the first column where A become nonzero
                 * * JNZL  - index of the first column where L become nonzero
                 * * JNZ   - index of the first column where both A and L become nonzero
                 * * OffsA - offset of A[JNZ] in A.Vals
                 * * OffsL - offset of L[I,JNZ] in A.Vals
                 *
                 * Then, we solve SUM(A1[j]*L[I,j],j=JNZ..I-1) + A1[I]*L[I,I] = A[I],
                 * with A1[JNZ..I-1] already known, and A1[I] unknown.
                 */
                jnza = nready-banda;
                jnzl = i-a->didx.ptr.p_int[i];
                jnz = ae_maxint(jnza, jnzl, _state);
                offsa = a->ridx.ptr.p_int[nready]+(jnz-jnza);
                offsl = a->ridx.ptr.p_int[i]+(jnz-jnzl);
                v = 0.0;
                k = i-1-jnz;
                for(j=0; j<=k; j++)
                {
                    v = v+a->vals.ptr.p_double[offsa+j]*a->vals.ptr.p_double[offsl+j];
                }
                /* forward substitution step: A1[I]=(A[I]-dot)/L[I,I],
                 * stored in place of A[I]; accumulate |A1|^2 in A12 */
                vv = (a->vals.ptr.p_double[offsa+k+1]-v)/a->vals.ptr.p_double[offsl+k+1];
                a->vals.ptr.p_double[offsa+k+1] = vv;
                a12 = a12+vv*vv;
            }
        }

        /*
         * Calculate CHOLESKY(B-A1*A1')
         */
        offsa = a->ridx.ptr.p_int[nready]+a->didx.ptr.p_int[nready];
        v = a->vals.ptr.p_double[offsa];
        if( ae_fp_less_eq(v,a12) )
        {

            /*
             * Non-positive pivot: the matrix is not positive definite.
             * NOTE: we return immediately, so for IsUpper=True the matrix is
             *       left transposed here; the contract above declares the
             *       contents of A undefined on failure, so this is fine.
             */
            result = ae_false;
            return result;
        }
        a->vals.ptr.p_double[offsa] = ae_sqrt(v-a12, _state);

        /*
         * Increase size of the updated matrix
         */
        inc(&nready, _state);
    }

    /*
     * transpose if needed
     */
    if( isupper )
    {
        sparsetransposesks(a, _state);
    }
    result = ae_true;
    return result;
}
50835 
50836 
50837 /*************************************************************************
50838 Sparse Cholesky decomposition for a matrix  stored  in  any sparse storage,
50839 without rows/cols permutation.
50840 
50841 This function is the most convenient (less parameters to specify), although
50842 less efficient, version of sparse Cholesky.
50843 
50844 Internally it:
50845 * calls SparseCholeskyAnalyze()  function  to  perform  symbolic  analysis
50846   phase with no permutation being configured.
50847 * calls SparseCholeskyFactorize() function to perform numerical  phase  of
50848   the factorization
50849 
50850 Following alternatives may result in better performance:
50851 * using SparseCholeskyP(), which selects best  pivoting  available,  which
50852   almost always results in improved sparsity and cache locality
50853 * using  SparseCholeskyAnalyze() and SparseCholeskyFactorize()   functions
50854   directly,  which  may  improve  performance of repetitive factorizations
50855   with same sparsity patterns.
50856 
50857 The latter also allows one to perform  LDLT  factorization  of  indefinite
50858 matrix (one with strictly diagonal D, which is known  to  be  stable  only
50859 in few special cases, like quasi-definite matrices).
50860 
50861 INPUT PARAMETERS:
50862     A       -   a square NxN sparse matrix, stored in any storage format.
50863     IsUpper -   if IsUpper=True, then factorization is performed on  upper
50864                 triangle.  Another triangle is ignored on  input,  dropped
50865                 on output. Similarly, if IsUpper=False, the lower triangle
50866                 is processed.
50867 
50868 OUTPUT PARAMETERS:
50869     A       -   the result of factorization, stored in CRS format:
50870                 * if IsUpper=True, then the upper triangle contains matrix
50871                   U such  that  A = U^T*U and the lower triangle is empty.
50872                 * similarly, if IsUpper=False, then lower triangular L  is
50873                   returned and we have A = L*(L^T).
50874                 Note that THIS function does not  perform  permutation  of
50875                 the rows to reduce fill-in.
50876 
50877 RESULT:
50878     If  the  matrix  is  positive-definite,  the  function  returns  True.
50879     Otherwise, the function returns False.  Contents  of  A  is  undefined
50880     in such case.
50881 
50882 NOTE: for  performance  reasons  this  function  does NOT check that input
50883       matrix  includes  only  finite  values. It is your responsibility to
50884       make sure that there are no infinite or NAN values in the matrix.
50885 
50886   -- ALGLIB routine --
50887      16.09.2020
50888      Bochkanov Sergey
50889 *************************************************************************/
sparsecholesky(sparsematrix * a,ae_bool isupper,ae_state * _state)50890 ae_bool sparsecholesky(sparsematrix* a, ae_bool isupper, ae_state *_state)
50891 {
50892     ae_frame _frame_block;
50893     sparsedecompositionanalysis analysis;
50894     ae_int_t facttype;
50895     ae_int_t permtype;
50896     ae_vector dummyd;
50897     ae_vector dummyp;
50898     ae_bool result;
50899 
50900     ae_frame_make(_state, &_frame_block);
50901     memset(&analysis, 0, sizeof(analysis));
50902     memset(&dummyd, 0, sizeof(dummyd));
50903     memset(&dummyp, 0, sizeof(dummyp));
50904     _sparsedecompositionanalysis_init(&analysis, _state, ae_true);
50905     ae_vector_init(&dummyd, 0, DT_REAL, _state, ae_true);
50906     ae_vector_init(&dummyp, 0, DT_INT, _state, ae_true);
50907 
50908     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseCholesky: A is not square", _state);
50909 
50910     /*
50911      * Quick exit
50912      */
50913     if( sparsegetnrows(a, _state)==0 )
50914     {
50915         result = ae_true;
50916         ae_frame_leave(_state);
50917         return result;
50918     }
50919 
50920     /*
50921      * Choose factorization and permutation: vanilla Cholesky and no permutation
50922      */
50923     facttype = 0;
50924     permtype = -1;
50925 
50926     /*
50927      * Easy case - CRS matrix in lower triangle, no conversion or transposition is needed
50928      */
50929     if( sparseiscrs(a, _state)&&!isupper )
50930     {
50931         result = spsymmanalyze(a, facttype, permtype, &analysis.analysis, _state);
50932         if( !result )
50933         {
50934             ae_frame_leave(_state);
50935             return result;
50936         }
50937         result = spsymmfactorize(&analysis.analysis, _state);
50938         if( !result )
50939         {
50940             ae_frame_leave(_state);
50941             return result;
50942         }
50943         spsymmextract(&analysis.analysis, a, &dummyd, &dummyp, _state);
50944         ae_frame_leave(_state);
50945         return result;
50946     }
50947 
50948     /*
50949      * A bit more complex - we need conversion and/or transposition
50950      */
50951     if( isupper )
50952     {
50953         sparsecopytocrsbuf(a, &analysis.wrkat, _state);
50954         sparsecopytransposecrsbuf(&analysis.wrkat, &analysis.wrka, _state);
50955     }
50956     else
50957     {
50958         sparsecopytocrsbuf(a, &analysis.wrka, _state);
50959     }
50960     result = spsymmanalyze(&analysis.wrka, facttype, permtype, &analysis.analysis, _state);
50961     if( !result )
50962     {
50963         ae_frame_leave(_state);
50964         return result;
50965     }
50966     result = spsymmfactorize(&analysis.analysis, _state);
50967     if( !result )
50968     {
50969         ae_frame_leave(_state);
50970         return result;
50971     }
50972     spsymmextract(&analysis.analysis, &analysis.wrka, &dummyd, &dummyp, _state);
50973     if( isupper )
50974     {
50975         sparsecopytransposecrsbuf(&analysis.wrka, a, _state);
50976     }
50977     else
50978     {
50979         sparsecopybuf(&analysis.wrka, a, _state);
50980     }
50981     ae_frame_leave(_state);
50982     return result;
50983 }
50984 
50985 
50986 /*************************************************************************
50987 Sparse Cholesky decomposition for a matrix  stored  in  any sparse storage
50988 format, with performance-enhancing permutation of rows/cols.
50989 
50990 Present version is configured to perform a supernodal permutation combined
50991 with a sparsity-reducing ordering.
50992 
50993 This function is a wrapper around generic sparse  decomposition  functions
50994 that internally:
50995 * calls SparseCholeskyAnalyze()  function  to  perform  symbolic  analysis
50996   phase with best available permutation being configured.
50997 * calls SparseCholeskyFactorize() function to perform numerical  phase  of
50998   the factorization.
50999 
51000 NOTE: using  SparseCholeskyAnalyze() and SparseCholeskyFactorize() directly
51001       may improve  performance  of  repetitive  factorizations  with  same
51002       sparsity patterns. It also allows one to perform  LDLT factorization
51003       of  indefinite  matrix  -  a factorization with strictly diagonal D,
51004       which  is  known to be stable only in few special cases, like quasi-
51005       definite matrices.
51006 
51007 INPUT PARAMETERS:
51008     A       -   a square NxN sparse matrix, stored in any storage format.
51009     IsUpper -   if IsUpper=True, then factorization is performed on  upper
51010                 triangle.  Another triangle is ignored on  input,  dropped
51011                 on output. Similarly, if IsUpper=False, the lower triangle
51012                 is processed.
51013 
51014 OUTPUT PARAMETERS:
51015     A       -   the result of factorization, stored in CRS format:
51016                 * if IsUpper=True, then the upper triangle contains matrix
51017                   U such  that  A = U^T*U and the lower triangle is empty.
51018                 * similarly, if IsUpper=False, then lower triangular L  is
51019                   returned and we have A = L*(L^T).
51020     P       -   a row/column permutation, a product of P0*P1*...*Pk, k=N-1,
51021                 with Pi being permutation of rows/cols I and P[I]
51022 
51023 RESULT:
51024     If  the  matrix  is  positive-definite,  the  function  returns  True.
51025     Otherwise, the function returns False.  Contents  of  A  is  undefined
51026     in such case.
51027 
51028 NOTE: for  performance  reasons  this  function  does NOT check that input
51029       matrix  includes  only  finite  values. It is your responsibility to
51030       make sure that there are no infinite or NAN values in the matrix.
51031 
51032   -- ALGLIB routine --
51033      16.09.2020
51034      Bochkanov Sergey
51035 *************************************************************************/
sparsecholeskyp(sparsematrix * a,ae_bool isupper,ae_vector * p,ae_state * _state)51036 ae_bool sparsecholeskyp(sparsematrix* a,
51037      ae_bool isupper,
51038      /* Integer */ ae_vector* p,
51039      ae_state *_state)
51040 {
51041     ae_frame _frame_block;
51042     sparsedecompositionanalysis analysis;
51043     ae_vector dummyd;
51044     ae_int_t facttype;
51045     ae_int_t permtype;
51046     ae_bool result;
51047 
51048     ae_frame_make(_state, &_frame_block);
51049     memset(&analysis, 0, sizeof(analysis));
51050     memset(&dummyd, 0, sizeof(dummyd));
51051     ae_vector_clear(p);
51052     _sparsedecompositionanalysis_init(&analysis, _state, ae_true);
51053     ae_vector_init(&dummyd, 0, DT_REAL, _state, ae_true);
51054 
51055     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseCholeskyP: A is not square", _state);
51056 
51057     /*
51058      * Quick exit
51059      */
51060     if( sparsegetnrows(a, _state)==0 )
51061     {
51062         result = ae_true;
51063         ae_frame_leave(_state);
51064         return result;
51065     }
51066 
51067     /*
51068      * Choose factorization and permutation: vanilla Cholesky and best permutation available
51069      */
51070     facttype = 0;
51071     permtype = 0;
51072 
51073     /*
51074      * Easy case - CRS matrix in lower triangle, no conversion or transposition is needed
51075      */
51076     if( sparseiscrs(a, _state)&&!isupper )
51077     {
51078         result = spsymmanalyze(a, facttype, permtype, &analysis.analysis, _state);
51079         if( !result )
51080         {
51081             ae_frame_leave(_state);
51082             return result;
51083         }
51084         result = spsymmfactorize(&analysis.analysis, _state);
51085         if( !result )
51086         {
51087             ae_frame_leave(_state);
51088             return result;
51089         }
51090         spsymmextract(&analysis.analysis, a, &dummyd, p, _state);
51091         ae_frame_leave(_state);
51092         return result;
51093     }
51094 
51095     /*
51096      * A bit more complex - we need conversion and/or transposition
51097      */
51098     if( isupper )
51099     {
51100         sparsecopytocrsbuf(a, &analysis.wrkat, _state);
51101         sparsecopytransposecrsbuf(&analysis.wrkat, &analysis.wrka, _state);
51102     }
51103     else
51104     {
51105         sparsecopytocrsbuf(a, &analysis.wrka, _state);
51106     }
51107     result = spsymmanalyze(&analysis.wrka, facttype, permtype, &analysis.analysis, _state);
51108     if( !result )
51109     {
51110         ae_frame_leave(_state);
51111         return result;
51112     }
51113     result = spsymmfactorize(&analysis.analysis, _state);
51114     if( !result )
51115     {
51116         ae_frame_leave(_state);
51117         return result;
51118     }
51119     spsymmextract(&analysis.analysis, &analysis.wrka, &dummyd, p, _state);
51120     if( isupper )
51121     {
51122         sparsecopytransposecrsbuf(&analysis.wrka, a, _state);
51123     }
51124     else
51125     {
51126         sparsecopybuf(&analysis.wrka, a, _state);
51127     }
51128     ae_frame_leave(_state);
51129     return result;
51130 }
51131 
51132 
51133 /*************************************************************************
51134 Sparse Cholesky/LDLT decomposition: symbolic analysis phase.
51135 
51136 This function is a part of the 'expert' sparse Cholesky API:
51137 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
51138   matrix to be factorized into internal storage
51139 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
51140   with lower bounds on pivot magnitudes and additional overflow safeguards
51141 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
51142   precomputed symbolic analysis and internally stored matrix - and outputs
51143   result
51144 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
51145   pattern into internal storage so  one  may  reuse  previously  allocated
51146   temporaries and previously performed symbolic analysis
51147 
51148 This specific function performs preliminary analysis of the  Cholesky/LDLT
51149 factorization. It allows to choose  different  permutation  types  and  to
51150 choose between classic Cholesky and  indefinite  LDLT  factorization  (the
51151 latter is computed with strictly diagonal D, i.e.  without  Bunch-Kaufman
51152 pivoting).
51153 
51154 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
51155       matrices. However, numerical stability is guaranteed ONLY for a class
51156       of quasi-definite matrices.
51157 
51158 NOTE: all internal processing is performed with lower triangular  matrices
51159       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
51160       triangular storage means  that  one  format  conversion  and/or  one
51161       transposition will be performed  internally  for  the  analysis  and
51162       factorization phases. Thus, highest  performance  is  achieved  when
51163       input is a lower triangular CRS matrix.
51164 
51165 INPUT PARAMETERS:
51166     A           -   sparse square matrix in any sparse storage format.
51167     IsUpper     -   whether upper or lower  triangle  is  decomposed  (the
51168                     other one is ignored).
51169     FactType    -   factorization type:
51170                     * 0 for traditional Cholesky of SPD matrix
51171                     * 1 for LDLT decomposition with strictly  diagonal  D,
51172                         which may have non-positive entries.
51173     PermType    -   permutation type:
51174                     *-1 for absence of permutation
51175                     * 0 for best fill-in reducing  permutation  available,
51176                         which is 3 in the current version
51177                     * 1 for supernodal ordering (improves locality and
51178                       performance, does NOT change fill-in factor)
51179                     * 2 for original AMD ordering
51180                     * 3 for  improved  AMD  (approximate  minimum  degree)
51181                         ordering with better  handling  of  matrices  with
51182                         dense rows/columns
51183 
51184 OUTPUT PARAMETERS:
51185     Analysis    -   contains:
51186                     * symbolic analysis of the matrix structure which will
51187                       be used later to guide numerical factorization.
51188                     * specific numeric values loaded into internal  memory
51189                       waiting for the factorization to be performed
51190 
51191 This function fails if and only if the matrix A is symbolically degenerate
51192 i.e. has diagonal element which is exactly zero. In  such  case  False  is
51193 returned, contents of Analysis object is undefined.
51194 
51195   -- ALGLIB routine --
51196      20.09.2020
51197      Bochkanov Sergey
51198 *************************************************************************/
sparsecholeskyanalyze(sparsematrix * a,ae_bool isupper,ae_int_t facttype,ae_int_t permtype,sparsedecompositionanalysis * analysis,ae_state * _state)51199 ae_bool sparsecholeskyanalyze(sparsematrix* a,
51200      ae_bool isupper,
51201      ae_int_t facttype,
51202      ae_int_t permtype,
51203      sparsedecompositionanalysis* analysis,
51204      ae_state *_state)
51205 {
51206     ae_bool result;
51207 
51208     _sparsedecompositionanalysis_clear(analysis);
51209 
51210     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseCholeskyAnalyze: A is not square", _state);
51211     ae_assert(facttype==0||facttype==1, "SparseCholeskyAnalyze: unexpected FactType", _state);
51212     ae_assert((((((permtype==0||permtype==1)||permtype==2)||permtype==3)||permtype==-1)||permtype==-2)||permtype==-3, "SparseCholeskyAnalyze: unexpected PermType", _state);
51213     analysis->n = sparsegetnrows(a, _state);
51214     analysis->facttype = facttype;
51215     analysis->permtype = permtype;
51216     if( !sparseiscrs(a, _state) )
51217     {
51218 
51219         /*
51220          * The matrix is stored in non-CRS format. First, we have to convert
51221          * it to CRS. Then we may need to transpose it in order to get lower
51222          * triangular one (as supported by SPSymmAnalyze).
51223          */
51224         sparsecopytocrs(a, &analysis->crsa, _state);
51225         if( isupper )
51226         {
51227             sparsecopytransposecrsbuf(&analysis->crsa, &analysis->crsat, _state);
51228             result = spsymmanalyze(&analysis->crsat, facttype, permtype, &analysis->analysis, _state);
51229         }
51230         else
51231         {
51232             result = spsymmanalyze(&analysis->crsa, facttype, permtype, &analysis->analysis, _state);
51233         }
51234     }
51235     else
51236     {
51237 
51238         /*
51239          * The matrix is stored in CRS format. However we may need to
51240          * transpose it in order to get lower triangular one (as supported
51241          * by SPSymmAnalyze).
51242          */
51243         if( isupper )
51244         {
51245             sparsecopytransposecrsbuf(a, &analysis->crsat, _state);
51246             result = spsymmanalyze(&analysis->crsat, facttype, permtype, &analysis->analysis, _state);
51247         }
51248         else
51249         {
51250             result = spsymmanalyze(a, facttype, permtype, &analysis->analysis, _state);
51251         }
51252     }
51253     return result;
51254 }
51255 
51256 
51257 /*************************************************************************
51258 Allows to control stability-improving  modification  strategy  for  sparse
51259 Cholesky/LDLT decompositions. Modified Cholesky is more  robust  than  its
51260 unmodified counterpart.
51261 
51262 This function is a part of the 'expert' sparse Cholesky API:
51263 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
51264   matrix to be factorized into internal storage
51265 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
51266   with lower bounds on pivot magnitudes and additional overflow safeguards
51267 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
51268   precomputed symbolic analysis and internally stored matrix - and outputs
51269   result
51270 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
51271   pattern into internal storage so  one  may  reuse  previously  allocated
51272   temporaries and previously performed symbolic analysis
51273 
51274 INPUT PARAMETERS:
51275     Analysis    -   symbolic analysis of the matrix structure
51276     ModStrategy -   modification type:
51277                     * 0 for traditional Cholesky/LDLT (Cholesky fails when
51278                       encounters nonpositive pivot, LDLT fails  when  zero
51279                       pivot   is  encountered,  no  stability  checks  for
51280                       overflows/underflows)
51281                     * 1 for modified Cholesky with additional checks:
51282                       * pivots less than ModParam0 are increased; (similar
51283                         sign-preserving procedure is applied during LDLT)
51284                       * if,  at  some  moment,  sum  of absolute values of
51285                         elements in column  J  will  become  greater  than
51286                         ModParam1, Cholesky/LDLT will treat it as  failure
51287                         and will stop immediately
51288     P0, P1, P2,P3 - modification parameters #0 #1, #2 and #3.
51289                     Params #2 and #3 are ignored in current version.
51290 
51291 OUTPUT PARAMETERS:
51292     Analysis    -   symbolic analysis of the matrix structure, new strategy
51293                     Results will be seen with next SparseCholeskyFactorize()
51294                     call.
51295 
51296   -- ALGLIB routine --
51297      20.09.2020
51298      Bochkanov Sergey
51299 *************************************************************************/
void sparsecholeskysetmodtype(sparsedecompositionanalysis* analysis,
     ae_int_t modstrategy,
     double p0,
     double p1,
     double p2,
     double p3,
     ae_state *_state)
{

    /*
     * Thin forwarder: the modification strategy and its parameters are
     * simply passed down to the internal SPSymm analysis object; they take
     * effect on the next SparseCholeskyFactorize() call.
     */
    spsymmsetmodificationstrategy(&analysis->analysis, modstrategy, p0, p1, p2, p3, _state);
}
51312 
51313 
51314 /*************************************************************************
51315 Sparse Cholesky decomposition: numerical analysis phase.
51316 
51317 This function is a part of the 'expert' sparse Cholesky API:
51318 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
51319   matrix to be factorized into internal storage
51320 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
51321   with lower bounds on pivot magnitudes and additional overflow safeguards
51322 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
51323   precomputed symbolic analysis and internally stored matrix - and outputs
51324   result
51325 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
51326   pattern into internal storage so  one  may  reuse  previously  allocated
51327   temporaries and previously performed symbolic analysis
51328 
51329 Depending on settings specified during SparseCholeskyAnalyze() call it may
51330 produce classic Cholesky or L*D*LT  decomposition  (with strictly diagonal
51331 D), without permutation or with performance-enhancing permutation P.
51332 
51333 NOTE: all internal processing is performed with lower triangular  matrices
51334       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
51335       triangular storage means  that  one  format  conversion  and/or  one
51336       transposition will be performed  internally  for  the  analysis  and
51337       factorization phases. Thus, highest  performance  is  achieved  when
51338       input is a lower triangular CRS matrix, and lower triangular  output
51339       is requested.
51340 
51341 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
51342       matrices. However, numerical stability is guaranteed ONLY for a class
51343       of quasi-definite matrices.
51344 
51345 INPUT PARAMETERS:
51346     Analysis    -   prior analysis with internally stored matrix which will
51347                     be factorized
51348     NeedUpper   -   whether upper triangular or lower triangular output is
51349                     needed
51350 
51351 OUTPUT PARAMETERS:
51352     A           -   Cholesky decomposition of A stored in lower triangular
51353                     CRS format, i.e. A=L*L' (or upper triangular CRS, with
51354                     A=U'*U, depending on NeedUpper parameter).
51355     D           -   array[N], diagonal factor. If no diagonal  factor  was
51356                     required during analysis  phase,  still  returned  but
51357                     filled with 1's
51358     P           -   array[N], pivots. Permutation matrix P is a product of
51359                     P(0)*P(1)*...*P(N-1), where P(i) is a  permutation  of
51360                     row/col I and P[I] (with P[I]>=I).
51361                     If no permutation was requested during analysis phase,
51362                     still returned but filled with identity permutation.
51363 
51364 The function returns True  when  factorization  resulted  in nondegenerate
51365 matrix. False is returned when factorization fails (Cholesky factorization
51366 of indefinite matrix) or LDLT factorization has exactly zero  elements  at
51367 the diagonal. In the latter case contents of A, D and P is undefined.
51368 
51369 The analysis object is not changed during  the  factorization.  Subsequent
51370 calls to SparseCholeskyFactorize() will result in same factorization being
51371 performed one more time.
51372 
51373   -- ALGLIB routine --
51374      20.09.2020
51375      Bochkanov Sergey
51376 *************************************************************************/
sparsecholeskyfactorize(sparsedecompositionanalysis * analysis,ae_bool needupper,sparsematrix * a,ae_vector * d,ae_vector * p,ae_state * _state)51377 ae_bool sparsecholeskyfactorize(sparsedecompositionanalysis* analysis,
51378      ae_bool needupper,
51379      sparsematrix* a,
51380      /* Real    */ ae_vector* d,
51381      /* Integer */ ae_vector* p,
51382      ae_state *_state)
51383 {
51384     ae_bool result;
51385 
51386     _sparsematrix_clear(a);
51387     ae_vector_clear(d);
51388     ae_vector_clear(p);
51389 
51390     if( needupper )
51391     {
51392         result = spsymmfactorize(&analysis->analysis, _state);
51393         if( !result )
51394         {
51395             return result;
51396         }
51397         spsymmextract(&analysis->analysis, &analysis->wrka, d, p, _state);
51398         sparsecopytransposecrsbuf(&analysis->wrka, a, _state);
51399     }
51400     else
51401     {
51402         result = spsymmfactorize(&analysis->analysis, _state);
51403         if( !result )
51404         {
51405             return result;
51406         }
51407         spsymmextract(&analysis->analysis, a, d, p, _state);
51408     }
51409     return result;
51410 }
51411 
51412 
51413 /*************************************************************************
51414 Sparse  Cholesky  decomposition:  update  internally  stored  matrix  with
51415 another one with exactly same sparsity pattern.
51416 
51417 This function is a part of the 'expert' sparse Cholesky API:
51418 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
51419   matrix to be factorized into internal storage
51420 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
51421   with lower bounds on pivot magnitudes and additional overflow safeguards
51422 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
51423   precomputed symbolic analysis and internally stored matrix - and outputs
51424   result
51425 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
51426   pattern into internal storage so  one  may  reuse  previously  allocated
51427   temporaries and previously performed symbolic analysis
51428 
51429 This specific function replaces internally stored  numerical  values  with
51430 ones from another sparse matrix (but having exactly same sparsity  pattern
51431 as one that was used for initial SparseCholeskyAnalyze() call).
51432 
51433 NOTE: all internal processing is performed with lower triangular  matrices
51434       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
51435       triangular storage means  that  one  format  conversion  and/or  one
51436       transposition will be performed  internally  for  the  analysis  and
51437       factorization phases. Thus, highest  performance  is  achieved  when
51438       input is a lower triangular CRS matrix.
51439 
51440 INPUT PARAMETERS:
51441     Analysis    -   analysis object
51442     A           -   sparse square matrix in any sparse storage format.  It
51443                     MUST have exactly same sparsity pattern as that of the
51444                     matrix that was passed to SparseCholeskyAnalyze().
51445                     Any difference (missing elements or additional elements)
51446                     may result in unpredictable and undefined  behavior  -
51447                     an algorithm may fail due to memory access violation.
51448     IsUpper     -   whether upper or lower  triangle  is  decomposed  (the
51449                     other one is ignored).
51450 
51451 OUTPUT PARAMETERS:
51452     Analysis    -   contains:
51453                     * symbolic analysis of the matrix structure which will
51454                       be used later to guide numerical factorization.
51455                     * specific numeric values loaded into internal  memory
51456                       waiting for the factorization to be performed
51457 
51458   -- ALGLIB routine --
51459      20.09.2020
51460      Bochkanov Sergey
51461 *************************************************************************/
sparsecholeskyreload(sparsedecompositionanalysis * analysis,sparsematrix * a,ae_bool isupper,ae_state * _state)51462 void sparsecholeskyreload(sparsedecompositionanalysis* analysis,
51463      sparsematrix* a,
51464      ae_bool isupper,
51465      ae_state *_state)
51466 {
51467 
51468 
51469     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseCholeskyReload: A is not square", _state);
51470     ae_assert(sparsegetnrows(a, _state)==analysis->n, "SparseCholeskyReload: size of A does not match that stored in Analysis", _state);
51471     if( !sparseiscrs(a, _state) )
51472     {
51473 
51474         /*
51475          * The matrix is stored in non-CRS format. First, we have to convert
51476          * it to CRS. Then we may need to transpose it in order to get lower
51477          * triangular one (as supported by SPSymmAnalyze).
51478          */
51479         sparsecopytocrs(a, &analysis->crsa, _state);
51480         if( isupper )
51481         {
51482             sparsecopytransposecrsbuf(&analysis->crsa, &analysis->crsat, _state);
51483             spsymmreload(&analysis->analysis, &analysis->crsat, _state);
51484         }
51485         else
51486         {
51487             spsymmreload(&analysis->analysis, &analysis->crsa, _state);
51488         }
51489     }
51490     else
51491     {
51492 
51493         /*
51494          * The matrix is stored in CRS format. However we may need to
51495          * transpose it in order to get lower triangular one (as supported
51496          * by SPSymmAnalyze).
51497          */
51498         if( isupper )
51499         {
51500             sparsecopytransposecrsbuf(a, &analysis->crsat, _state);
51501             spsymmreload(&analysis->analysis, &analysis->crsat, _state);
51502         }
51503         else
51504         {
51505             spsymmreload(&analysis->analysis, a, _state);
51506         }
51507     }
51508 }
51509 
51510 
rmatrixlup(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)51511 void rmatrixlup(/* Real    */ ae_matrix* a,
51512      ae_int_t m,
51513      ae_int_t n,
51514      /* Integer */ ae_vector* pivots,
51515      ae_state *_state)
51516 {
51517     ae_frame _frame_block;
51518     ae_vector tmp;
51519     ae_int_t i;
51520     ae_int_t j;
51521     double mx;
51522     double v;
51523 
51524     ae_frame_make(_state, &_frame_block);
51525     memset(&tmp, 0, sizeof(tmp));
51526     ae_vector_clear(pivots);
51527     ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
51528 
51529 
51530     /*
51531      * Internal LU decomposition subroutine.
51532      * Never call it directly.
51533      */
51534     ae_assert(m>0, "RMatrixLUP: incorrect M!", _state);
51535     ae_assert(n>0, "RMatrixLUP: incorrect N!", _state);
51536 
51537     /*
51538      * Scale matrix to avoid overflows,
51539      * decompose it, then scale back.
51540      */
51541     mx = (double)(0);
51542     for(i=0; i<=m-1; i++)
51543     {
51544         for(j=0; j<=n-1; j++)
51545         {
51546             mx = ae_maxreal(mx, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
51547         }
51548     }
51549     if( ae_fp_neq(mx,(double)(0)) )
51550     {
51551         v = 1/mx;
51552         for(i=0; i<=m-1; i++)
51553         {
51554             ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
51555         }
51556     }
51557     ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
51558     ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
51559     rmatrixluprec(a, 0, m, n, pivots, &tmp, _state);
51560     if( ae_fp_neq(mx,(double)(0)) )
51561     {
51562         v = mx;
51563         for(i=0; i<=m-1; i++)
51564         {
51565             ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,ae_minint(i, n-1, _state)), v);
51566         }
51567     }
51568     ae_frame_leave(_state);
51569 }
51570 
51571 
cmatrixlup(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)51572 void cmatrixlup(/* Complex */ ae_matrix* a,
51573      ae_int_t m,
51574      ae_int_t n,
51575      /* Integer */ ae_vector* pivots,
51576      ae_state *_state)
51577 {
51578     ae_frame _frame_block;
51579     ae_vector tmp;
51580     ae_int_t i;
51581     ae_int_t j;
51582     double mx;
51583     double v;
51584 
51585     ae_frame_make(_state, &_frame_block);
51586     memset(&tmp, 0, sizeof(tmp));
51587     ae_vector_clear(pivots);
51588     ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
51589 
51590 
51591     /*
51592      * Internal LU decomposition subroutine.
51593      * Never call it directly.
51594      */
51595     ae_assert(m>0, "CMatrixLUP: incorrect M!", _state);
51596     ae_assert(n>0, "CMatrixLUP: incorrect N!", _state);
51597 
51598     /*
51599      * Scale matrix to avoid overflows,
51600      * decompose it, then scale back.
51601      */
51602     mx = (double)(0);
51603     for(i=0; i<=m-1; i++)
51604     {
51605         for(j=0; j<=n-1; j++)
51606         {
51607             mx = ae_maxreal(mx, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state);
51608         }
51609     }
51610     if( ae_fp_neq(mx,(double)(0)) )
51611     {
51612         v = 1/mx;
51613         for(i=0; i<=m-1; i++)
51614         {
51615             ae_v_cmuld(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), v);
51616         }
51617     }
51618     ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
51619     ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
51620     cmatrixluprec(a, 0, m, n, pivots, &tmp, _state);
51621     if( ae_fp_neq(mx,(double)(0)) )
51622     {
51623         v = mx;
51624         for(i=0; i<=m-1; i++)
51625         {
51626             ae_v_cmuld(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,ae_minint(i, n-1, _state)), v);
51627         }
51628     }
51629     ae_frame_leave(_state);
51630 }
51631 
51632 
rmatrixplu(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)51633 void rmatrixplu(/* Real    */ ae_matrix* a,
51634      ae_int_t m,
51635      ae_int_t n,
51636      /* Integer */ ae_vector* pivots,
51637      ae_state *_state)
51638 {
51639     ae_frame _frame_block;
51640     ae_vector tmp;
51641     ae_int_t i;
51642     ae_int_t j;
51643     double mx;
51644     double v;
51645 
51646     ae_frame_make(_state, &_frame_block);
51647     memset(&tmp, 0, sizeof(tmp));
51648     ae_vector_clear(pivots);
51649     ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
51650 
51651 
51652     /*
51653      * Internal LU decomposition subroutine.
51654      * Never call it directly.
51655      */
51656     ae_assert(m>0, "RMatrixPLU: incorrect M!", _state);
51657     ae_assert(n>0, "RMatrixPLU: incorrect N!", _state);
51658     ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
51659     ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
51660 
51661     /*
51662      * Scale matrix to avoid overflows,
51663      * decompose it, then scale back.
51664      */
51665     mx = (double)(0);
51666     for(i=0; i<=m-1; i++)
51667     {
51668         for(j=0; j<=n-1; j++)
51669         {
51670             mx = ae_maxreal(mx, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
51671         }
51672     }
51673     if( ae_fp_neq(mx,(double)(0)) )
51674     {
51675         v = 1/mx;
51676         for(i=0; i<=m-1; i++)
51677         {
51678             ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
51679         }
51680     }
51681     rmatrixplurec(a, 0, m, n, pivots, &tmp, _state);
51682     if( ae_fp_neq(mx,(double)(0)) )
51683     {
51684         v = mx;
51685         for(i=0; i<=ae_minint(m, n, _state)-1; i++)
51686         {
51687             ae_v_muld(&a->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v);
51688         }
51689     }
51690     ae_frame_leave(_state);
51691 }
51692 
51693 
cmatrixplu(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)51694 void cmatrixplu(/* Complex */ ae_matrix* a,
51695      ae_int_t m,
51696      ae_int_t n,
51697      /* Integer */ ae_vector* pivots,
51698      ae_state *_state)
51699 {
51700     ae_frame _frame_block;
51701     ae_vector tmp;
51702     ae_int_t i;
51703     ae_int_t j;
51704     double mx;
51705     ae_complex v;
51706 
51707     ae_frame_make(_state, &_frame_block);
51708     memset(&tmp, 0, sizeof(tmp));
51709     ae_vector_clear(pivots);
51710     ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
51711 
51712 
51713     /*
51714      * Internal LU decomposition subroutine.
51715      * Never call it directly.
51716      */
51717     ae_assert(m>0, "CMatrixPLU: incorrect M!", _state);
51718     ae_assert(n>0, "CMatrixPLU: incorrect N!", _state);
51719     ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
51720     ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
51721 
51722     /*
51723      * Scale matrix to avoid overflows,
51724      * decompose it, then scale back.
51725      */
51726     mx = (double)(0);
51727     for(i=0; i<=m-1; i++)
51728     {
51729         for(j=0; j<=n-1; j++)
51730         {
51731             mx = ae_maxreal(mx, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state);
51732         }
51733     }
51734     if( ae_fp_neq(mx,(double)(0)) )
51735     {
51736         v = ae_complex_from_d(1/mx);
51737         for(i=0; i<=m-1; i++)
51738         {
51739             ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), v);
51740         }
51741     }
51742     cmatrixplurec(a, 0, m, n, pivots, &tmp, _state);
51743     if( ae_fp_neq(mx,(double)(0)) )
51744     {
51745         v = ae_complex_from_d(mx);
51746         for(i=0; i<=ae_minint(m, n, _state)-1; i++)
51747         {
51748             ae_v_cmulc(&a->ptr.pp_complex[i][i], 1, ae_v_len(i,n-1), v);
51749         }
51750     }
51751     ae_frame_leave(_state);
51752 }
51753 
51754 
51755 /*************************************************************************
51756 Advanced interface for SPDMatrixCholesky, performs no temporary allocations.
51757 
51758 INPUT PARAMETERS:
51759     A       -   matrix given by upper or lower triangle
51760     Offs    -   offset of diagonal block to decompose
51761     N       -   diagonal block size
51762     IsUpper -   what half is given
51763     Tmp     -   temporary array; allocated by function, if its size is too
51764                 small; can be reused on subsequent calls.
51765 
51766 OUTPUT PARAMETERS:
51767     A       -   upper (or lower) triangle contains Cholesky decomposition
51768 
51769 RESULT:
51770     True, on success
51771     False, on failure
51772 
51773   -- ALGLIB routine --
51774      15.12.2009
51775      Bochkanov Sergey
51776 *************************************************************************/
spdmatrixcholeskyrec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)51777 ae_bool spdmatrixcholeskyrec(/* Real    */ ae_matrix* a,
51778      ae_int_t offs,
51779      ae_int_t n,
51780      ae_bool isupper,
51781      /* Real    */ ae_vector* tmp,
51782      ae_state *_state)
51783 {
51784     ae_int_t n1;
51785     ae_int_t n2;
51786     ae_int_t tsa;
51787     ae_int_t tsb;
51788     ae_bool result;
51789 
51790 
51791     tsa = matrixtilesizea(_state);
51792     tsb = matrixtilesizeb(_state);
51793 
51794     /*
51795      * Allocate temporaries
51796      */
51797     if( tmp->cnt<2*n )
51798     {
51799         ae_vector_set_length(tmp, 2*n, _state);
51800     }
51801 
51802     /*
51803      * Basecases
51804      */
51805     if( n<1 )
51806     {
51807         result = ae_false;
51808         return result;
51809     }
51810     if( n==1 )
51811     {
51812         if( ae_fp_greater(a->ptr.pp_double[offs][offs],(double)(0)) )
51813         {
51814             a->ptr.pp_double[offs][offs] = ae_sqrt(a->ptr.pp_double[offs][offs], _state);
51815             result = ae_true;
51816         }
51817         else
51818         {
51819             result = ae_false;
51820         }
51821         return result;
51822     }
51823     if( n<=tsb )
51824     {
51825         if( spdmatrixcholeskymkl(a, offs, n, isupper, &result, _state) )
51826         {
51827             return result;
51828         }
51829     }
51830     if( n<=tsa )
51831     {
51832         result = trfac_spdmatrixcholesky2(a, offs, n, isupper, tmp, _state);
51833         return result;
51834     }
51835 
51836     /*
51837      * Split task into smaller ones
51838      */
51839     if( n>tsb )
51840     {
51841 
51842         /*
51843          * Split leading B-sized block from the beginning (block-matrix approach)
51844          */
51845         n1 = tsb;
51846         n2 = n-n1;
51847     }
51848     else
51849     {
51850 
51851         /*
51852          * Smaller than B-size, perform cache-oblivious split
51853          */
51854         tiledsplit(n, tsa, &n1, &n2, _state);
51855     }
51856     result = spdmatrixcholeskyrec(a, offs, n1, isupper, tmp, _state);
51857     if( !result )
51858     {
51859         return result;
51860     }
51861     if( n2>0 )
51862     {
51863         if( isupper )
51864         {
51865             rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 1, a, offs, offs+n1, _state);
51866             rmatrixsyrk(n2, n1, -1.0, a, offs, offs+n1, 1, 1.0, a, offs+n1, offs+n1, isupper, _state);
51867         }
51868         else
51869         {
51870             rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 1, a, offs+n1, offs, _state);
51871             rmatrixsyrk(n2, n1, -1.0, a, offs+n1, offs, 0, 1.0, a, offs+n1, offs+n1, isupper, _state);
51872         }
51873         result = spdmatrixcholeskyrec(a, offs+n1, n2, isupper, tmp, _state);
51874         if( !result )
51875         {
51876             return result;
51877         }
51878     }
51879     return result;
51880 }
51881 
51882 
51883 /*************************************************************************
51884 Recursive computational subroutine for HPDMatrixCholesky
51885 
51886   -- ALGLIB routine --
51887      15.12.2009
51888      Bochkanov Sergey
51889 *************************************************************************/
trfac_hpdmatrixcholeskyrec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)51890 static ae_bool trfac_hpdmatrixcholeskyrec(/* Complex */ ae_matrix* a,
51891      ae_int_t offs,
51892      ae_int_t n,
51893      ae_bool isupper,
51894      /* Complex */ ae_vector* tmp,
51895      ae_state *_state)
51896 {
51897     ae_int_t n1;
51898     ae_int_t n2;
51899     ae_int_t tsa;
51900     ae_int_t tsb;
51901     ae_bool result;
51902 
51903 
51904     tsa = matrixtilesizea(_state)/2;
51905     tsb = matrixtilesizeb(_state);
51906 
51907     /*
51908      * check N
51909      */
51910     if( n<1 )
51911     {
51912         result = ae_false;
51913         return result;
51914     }
51915 
51916     /*
51917      * Prepare buffer
51918      */
51919     if( tmp->cnt<2*n )
51920     {
51921         ae_vector_set_length(tmp, 2*n, _state);
51922     }
51923 
51924     /*
51925      * Basecases
51926      *
51927      * NOTE: we do not use MKL for basecases because their price is only
51928      *       minor part of overall running time for N>256.
51929      */
51930     if( n==1 )
51931     {
51932         if( ae_fp_greater(a->ptr.pp_complex[offs][offs].x,(double)(0)) )
51933         {
51934             a->ptr.pp_complex[offs][offs] = ae_complex_from_d(ae_sqrt(a->ptr.pp_complex[offs][offs].x, _state));
51935             result = ae_true;
51936         }
51937         else
51938         {
51939             result = ae_false;
51940         }
51941         return result;
51942     }
51943     if( n<=tsa )
51944     {
51945         result = trfac_hpdmatrixcholesky2(a, offs, n, isupper, tmp, _state);
51946         return result;
51947     }
51948 
51949     /*
51950      * Split task into smaller ones
51951      */
51952     if( n>tsb )
51953     {
51954 
51955         /*
51956          * Split leading B-sized block from the beginning (block-matrix approach)
51957          */
51958         n1 = tsb;
51959         n2 = n-n1;
51960     }
51961     else
51962     {
51963 
51964         /*
51965          * Smaller than B-size, perform cache-oblivious split
51966          */
51967         tiledsplit(n, tsa, &n1, &n2, _state);
51968     }
51969     result = trfac_hpdmatrixcholeskyrec(a, offs, n1, isupper, tmp, _state);
51970     if( !result )
51971     {
51972         return result;
51973     }
51974     if( n2>0 )
51975     {
51976         if( isupper )
51977         {
51978             cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 2, a, offs, offs+n1, _state);
51979             cmatrixherk(n2, n1, -1.0, a, offs, offs+n1, 2, 1.0, a, offs+n1, offs+n1, isupper, _state);
51980         }
51981         else
51982         {
51983             cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 2, a, offs+n1, offs, _state);
51984             cmatrixherk(n2, n1, -1.0, a, offs+n1, offs, 0, 1.0, a, offs+n1, offs+n1, isupper, _state);
51985         }
51986         result = trfac_hpdmatrixcholeskyrec(a, offs+n1, n2, isupper, tmp, _state);
51987         if( !result )
51988         {
51989             return result;
51990         }
51991     }
51992     return result;
51993 }
51994 
51995 
51996 /*************************************************************************
51997 Level-2 Hermitian Cholesky subroutine.
51998 
51999   -- LAPACK routine (version 3.0) --
52000      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
52001      Courant Institute, Argonne National Lab, and Rice University
52002      February 29, 1992
52003 *************************************************************************/
static ae_bool trfac_hpdmatrixcholesky2(/* Complex */ ae_matrix* aaa,
     ae_int_t offs,
     ae_int_t n,
     ae_bool isupper,
     /* Complex */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double ajj;
    ae_complex v;
    double r;
    ae_bool result;


    /*
     * Works in-place on the N*N diagonal block of AAA starting at
     * (Offs,Offs). Tmp is scratch storage: elements [0,j) hold a copy of
     * the current row/column, elements [n,n+...) receive the
     * matrix-vector product (so Tmp is assumed to have length >= 2*N --
     * TODO confirm against callers).
     * Returns False on negative N or a non-positive pivot.
     */
    result = ae_true;
    if( n<0 )
    {
        result = ae_false;
        return result;
    }

    /*
     * Quick return if possible
     */
    if( n==0 )
    {
        return result;
    }
    if( isupper )
    {

        /*
         * Compute the Cholesky factorization A = U'*U.
         */
        for(j=0; j<=n-1; j++)
        {

            /*
             * Compute U(J,J) and test for non-positive-definiteness.
             * Only the real part of the diagonal is used; on failure the
             * offending value is stored back and False is returned.
             */
            v = ae_v_cdotproduct(&aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "Conj", &aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "N", ae_v_len(offs,offs+j-1));
            ajj = ae_c_sub(aaa->ptr.pp_complex[offs+j][offs+j],v).x;
            if( ae_fp_less_eq(ajj,(double)(0)) )
            {
                aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);
                result = ae_false;
                return result;
            }
            ajj = ae_sqrt(ajj, _state);
            aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);

            /*
             * Compute elements J+1:N-1 of row J.
             */
            if( j<n-1 )
            {
                if( j>0 )
                {

                    /*
                     * Tmp[0..j-1] = -conj(column J above the diagonal);
                     * the product with the trailing columns lands in
                     * Tmp[n..] and is added to row J.
                     */
                    ae_v_cmoveneg(&tmp->ptr.p_complex[0], 1, &aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "Conj", ae_v_len(0,j-1));
                    cmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, tmp, n, _state);
                    ae_v_cadd(&aaa->ptr.pp_complex[offs+j][offs+j+1], 1, &tmp->ptr.p_complex[n], 1, "N", ae_v_len(offs+j+1,offs+n-1));
                }
                r = 1/ajj;
                ae_v_cmuld(&aaa->ptr.pp_complex[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r);
            }
        }
    }
    else
    {

        /*
         * Compute the Cholesky factorization A = L*L'.
         */
        for(j=0; j<=n-1; j++)
        {

            /*
             * Compute L(J+1,J+1) and test for non-positive-definiteness.
             * Only the real part of the diagonal is used; on failure the
             * offending value is stored back and False is returned.
             */
            v = ae_v_cdotproduct(&aaa->ptr.pp_complex[offs+j][offs], 1, "Conj", &aaa->ptr.pp_complex[offs+j][offs], 1, "N", ae_v_len(offs,offs+j-1));
            ajj = ae_c_sub(aaa->ptr.pp_complex[offs+j][offs+j],v).x;
            if( ae_fp_less_eq(ajj,(double)(0)) )
            {
                aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);
                result = ae_false;
                return result;
            }
            ajj = ae_sqrt(ajj, _state);
            aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);

            /*
             * Compute elements J+1:N of column J.
             */
            if( j<n-1 )
            {
                r = 1/ajj;
                if( j>0 )
                {

                    /*
                     * Tmp[0..j-1] = conj(row J left of the diagonal); the
                     * product with the trailing rows lands in Tmp[n..]
                     * and is subtracted from column J before scaling.
                     */
                    ae_v_cmove(&tmp->ptr.p_complex[0], 1, &aaa->ptr.pp_complex[offs+j][offs], 1, "Conj", ae_v_len(0,j-1));
                    cmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, tmp, n, _state);
                    for(i=0; i<=n-j-2; i++)
                    {
                        aaa->ptr.pp_complex[offs+j+1+i][offs+j] = ae_c_mul_d(ae_c_sub(aaa->ptr.pp_complex[offs+j+1+i][offs+j],tmp->ptr.p_complex[n+i]),r);
                    }
                }
                else
                {
                    for(i=0; i<=n-j-2; i++)
                    {
                        aaa->ptr.pp_complex[offs+j+1+i][offs+j] = ae_c_mul_d(aaa->ptr.pp_complex[offs+j+1+i][offs+j],r);
                    }
                }
            }
        }
    }
    return result;
}
52122 
52123 
52124 /*************************************************************************
52125 Level-2 Cholesky subroutine
52126 
52127   -- LAPACK routine (version 3.0) --
52128      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
52129      Courant Institute, Argonne National Lab, and Rice University
52130      February 29, 1992
52131 *************************************************************************/
static ae_bool trfac_spdmatrixcholesky2(/* Real    */ ae_matrix* aaa,
     ae_int_t offs,
     ae_int_t n,
     ae_bool isupper,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double ajj;
    double v;
    double r;
    ae_bool result;


    /*
     * Works in-place on the N*N diagonal block of AAA starting at
     * (Offs,Offs). Tmp is scratch storage: elements [0,j) hold a copy of
     * the current row/column, elements [n,n+...) receive the
     * matrix-vector product (so Tmp is assumed to have length >= 2*N --
     * TODO confirm against callers).
     * Returns False on negative N or a non-positive pivot.
     */
    result = ae_true;
    if( n<0 )
    {
        result = ae_false;
        return result;
    }

    /*
     * Quick return if possible
     */
    if( n==0 )
    {
        return result;
    }
    if( isupper )
    {

        /*
         * Compute the Cholesky factorization A = U'*U.
         */
        for(j=0; j<=n-1; j++)
        {

            /*
             * Compute U(J,J) and test for non-positive-definiteness.
             * On failure the offending value is stored back into the
             * diagonal and False is returned.
             */
            v = ae_v_dotproduct(&aaa->ptr.pp_double[offs][offs+j], aaa->stride, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(offs,offs+j-1));
            ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
            if( ae_fp_less_eq(ajj,(double)(0)) )
            {
                aaa->ptr.pp_double[offs+j][offs+j] = ajj;
                result = ae_false;
                return result;
            }
            ajj = ae_sqrt(ajj, _state);
            aaa->ptr.pp_double[offs+j][offs+j] = ajj;

            /*
             * Compute elements J+1:N-1 of row J.
             */
            if( j<n-1 )
            {
                if( j>0 )
                {

                    /*
                     * Tmp[0..j-1] = -(column J above the diagonal); the
                     * product with the trailing columns lands in Tmp[n..]
                     * and is added to row J.
                     */
                    ae_v_moveneg(&tmp->ptr.p_double[0], 1, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(0,j-1));
                    rmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, tmp, n, _state);
                    ae_v_add(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, &tmp->ptr.p_double[n], 1, ae_v_len(offs+j+1,offs+n-1));
                }
                r = 1/ajj;
                ae_v_muld(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r);
            }
        }
    }
    else
    {

        /*
         * Compute the Cholesky factorization A = L*L'.
         */
        for(j=0; j<=n-1; j++)
        {

            /*
             * Compute L(J+1,J+1) and test for non-positive-definiteness.
             * On failure the offending value is stored back into the
             * diagonal and False is returned.
             */
            v = ae_v_dotproduct(&aaa->ptr.pp_double[offs+j][offs], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(offs,offs+j-1));
            ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
            if( ae_fp_less_eq(ajj,(double)(0)) )
            {
                aaa->ptr.pp_double[offs+j][offs+j] = ajj;
                result = ae_false;
                return result;
            }
            ajj = ae_sqrt(ajj, _state);
            aaa->ptr.pp_double[offs+j][offs+j] = ajj;

            /*
             * Compute elements J+1:N of column J.
             */
            if( j<n-1 )
            {
                r = 1/ajj;
                if( j>0 )
                {

                    /*
                     * Tmp[0..j-1] = row J left of the diagonal; the
                     * product with the trailing rows lands in Tmp[n..]
                     * and is subtracted from column J before scaling.
                     */
                    ae_v_move(&tmp->ptr.p_double[0], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(0,j-1));
                    rmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, tmp, n, _state);
                    for(i=0; i<=n-j-2; i++)
                    {
                        aaa->ptr.pp_double[offs+j+1+i][offs+j] = (aaa->ptr.pp_double[offs+j+1+i][offs+j]-tmp->ptr.p_double[n+i])*r;
                    }
                }
                else
                {
                    for(i=0; i<=n-j-2; i++)
                    {
                        aaa->ptr.pp_double[offs+j+1+i][offs+j] = aaa->ptr.pp_double[offs+j+1+i][offs+j]*r;
                    }
                }
            }
        }
    }
    return result;
}
52250 
52251 
/*
 * Constructs a sparsedecompositionanalysis instance in its default state:
 * each nested structure is initialized through its own _init routine.
 * Scalar fields are left as-is (they are set by the analysis code itself).
 */
void _sparsedecompositionanalysis_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    sparsedecompositionanalysis *obj = (sparsedecompositionanalysis*)_p;
    ae_touch_ptr((void*)obj);
    _spcholanalysis_init(&obj->analysis, _state, make_automatic);
    _sparsematrix_init(&obj->wrka, _state, make_automatic);
    _sparsematrix_init(&obj->wrkat, _state, make_automatic);
    _sparsematrix_init(&obj->crsa, _state, make_automatic);
    _sparsematrix_init(&obj->crsat, _state, make_automatic);
}
52262 
52263 
/*
 * Copy-constructs a sparsedecompositionanalysis from an existing source:
 * scalar fields are assigned directly, nested structures are deep-copied
 * through their respective _init_copy routines.
 */
void _sparsedecompositionanalysis_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    sparsedecompositionanalysis *to = (sparsedecompositionanalysis*)_dst;
    sparsedecompositionanalysis *from = (sparsedecompositionanalysis*)_src;
    to->n = from->n;
    to->facttype = from->facttype;
    to->permtype = from->permtype;
    _spcholanalysis_init_copy(&to->analysis, &from->analysis, _state, make_automatic);
    _sparsematrix_init_copy(&to->wrka, &from->wrka, _state, make_automatic);
    _sparsematrix_init_copy(&to->wrkat, &from->wrkat, _state, make_automatic);
    _sparsematrix_init_copy(&to->crsa, &from->crsa, _state, make_automatic);
    _sparsematrix_init_copy(&to->crsat, &from->crsat, _state, make_automatic);
}
52277 
52278 
_sparsedecompositionanalysis_clear(void * _p)52279 void _sparsedecompositionanalysis_clear(void* _p)
52280 {
52281     sparsedecompositionanalysis *p = (sparsedecompositionanalysis*)_p;
52282     ae_touch_ptr((void*)p);
52283     _spcholanalysis_clear(&p->analysis);
52284     _sparsematrix_clear(&p->wrka);
52285     _sparsematrix_clear(&p->wrkat);
52286     _sparsematrix_clear(&p->crsa);
52287     _sparsematrix_clear(&p->crsat);
52288 }
52289 
52290 
_sparsedecompositionanalysis_destroy(void * _p)52291 void _sparsedecompositionanalysis_destroy(void* _p)
52292 {
52293     sparsedecompositionanalysis *p = (sparsedecompositionanalysis*)_p;
52294     ae_touch_ptr((void*)p);
52295     _spcholanalysis_destroy(&p->analysis);
52296     _sparsematrix_destroy(&p->wrka);
52297     _sparsematrix_destroy(&p->wrkat);
52298     _sparsematrix_destroy(&p->crsa);
52299     _sparsematrix_destroy(&p->crsat);
52300 }
52301 
52302 
52303 #endif
52304 #if defined(AE_COMPILE_BDSVD) || !defined(AE_PARTIAL_BUILD)
52305 
52306 
52307 /*************************************************************************
52308 Singular value decomposition of a bidiagonal matrix (extended algorithm)
52309 
52310 COMMERCIAL EDITION OF ALGLIB:
52311 
52312   ! Commercial version of ALGLIB includes one  important  improvement   of
52313   ! this function, which can be used from C++ and C#:
52314   ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
52315   !
52316   ! Intel MKL gives approximately constant  (with  respect  to  number  of
52317   ! worker threads) acceleration factor which depends on CPU  being  used,
52318   ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
52319   ! comparison.
52320   !
52321   ! Generally, commercial ALGLIB is several times faster than  open-source
52322   ! generic C edition, and many times faster than open-source C# edition.
52323   !
52324   ! Multithreaded acceleration is NOT supported for this function.
52325   !
52326   ! We recommend you to read 'Working with commercial version' section  of
52327   ! ALGLIB Reference Manual in order to find out how to  use  performance-
52328   ! related features provided by commercial edition of ALGLIB.
52329 
52330 The algorithm performs the singular value decomposition  of  a  bidiagonal
52331 matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and  P -
52332 orthogonal matrices, S - diagonal matrix with non-negative elements on the
52333 main diagonal, in descending order.
52334 
52335 The  algorithm  finds  singular  values.  In  addition,  the algorithm can
52336 calculate  matrices  Q  and P (more precisely, not the matrices, but their
product  with  given  matrices U and VT - U*Q and (P^T)*VT).  Of   course,
52338 matrices U and VT can be of any type, including identity. Furthermore, the
algorithm can calculate Q'*C (this product is calculated more  efficiently
52340 than U*Q,  because  this calculation operates with rows instead  of matrix
52341 columns).
52342 
52343 The feature of the algorithm is its ability to find  all  singular  values
52344 including those which are arbitrarily close to 0  with  relative  accuracy
52345 close to  machine precision. If the parameter IsFractionalAccuracyRequired
52346 is set to True, all singular values will have high relative accuracy close
52347 to machine precision. If the parameter is set to False, only  the  biggest
52348 singular value will have relative accuracy  close  to  machine  precision.
52349 The absolute error of other singular values is equal to the absolute error
52350 of the biggest singular value.
52351 
52352 Input parameters:
52353     D       -   main diagonal of matrix B.
52354                 Array whose index ranges within [0..N-1].
52355     E       -   superdiagonal (or subdiagonal) of matrix B.
52356                 Array whose index ranges within [0..N-2].
52357     N       -   size of matrix B.
52358     IsUpper -   True, if the matrix is upper bidiagonal.
52359     IsFractionalAccuracyRequired -
52360                 THIS PARAMETER IS IGNORED SINCE ALGLIB 3.5.0
52361                 SINGULAR VALUES ARE ALWAYS SEARCHED WITH HIGH ACCURACY.
52362     U       -   matrix to be multiplied by Q.
52363                 Array whose indexes range within [0..NRU-1, 0..N-1].
52364                 The matrix can be bigger, in that case only the  submatrix
52365                 [0..NRU-1, 0..N-1] will be multiplied by Q.
52366     NRU     -   number of rows in matrix U.
52367     C       -   matrix to be multiplied by Q'.
52368                 Array whose indexes range within [0..N-1, 0..NCC-1].
52369                 The matrix can be bigger, in that case only the  submatrix
52370                 [0..N-1, 0..NCC-1] will be multiplied by Q'.
52371     NCC     -   number of columns in matrix C.
52372     VT      -   matrix to be multiplied by P^T.
52373                 Array whose indexes range within [0..N-1, 0..NCVT-1].
52374                 The matrix can be bigger, in that case only the  submatrix
52375                 [0..N-1, 0..NCVT-1] will be multiplied by P^T.
52376     NCVT    -   number of columns in matrix VT.
52377 
52378 Output parameters:
52379     D       -   singular values of matrix B in descending order.
52380     U       -   if NRU>0, contains matrix U*Q.
52381     VT      -   if NCVT>0, contains matrix (P^T)*VT.
52382     C       -   if NCC>0, contains matrix Q'*C.
52383 
52384 Result:
52385     True, if the algorithm has converged.
52386     False, if the algorithm hasn't converged (rare case).
52387 
NOTE: multiplication U*Q is performed by means of transposition to an internal
      buffer, multiplication and backward transposition. It helps to avoid
      costly columnwise operations and speeds up the algorithm.
52391 
52392 Additional information:
52393     The type of convergence is controlled by the internal  parameter  TOL.
52394     If the parameter is greater than 0, the singular values will have
52395     relative accuracy TOL. If TOL<0, the singular values will have
52396     absolute accuracy ABS(TOL)*norm(B).
    By default, |TOL| falls between 10*Epsilon and 100*Epsilon,
52398     where Epsilon is the machine precision. It is not  recommended  to  use
52399     TOL less than 10*Epsilon since this will  considerably  slow  down  the
52400     algorithm and may not lead to error decreasing.
52401 
52402 History:
52403     * 31 March, 2007.
52404         changed MAXITR from 6 to 12.
52405 
52406   -- LAPACK routine (version 3.0) --
52407      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
52408      Courant Institute, Argonne National Lab, and Rice University
52409      October 31, 1999.
52410 *************************************************************************/
ae_bool rmatrixbdsvd(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isfractionalaccuracyrequired,
     /* Real    */ ae_matrix* u,
     ae_int_t nru,
     /* Real    */ ae_matrix* c,
     ae_int_t ncc,
     /* Real    */ ae_matrix* vt,
     ae_int_t ncvt,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector _e;
    ae_int_t k;
    ae_vector en;
    ae_vector d1;
    ae_vector e1;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&_e, 0, sizeof(_e));
    memset(&en, 0, sizeof(en));
    memset(&d1, 0, sizeof(d1));
    memset(&e1, 0, sizeof(e1));
    ae_vector_init_copy(&_e, e, _state, ae_true);
    e = &_e;
    ae_vector_init(&en, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);

    result = ae_false;

    /*
     * First chance: hand the problem to the MKL-backed kernel, which
     * expects an N-element off-diagonal buffer (trailing element zeroed).
     * NOTE(review): assumes N>=1, consistent with the rest of this unit.
     */
    ae_vector_set_length(&en, n, _state);
    k = 0;
    while( k<n-1 )
    {
        en.ptr.p_double[k] = e->ptr.p_double[k];
        k = k+1;
    }
    en.ptr.p_double[n-1] = 0.0;
    if( rmatrixbdsvdmkl(d, &en, n, isupper, u, nru, c, ncc, vt, ncvt, &result, _state) )
    {
        ae_frame_leave(_state);
        return result;
    }

    /*
     * MKL kernel declined the problem: repack D/E into 1-based buffers
     * and run the generic ALGLIB implementation, then copy the singular
     * values back into the caller's 0-based array D.
     */
    ae_vector_set_length(&d1, n+1, _state);
    for(k=0; k<=n-1; k++)
    {
        d1.ptr.p_double[k+1] = d->ptr.p_double[k];
    }
    if( n>1 )
    {
        ae_vector_set_length(&e1, n-1+1, _state);
        for(k=0; k<=n-2; k++)
        {
            e1.ptr.p_double[k+1] = e->ptr.p_double[k];
        }
    }
    result = bdsvd_bidiagonalsvddecompositioninternal(&d1, &e1, n, isupper, isfractionalaccuracyrequired, u, 0, nru, c, 0, ncc, vt, 0, ncvt, _state);
    for(k=0; k<=n-1; k++)
    {
        d->ptr.p_double[k] = d1.ptr.p_double[k+1];
    }
    ae_frame_leave(_state);
    return result;
}
52475 
52476 
/*
 * Wrapper around the internal bidiagonal SVD driver that uses unit start
 * indices for U, C and VT. The off-diagonal E is copied first so that the
 * caller's array is left intact (the internal routine modifies it).
 */
ae_bool bidiagonalsvddecomposition(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isfractionalaccuracyrequired,
     /* Real    */ ae_matrix* u,
     ae_int_t nru,
     /* Real    */ ae_matrix* c,
     ae_int_t ncc,
     /* Real    */ ae_matrix* vt,
     ae_int_t ncvt,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector ecopy;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&ecopy, 0, sizeof(ecopy));
    ae_vector_init_copy(&ecopy, e, _state, ae_true);

    result = bdsvd_bidiagonalsvddecompositioninternal(d, &ecopy, n, isupper, isfractionalaccuracyrequired, u, 1, nru, c, 1, ncc, vt, 1, ncvt, _state);
    ae_frame_leave(_state);
    return result;
}
52503 
52504 
52505 /*************************************************************************
52506 Internal working subroutine for bidiagonal decomposition
52507 *************************************************************************/
bdsvd_bidiagonalsvddecompositioninternal(ae_vector * d,ae_vector * e,ae_int_t n,ae_bool isupper,ae_bool isfractionalaccuracyrequired,ae_matrix * uu,ae_int_t ustart,ae_int_t nru,ae_matrix * c,ae_int_t cstart,ae_int_t ncc,ae_matrix * vt,ae_int_t vstart,ae_int_t ncvt,ae_state * _state)52508 static ae_bool bdsvd_bidiagonalsvddecompositioninternal(/* Real    */ ae_vector* d,
52509      /* Real    */ ae_vector* e,
52510      ae_int_t n,
52511      ae_bool isupper,
52512      ae_bool isfractionalaccuracyrequired,
52513      /* Real    */ ae_matrix* uu,
52514      ae_int_t ustart,
52515      ae_int_t nru,
52516      /* Real    */ ae_matrix* c,
52517      ae_int_t cstart,
52518      ae_int_t ncc,
52519      /* Real    */ ae_matrix* vt,
52520      ae_int_t vstart,
52521      ae_int_t ncvt,
52522      ae_state *_state)
52523 {
52524     ae_frame _frame_block;
52525     ae_vector _e;
52526     ae_int_t i;
52527     ae_int_t idir;
52528     ae_int_t isub;
52529     ae_int_t iter;
52530     ae_int_t j;
52531     ae_int_t ll;
52532     ae_int_t lll;
52533     ae_int_t m;
52534     ae_int_t maxit;
52535     ae_int_t oldll;
52536     ae_int_t oldm;
52537     double abse;
52538     double abss;
52539     double cosl;
52540     double cosr;
52541     double cs;
52542     double eps;
52543     double f;
52544     double g;
52545     double h;
52546     double mu;
52547     double oldcs;
52548     double oldsn;
52549     double r;
52550     double shift;
52551     double sigmn;
52552     double sigmx;
52553     double sinl;
52554     double sinr;
52555     double sll;
52556     double smax;
52557     double smin;
52558     double sminl;
52559     double sminoa;
52560     double sn;
52561     double thresh;
52562     double tol;
52563     double tolmul;
52564     double unfl;
52565     ae_vector work0;
52566     ae_vector work1;
52567     ae_vector work2;
52568     ae_vector work3;
52569     ae_int_t maxitr;
52570     ae_bool matrixsplitflag;
52571     ae_bool iterflag;
52572     ae_vector utemp;
52573     ae_vector vttemp;
52574     ae_vector ctemp;
52575     ae_vector etemp;
52576     ae_matrix ut;
52577     ae_bool fwddir;
52578     double tmp;
52579     ae_int_t mm1;
52580     ae_int_t mm0;
52581     ae_bool bchangedir;
52582     ae_int_t uend;
52583     ae_int_t cend;
52584     ae_int_t vend;
52585     ae_bool result;
52586 
52587     ae_frame_make(_state, &_frame_block);
52588     memset(&_e, 0, sizeof(_e));
52589     memset(&work0, 0, sizeof(work0));
52590     memset(&work1, 0, sizeof(work1));
52591     memset(&work2, 0, sizeof(work2));
52592     memset(&work3, 0, sizeof(work3));
52593     memset(&utemp, 0, sizeof(utemp));
52594     memset(&vttemp, 0, sizeof(vttemp));
52595     memset(&ctemp, 0, sizeof(ctemp));
52596     memset(&etemp, 0, sizeof(etemp));
52597     memset(&ut, 0, sizeof(ut));
52598     ae_vector_init_copy(&_e, e, _state, ae_true);
52599     e = &_e;
52600     ae_vector_init(&work0, 0, DT_REAL, _state, ae_true);
52601     ae_vector_init(&work1, 0, DT_REAL, _state, ae_true);
52602     ae_vector_init(&work2, 0, DT_REAL, _state, ae_true);
52603     ae_vector_init(&work3, 0, DT_REAL, _state, ae_true);
52604     ae_vector_init(&utemp, 0, DT_REAL, _state, ae_true);
52605     ae_vector_init(&vttemp, 0, DT_REAL, _state, ae_true);
52606     ae_vector_init(&ctemp, 0, DT_REAL, _state, ae_true);
52607     ae_vector_init(&etemp, 0, DT_REAL, _state, ae_true);
52608     ae_matrix_init(&ut, 0, 0, DT_REAL, _state, ae_true);
52609 
52610     result = ae_true;
52611     if( n==0 )
52612     {
52613         ae_frame_leave(_state);
52614         return result;
52615     }
52616     if( n==1 )
52617     {
52618         if( ae_fp_less(d->ptr.p_double[1],(double)(0)) )
52619         {
52620             d->ptr.p_double[1] = -d->ptr.p_double[1];
52621             if( ncvt>0 )
52622             {
52623                 ae_v_muld(&vt->ptr.pp_double[vstart][vstart], 1, ae_v_len(vstart,vstart+ncvt-1), -1);
52624             }
52625         }
52626         ae_frame_leave(_state);
52627         return result;
52628     }
52629 
52630     /*
52631      * these initializers are not really necessary,
52632      * but without them compiler complains about uninitialized locals
52633      */
52634     ll = 0;
52635     oldsn = (double)(0);
52636 
52637     /*
52638      * init
52639      */
52640     ae_vector_set_length(&work0, n-1+1, _state);
52641     ae_vector_set_length(&work1, n-1+1, _state);
52642     ae_vector_set_length(&work2, n-1+1, _state);
52643     ae_vector_set_length(&work3, n-1+1, _state);
52644     uend = ustart+ae_maxint(nru-1, 0, _state);
52645     vend = vstart+ae_maxint(ncvt-1, 0, _state);
52646     cend = cstart+ae_maxint(ncc-1, 0, _state);
52647     ae_vector_set_length(&utemp, uend+1, _state);
52648     ae_vector_set_length(&vttemp, vend+1, _state);
52649     ae_vector_set_length(&ctemp, cend+1, _state);
52650     maxitr = 12;
52651     fwddir = ae_true;
52652     if( nru>0 )
52653     {
52654         ae_matrix_set_length(&ut, ustart+n, ustart+nru, _state);
52655         rmatrixtranspose(nru, n, uu, ustart, ustart, &ut, ustart, ustart, _state);
52656     }
52657 
52658     /*
52659      * resize E from N-1 to N
52660      */
52661     ae_vector_set_length(&etemp, n+1, _state);
52662     for(i=1; i<=n-1; i++)
52663     {
52664         etemp.ptr.p_double[i] = e->ptr.p_double[i];
52665     }
52666     ae_vector_set_length(e, n+1, _state);
52667     for(i=1; i<=n-1; i++)
52668     {
52669         e->ptr.p_double[i] = etemp.ptr.p_double[i];
52670     }
52671     e->ptr.p_double[n] = (double)(0);
52672     idir = 0;
52673 
52674     /*
52675      * Get machine constants
52676      */
52677     eps = ae_machineepsilon;
52678     unfl = ae_minrealnumber;
52679 
52680     /*
52681      * If matrix lower bidiagonal, rotate to be upper bidiagonal
52682      * by applying Givens rotations on the left
52683      */
52684     if( !isupper )
52685     {
52686         for(i=1; i<=n-1; i++)
52687         {
52688             generaterotation(d->ptr.p_double[i], e->ptr.p_double[i], &cs, &sn, &r, _state);
52689             d->ptr.p_double[i] = r;
52690             e->ptr.p_double[i] = sn*d->ptr.p_double[i+1];
52691             d->ptr.p_double[i+1] = cs*d->ptr.p_double[i+1];
52692             work0.ptr.p_double[i] = cs;
52693             work1.ptr.p_double[i] = sn;
52694         }
52695 
52696         /*
52697          * Update singular vectors if desired
52698          */
52699         if( nru>0 )
52700         {
52701             applyrotationsfromtheleft(fwddir, 1+ustart-1, n+ustart-1, ustart, uend, &work0, &work1, &ut, &utemp, _state);
52702         }
52703         if( ncc>0 )
52704         {
52705             applyrotationsfromtheleft(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state);
52706         }
52707     }
52708 
52709     /*
52710      * Compute singular values to relative accuracy TOL
52711      * (By setting TOL to be negative, algorithm will compute
52712      * singular values to absolute accuracy ABS(TOL)*norm(input matrix))
52713      */
52714     tolmul = ae_maxreal((double)(10), ae_minreal((double)(100), ae_pow(eps, -0.125, _state), _state), _state);
52715     tol = tolmul*eps;
52716 
52717     /*
52718      * Compute approximate maximum, minimum singular values
52719      */
52720     smax = (double)(0);
52721     for(i=1; i<=n; i++)
52722     {
52723         smax = ae_maxreal(smax, ae_fabs(d->ptr.p_double[i], _state), _state);
52724     }
52725     for(i=1; i<=n-1; i++)
52726     {
52727         smax = ae_maxreal(smax, ae_fabs(e->ptr.p_double[i], _state), _state);
52728     }
52729     sminl = (double)(0);
52730     if( ae_fp_greater_eq(tol,(double)(0)) )
52731     {
52732 
52733         /*
52734          * Relative accuracy desired
52735          */
52736         sminoa = ae_fabs(d->ptr.p_double[1], _state);
52737         if( ae_fp_neq(sminoa,(double)(0)) )
52738         {
52739             mu = sminoa;
52740             for(i=2; i<=n; i++)
52741             {
52742                 mu = ae_fabs(d->ptr.p_double[i], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[i-1], _state)));
52743                 sminoa = ae_minreal(sminoa, mu, _state);
52744                 if( ae_fp_eq(sminoa,(double)(0)) )
52745                 {
52746                     break;
52747                 }
52748             }
52749         }
52750         sminoa = sminoa/ae_sqrt((double)(n), _state);
52751         thresh = ae_maxreal(tol*sminoa, maxitr*n*n*unfl, _state);
52752     }
52753     else
52754     {
52755 
52756         /*
52757          * Absolute accuracy desired
52758          */
52759         thresh = ae_maxreal(ae_fabs(tol, _state)*smax, maxitr*n*n*unfl, _state);
52760     }
52761 
52762     /*
52763      * Prepare for main iteration loop for the singular values
52764      * (MAXIT is the maximum number of passes through the inner
52765      * loop permitted before nonconvergence signalled.)
52766      */
52767     maxit = maxitr*n*n;
52768     iter = 0;
52769     oldll = -1;
52770     oldm = -1;
52771 
52772     /*
52773      * M points to last element of unconverged part of matrix
52774      */
52775     m = n;
52776 
52777     /*
52778      * Begin main iteration loop
52779      */
52780     for(;;)
52781     {
52782 
52783         /*
52784          * Check for convergence or exceeding iteration count
52785          */
52786         if( m<=1 )
52787         {
52788             break;
52789         }
52790         if( iter>maxit )
52791         {
52792             result = ae_false;
52793             ae_frame_leave(_state);
52794             return result;
52795         }
52796 
52797         /*
52798          * Find diagonal block of matrix to work on
52799          */
52800         if( ae_fp_less(tol,(double)(0))&&ae_fp_less_eq(ae_fabs(d->ptr.p_double[m], _state),thresh) )
52801         {
52802             d->ptr.p_double[m] = (double)(0);
52803         }
52804         smax = ae_fabs(d->ptr.p_double[m], _state);
52805         smin = smax;
52806         matrixsplitflag = ae_false;
52807         for(lll=1; lll<=m-1; lll++)
52808         {
52809             ll = m-lll;
52810             abss = ae_fabs(d->ptr.p_double[ll], _state);
52811             abse = ae_fabs(e->ptr.p_double[ll], _state);
52812             if( ae_fp_less(tol,(double)(0))&&ae_fp_less_eq(abss,thresh) )
52813             {
52814                 d->ptr.p_double[ll] = (double)(0);
52815             }
52816             if( ae_fp_less_eq(abse,thresh) )
52817             {
52818                 matrixsplitflag = ae_true;
52819                 break;
52820             }
52821             smin = ae_minreal(smin, abss, _state);
52822             smax = ae_maxreal(smax, ae_maxreal(abss, abse, _state), _state);
52823         }
52824         if( !matrixsplitflag )
52825         {
52826             ll = 0;
52827         }
52828         else
52829         {
52830 
52831             /*
52832              * Matrix splits since E(LL) = 0
52833              */
52834             e->ptr.p_double[ll] = (double)(0);
52835             if( ll==m-1 )
52836             {
52837 
52838                 /*
52839                  * Convergence of bottom singular value, return to top of loop
52840                  */
52841                 m = m-1;
52842                 continue;
52843             }
52844         }
52845         ll = ll+1;
52846 
52847         /*
52848          * E(LL) through E(M-1) are nonzero, E(LL-1) is zero
52849          */
52850         if( ll==m-1 )
52851         {
52852 
52853             /*
52854              * 2 by 2 block, handle separately
52855              */
52856             bdsvd_svdv2x2(d->ptr.p_double[m-1], e->ptr.p_double[m-1], d->ptr.p_double[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl, _state);
52857             d->ptr.p_double[m-1] = sigmx;
52858             e->ptr.p_double[m-1] = (double)(0);
52859             d->ptr.p_double[m] = sigmn;
52860 
52861             /*
52862              * Compute singular vectors, if desired
52863              */
52864             if( ncvt>0 )
52865             {
52866                 mm0 = m+(vstart-1);
52867                 mm1 = m-1+(vstart-1);
52868                 ae_v_moved(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[mm1][vstart], 1, ae_v_len(vstart,vend), cosr);
52869                 ae_v_addd(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[mm0][vstart], 1, ae_v_len(vstart,vend), sinr);
52870                 ae_v_muld(&vt->ptr.pp_double[mm0][vstart], 1, ae_v_len(vstart,vend), cosr);
52871                 ae_v_subd(&vt->ptr.pp_double[mm0][vstart], 1, &vt->ptr.pp_double[mm1][vstart], 1, ae_v_len(vstart,vend), sinr);
52872                 ae_v_move(&vt->ptr.pp_double[mm1][vstart], 1, &vttemp.ptr.p_double[vstart], 1, ae_v_len(vstart,vend));
52873             }
52874             if( nru>0 )
52875             {
52876                 mm0 = m+ustart-1;
52877                 mm1 = m-1+ustart-1;
52878                 ae_v_moved(&utemp.ptr.p_double[ustart], 1, &ut.ptr.pp_double[mm1][ustart], 1, ae_v_len(ustart,uend), cosl);
52879                 ae_v_addd(&utemp.ptr.p_double[ustart], 1, &ut.ptr.pp_double[mm0][ustart], 1, ae_v_len(ustart,uend), sinl);
52880                 ae_v_muld(&ut.ptr.pp_double[mm0][ustart], 1, ae_v_len(ustart,uend), cosl);
52881                 ae_v_subd(&ut.ptr.pp_double[mm0][ustart], 1, &ut.ptr.pp_double[mm1][ustart], 1, ae_v_len(ustart,uend), sinl);
52882                 ae_v_move(&ut.ptr.pp_double[mm1][ustart], 1, &utemp.ptr.p_double[ustart], 1, ae_v_len(ustart,uend));
52883             }
52884             if( ncc>0 )
52885             {
52886                 mm0 = m+cstart-1;
52887                 mm1 = m-1+cstart-1;
52888                 ae_v_moved(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[mm1][cstart], 1, ae_v_len(cstart,cend), cosl);
52889                 ae_v_addd(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[mm0][cstart], 1, ae_v_len(cstart,cend), sinl);
52890                 ae_v_muld(&c->ptr.pp_double[mm0][cstart], 1, ae_v_len(cstart,cend), cosl);
52891                 ae_v_subd(&c->ptr.pp_double[mm0][cstart], 1, &c->ptr.pp_double[mm1][cstart], 1, ae_v_len(cstart,cend), sinl);
52892                 ae_v_move(&c->ptr.pp_double[mm1][cstart], 1, &ctemp.ptr.p_double[cstart], 1, ae_v_len(cstart,cend));
52893             }
52894             m = m-2;
52895             continue;
52896         }
52897 
52898         /*
52899          * If working on new submatrix, choose shift direction
52900          * (from larger end diagonal element towards smaller)
52901          *
52902          * Previously was
52903          *     "if (LL>OLDM) or (M<OLDLL) then"
52904          * fixed thanks to Michael Rolle < m@rolle.name >
52905          * Very strange that LAPACK still contains it.
52906          */
52907         bchangedir = ae_false;
52908         if( idir==1&&ae_fp_less(ae_fabs(d->ptr.p_double[ll], _state),1.0E-3*ae_fabs(d->ptr.p_double[m], _state)) )
52909         {
52910             bchangedir = ae_true;
52911         }
52912         if( idir==2&&ae_fp_less(ae_fabs(d->ptr.p_double[m], _state),1.0E-3*ae_fabs(d->ptr.p_double[ll], _state)) )
52913         {
52914             bchangedir = ae_true;
52915         }
52916         if( (ll!=oldll||m!=oldm)||bchangedir )
52917         {
52918             if( ae_fp_greater_eq(ae_fabs(d->ptr.p_double[ll], _state),ae_fabs(d->ptr.p_double[m], _state)) )
52919             {
52920 
52921                 /*
52922                  * Chase bulge from top (big end) to bottom (small end)
52923                  */
52924                 idir = 1;
52925             }
52926             else
52927             {
52928 
52929                 /*
52930                  * Chase bulge from bottom (big end) to top (small end)
52931                  */
52932                 idir = 2;
52933             }
52934         }
52935 
52936         /*
52937          * Apply convergence tests
52938          */
52939         if( idir==1 )
52940         {
52941 
52942             /*
52943              * Run convergence test in forward direction
52944              * First apply standard test to bottom of matrix
52945              */
52946             if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),ae_fabs(tol, _state)*ae_fabs(d->ptr.p_double[m], _state))||(ae_fp_less(tol,(double)(0))&&ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh)) )
52947             {
52948                 e->ptr.p_double[m-1] = (double)(0);
52949                 continue;
52950             }
52951             if( ae_fp_greater_eq(tol,(double)(0)) )
52952             {
52953 
52954                 /*
52955                  * If relative accuracy desired,
52956                  * apply convergence criterion forward
52957                  */
52958                 mu = ae_fabs(d->ptr.p_double[ll], _state);
52959                 sminl = mu;
52960                 iterflag = ae_false;
52961                 for(lll=ll; lll<=m-1; lll++)
52962                 {
52963                     if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[lll], _state),tol*mu) )
52964                     {
52965                         e->ptr.p_double[lll] = (double)(0);
52966                         iterflag = ae_true;
52967                         break;
52968                     }
52969                     mu = ae_fabs(d->ptr.p_double[lll+1], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[lll], _state)));
52970                     sminl = ae_minreal(sminl, mu, _state);
52971                 }
52972                 if( iterflag )
52973                 {
52974                     continue;
52975                 }
52976             }
52977         }
52978         else
52979         {
52980 
52981             /*
52982              * Run convergence test in backward direction
52983              * First apply standard test to top of matrix
52984              */
52985             if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),ae_fabs(tol, _state)*ae_fabs(d->ptr.p_double[ll], _state))||(ae_fp_less(tol,(double)(0))&&ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh)) )
52986             {
52987                 e->ptr.p_double[ll] = (double)(0);
52988                 continue;
52989             }
52990             if( ae_fp_greater_eq(tol,(double)(0)) )
52991             {
52992 
52993                 /*
52994                  * If relative accuracy desired,
52995                  * apply convergence criterion backward
52996                  */
52997                 mu = ae_fabs(d->ptr.p_double[m], _state);
52998                 sminl = mu;
52999                 iterflag = ae_false;
53000                 for(lll=m-1; lll>=ll; lll--)
53001                 {
53002                     if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[lll], _state),tol*mu) )
53003                     {
53004                         e->ptr.p_double[lll] = (double)(0);
53005                         iterflag = ae_true;
53006                         break;
53007                     }
53008                     mu = ae_fabs(d->ptr.p_double[lll], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[lll], _state)));
53009                     sminl = ae_minreal(sminl, mu, _state);
53010                 }
53011                 if( iterflag )
53012                 {
53013                     continue;
53014                 }
53015             }
53016         }
53017         oldll = ll;
53018         oldm = m;
53019 
53020         /*
53021          * Compute shift.  First, test if shifting would ruin relative
53022          * accuracy, and if so set the shift to zero.
53023          */
53024         if( ae_fp_greater_eq(tol,(double)(0))&&ae_fp_less_eq(n*tol*(sminl/smax),ae_maxreal(eps, 0.01*tol, _state)) )
53025         {
53026 
53027             /*
53028              * Use a zero shift to avoid loss of relative accuracy
53029              */
53030             shift = (double)(0);
53031         }
53032         else
53033         {
53034 
53035             /*
53036              * Compute the shift from 2-by-2 block at end of matrix
53037              */
53038             if( idir==1 )
53039             {
53040                 sll = ae_fabs(d->ptr.p_double[ll], _state);
53041                 bdsvd_svd2x2(d->ptr.p_double[m-1], e->ptr.p_double[m-1], d->ptr.p_double[m], &shift, &r, _state);
53042             }
53043             else
53044             {
53045                 sll = ae_fabs(d->ptr.p_double[m], _state);
53046                 bdsvd_svd2x2(d->ptr.p_double[ll], e->ptr.p_double[ll], d->ptr.p_double[ll+1], &shift, &r, _state);
53047             }
53048 
53049             /*
53050              * Test if shift negligible, and if so set to zero
53051              */
53052             if( ae_fp_greater(sll,(double)(0)) )
53053             {
53054                 if( ae_fp_less(ae_sqr(shift/sll, _state),eps) )
53055                 {
53056                     shift = (double)(0);
53057                 }
53058             }
53059         }
53060 
53061         /*
53062          * Increment iteration count
53063          */
53064         iter = iter+m-ll;
53065 
53066         /*
53067          * If SHIFT = 0, do simplified QR iteration
53068          */
53069         if( ae_fp_eq(shift,(double)(0)) )
53070         {
53071             if( idir==1 )
53072             {
53073 
53074                 /*
53075                  * Chase bulge from top to bottom
53076                  * Save cosines and sines for later singular vector updates
53077                  */
53078                 cs = (double)(1);
53079                 oldcs = (double)(1);
53080                 for(i=ll; i<=m-1; i++)
53081                 {
53082                     generaterotation(d->ptr.p_double[i]*cs, e->ptr.p_double[i], &cs, &sn, &r, _state);
53083                     if( i>ll )
53084                     {
53085                         e->ptr.p_double[i-1] = oldsn*r;
53086                     }
53087                     generaterotation(oldcs*r, d->ptr.p_double[i+1]*sn, &oldcs, &oldsn, &tmp, _state);
53088                     d->ptr.p_double[i] = tmp;
53089                     work0.ptr.p_double[i-ll+1] = cs;
53090                     work1.ptr.p_double[i-ll+1] = sn;
53091                     work2.ptr.p_double[i-ll+1] = oldcs;
53092                     work3.ptr.p_double[i-ll+1] = oldsn;
53093                 }
53094                 h = d->ptr.p_double[m]*cs;
53095                 d->ptr.p_double[m] = h*oldcs;
53096                 e->ptr.p_double[m-1] = h*oldsn;
53097 
53098                 /*
53099                  * Update singular vectors
53100                  */
53101                 if( ncvt>0 )
53102                 {
53103                     applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work0, &work1, vt, &vttemp, _state);
53104                 }
53105                 if( nru>0 )
53106                 {
53107                     applyrotationsfromtheleft(fwddir, ll+ustart-1, m+ustart-1, ustart, uend, &work2, &work3, &ut, &utemp, _state);
53108                 }
53109                 if( ncc>0 )
53110                 {
53111                     applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work2, &work3, c, &ctemp, _state);
53112                 }
53113 
53114                 /*
53115                  * Test convergence
53116                  */
53117                 if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh) )
53118                 {
53119                     e->ptr.p_double[m-1] = (double)(0);
53120                 }
53121             }
53122             else
53123             {
53124 
53125                 /*
53126                  * Chase bulge from bottom to top
53127                  * Save cosines and sines for later singular vector updates
53128                  */
53129                 cs = (double)(1);
53130                 oldcs = (double)(1);
53131                 for(i=m; i>=ll+1; i--)
53132                 {
53133                     generaterotation(d->ptr.p_double[i]*cs, e->ptr.p_double[i-1], &cs, &sn, &r, _state);
53134                     if( i<m )
53135                     {
53136                         e->ptr.p_double[i] = oldsn*r;
53137                     }
53138                     generaterotation(oldcs*r, d->ptr.p_double[i-1]*sn, &oldcs, &oldsn, &tmp, _state);
53139                     d->ptr.p_double[i] = tmp;
53140                     work0.ptr.p_double[i-ll] = cs;
53141                     work1.ptr.p_double[i-ll] = -sn;
53142                     work2.ptr.p_double[i-ll] = oldcs;
53143                     work3.ptr.p_double[i-ll] = -oldsn;
53144                 }
53145                 h = d->ptr.p_double[ll]*cs;
53146                 d->ptr.p_double[ll] = h*oldcs;
53147                 e->ptr.p_double[ll] = h*oldsn;
53148 
53149                 /*
53150                  * Update singular vectors
53151                  */
53152                 if( ncvt>0 )
53153                 {
53154                     applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work2, &work3, vt, &vttemp, _state);
53155                 }
53156                 if( nru>0 )
53157                 {
53158                     applyrotationsfromtheleft(!fwddir, ll+ustart-1, m+ustart-1, ustart, uend, &work0, &work1, &ut, &utemp, _state);
53159                 }
53160                 if( ncc>0 )
53161                 {
53162                     applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state);
53163                 }
53164 
53165                 /*
53166                  * Test convergence
53167                  */
53168                 if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh) )
53169                 {
53170                     e->ptr.p_double[ll] = (double)(0);
53171                 }
53172             }
53173         }
53174         else
53175         {
53176 
53177             /*
53178              * Use nonzero shift
53179              */
53180             if( idir==1 )
53181             {
53182 
53183                 /*
53184                  * Chase bulge from top to bottom
53185                  * Save cosines and sines for later singular vector updates
53186                  */
53187                 f = (ae_fabs(d->ptr.p_double[ll], _state)-shift)*(bdsvd_extsignbdsqr((double)(1), d->ptr.p_double[ll], _state)+shift/d->ptr.p_double[ll]);
53188                 g = e->ptr.p_double[ll];
53189                 for(i=ll; i<=m-1; i++)
53190                 {
53191                     generaterotation(f, g, &cosr, &sinr, &r, _state);
53192                     if( i>ll )
53193                     {
53194                         e->ptr.p_double[i-1] = r;
53195                     }
53196                     f = cosr*d->ptr.p_double[i]+sinr*e->ptr.p_double[i];
53197                     e->ptr.p_double[i] = cosr*e->ptr.p_double[i]-sinr*d->ptr.p_double[i];
53198                     g = sinr*d->ptr.p_double[i+1];
53199                     d->ptr.p_double[i+1] = cosr*d->ptr.p_double[i+1];
53200                     generaterotation(f, g, &cosl, &sinl, &r, _state);
53201                     d->ptr.p_double[i] = r;
53202                     f = cosl*e->ptr.p_double[i]+sinl*d->ptr.p_double[i+1];
53203                     d->ptr.p_double[i+1] = cosl*d->ptr.p_double[i+1]-sinl*e->ptr.p_double[i];
53204                     if( i<m-1 )
53205                     {
53206                         g = sinl*e->ptr.p_double[i+1];
53207                         e->ptr.p_double[i+1] = cosl*e->ptr.p_double[i+1];
53208                     }
53209                     work0.ptr.p_double[i-ll+1] = cosr;
53210                     work1.ptr.p_double[i-ll+1] = sinr;
53211                     work2.ptr.p_double[i-ll+1] = cosl;
53212                     work3.ptr.p_double[i-ll+1] = sinl;
53213                 }
53214                 e->ptr.p_double[m-1] = f;
53215 
53216                 /*
53217                  * Update singular vectors
53218                  */
53219                 if( ncvt>0 )
53220                 {
53221                     applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work0, &work1, vt, &vttemp, _state);
53222                 }
53223                 if( nru>0 )
53224                 {
53225                     applyrotationsfromtheleft(fwddir, ll+ustart-1, m+ustart-1, ustart, uend, &work2, &work3, &ut, &utemp, _state);
53226                 }
53227                 if( ncc>0 )
53228                 {
53229                     applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work2, &work3, c, &ctemp, _state);
53230                 }
53231 
53232                 /*
53233                  * Test convergence
53234                  */
53235                 if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh) )
53236                 {
53237                     e->ptr.p_double[m-1] = (double)(0);
53238                 }
53239             }
53240             else
53241             {
53242 
53243                 /*
53244                  * Chase bulge from bottom to top
53245                  * Save cosines and sines for later singular vector updates
53246                  */
53247                 f = (ae_fabs(d->ptr.p_double[m], _state)-shift)*(bdsvd_extsignbdsqr((double)(1), d->ptr.p_double[m], _state)+shift/d->ptr.p_double[m]);
53248                 g = e->ptr.p_double[m-1];
53249                 for(i=m; i>=ll+1; i--)
53250                 {
53251                     generaterotation(f, g, &cosr, &sinr, &r, _state);
53252                     if( i<m )
53253                     {
53254                         e->ptr.p_double[i] = r;
53255                     }
53256                     f = cosr*d->ptr.p_double[i]+sinr*e->ptr.p_double[i-1];
53257                     e->ptr.p_double[i-1] = cosr*e->ptr.p_double[i-1]-sinr*d->ptr.p_double[i];
53258                     g = sinr*d->ptr.p_double[i-1];
53259                     d->ptr.p_double[i-1] = cosr*d->ptr.p_double[i-1];
53260                     generaterotation(f, g, &cosl, &sinl, &r, _state);
53261                     d->ptr.p_double[i] = r;
53262                     f = cosl*e->ptr.p_double[i-1]+sinl*d->ptr.p_double[i-1];
53263                     d->ptr.p_double[i-1] = cosl*d->ptr.p_double[i-1]-sinl*e->ptr.p_double[i-1];
53264                     if( i>ll+1 )
53265                     {
53266                         g = sinl*e->ptr.p_double[i-2];
53267                         e->ptr.p_double[i-2] = cosl*e->ptr.p_double[i-2];
53268                     }
53269                     work0.ptr.p_double[i-ll] = cosr;
53270                     work1.ptr.p_double[i-ll] = -sinr;
53271                     work2.ptr.p_double[i-ll] = cosl;
53272                     work3.ptr.p_double[i-ll] = -sinl;
53273                 }
53274                 e->ptr.p_double[ll] = f;
53275 
53276                 /*
53277                  * Test convergence
53278                  */
53279                 if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh) )
53280                 {
53281                     e->ptr.p_double[ll] = (double)(0);
53282                 }
53283 
53284                 /*
53285                  * Update singular vectors if desired
53286                  */
53287                 if( ncvt>0 )
53288                 {
53289                     applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work2, &work3, vt, &vttemp, _state);
53290                 }
53291                 if( nru>0 )
53292                 {
53293                     applyrotationsfromtheleft(!fwddir, ll+ustart-1, m+ustart-1, ustart, uend, &work0, &work1, &ut, &utemp, _state);
53294                 }
53295                 if( ncc>0 )
53296                 {
53297                     applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state);
53298                 }
53299             }
53300         }
53301 
53302         /*
53303          * QR iteration finished, go back and check convergence
53304          */
53305         continue;
53306     }
53307 
53308     /*
53309      * All singular values converged, so make them positive
53310      */
53311     for(i=1; i<=n; i++)
53312     {
53313         if( ae_fp_less(d->ptr.p_double[i],(double)(0)) )
53314         {
53315             d->ptr.p_double[i] = -d->ptr.p_double[i];
53316 
53317             /*
53318              * Change sign of singular vectors, if desired
53319              */
53320             if( ncvt>0 )
53321             {
53322                 ae_v_muld(&vt->ptr.pp_double[i+vstart-1][vstart], 1, ae_v_len(vstart,vend), -1);
53323             }
53324         }
53325     }
53326 
53327     /*
53328      * Sort the singular values into decreasing order (insertion sort on
53329      * singular values, but only one transposition per singular vector)
53330      */
53331     for(i=1; i<=n-1; i++)
53332     {
53333 
53334         /*
53335          * Scan for smallest D(I)
53336          */
53337         isub = 1;
53338         smin = d->ptr.p_double[1];
53339         for(j=2; j<=n+1-i; j++)
53340         {
53341             if( ae_fp_less_eq(d->ptr.p_double[j],smin) )
53342             {
53343                 isub = j;
53344                 smin = d->ptr.p_double[j];
53345             }
53346         }
53347         if( isub!=n+1-i )
53348         {
53349 
53350             /*
53351              * Swap singular values and vectors
53352              */
53353             d->ptr.p_double[isub] = d->ptr.p_double[n+1-i];
53354             d->ptr.p_double[n+1-i] = smin;
53355             if( ncvt>0 )
53356             {
53357                 j = n+1-i;
53358                 ae_v_move(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[isub+vstart-1][vstart], 1, ae_v_len(vstart,vend));
53359                 ae_v_move(&vt->ptr.pp_double[isub+vstart-1][vstart], 1, &vt->ptr.pp_double[j+vstart-1][vstart], 1, ae_v_len(vstart,vend));
53360                 ae_v_move(&vt->ptr.pp_double[j+vstart-1][vstart], 1, &vttemp.ptr.p_double[vstart], 1, ae_v_len(vstart,vend));
53361             }
53362             if( nru>0 )
53363             {
53364                 j = n+1-i;
53365                 ae_v_move(&utemp.ptr.p_double[ustart], 1, &ut.ptr.pp_double[isub+ustart-1][ustart], 1, ae_v_len(ustart,uend));
53366                 ae_v_move(&ut.ptr.pp_double[isub+ustart-1][ustart], 1, &ut.ptr.pp_double[j+ustart-1][ustart], 1, ae_v_len(ustart,uend));
53367                 ae_v_move(&ut.ptr.pp_double[j+ustart-1][ustart], 1, &utemp.ptr.p_double[ustart], 1, ae_v_len(ustart,uend));
53368             }
53369             if( ncc>0 )
53370             {
53371                 j = n+1-i;
53372                 ae_v_move(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[isub+cstart-1][cstart], 1, ae_v_len(cstart,cend));
53373                 ae_v_move(&c->ptr.pp_double[isub+cstart-1][cstart], 1, &c->ptr.pp_double[j+cstart-1][cstart], 1, ae_v_len(cstart,cend));
53374                 ae_v_move(&c->ptr.pp_double[j+cstart-1][cstart], 1, &ctemp.ptr.p_double[cstart], 1, ae_v_len(cstart,cend));
53375             }
53376         }
53377     }
53378 
53379     /*
53380      * Copy U back from temporary storage
53381      */
53382     if( nru>0 )
53383     {
53384         rmatrixtranspose(n, nru, &ut, ustart, ustart, uu, ustart, ustart, _state);
53385     }
53386     ae_frame_leave(_state);
53387     return result;
53388 }
53389 
53390 
bdsvd_extsignbdsqr(double a,double b,ae_state * _state)53391 static double bdsvd_extsignbdsqr(double a, double b, ae_state *_state)
53392 {
53393     double result;
53394 
53395 
53396     if( ae_fp_greater_eq(b,(double)(0)) )
53397     {
53398         result = ae_fabs(a, _state);
53399     }
53400     else
53401     {
53402         result = -ae_fabs(a, _state);
53403     }
53404     return result;
53405 }
53406 
53407 
bdsvd_svd2x2(double f,double g,double h,double * ssmin,double * ssmax,ae_state * _state)53408 static void bdsvd_svd2x2(double f,
53409      double g,
53410      double h,
53411      double* ssmin,
53412      double* ssmax,
53413      ae_state *_state)
53414 {
53415     double aas;
53416     double at;
53417     double au;
53418     double c;
53419     double fa;
53420     double fhmn;
53421     double fhmx;
53422     double ga;
53423     double ha;
53424 
53425     *ssmin = 0;
53426     *ssmax = 0;
53427 
53428     fa = ae_fabs(f, _state);
53429     ga = ae_fabs(g, _state);
53430     ha = ae_fabs(h, _state);
53431     fhmn = ae_minreal(fa, ha, _state);
53432     fhmx = ae_maxreal(fa, ha, _state);
53433     if( ae_fp_eq(fhmn,(double)(0)) )
53434     {
53435         *ssmin = (double)(0);
53436         if( ae_fp_eq(fhmx,(double)(0)) )
53437         {
53438             *ssmax = ga;
53439         }
53440         else
53441         {
53442             *ssmax = ae_maxreal(fhmx, ga, _state)*ae_sqrt(1+ae_sqr(ae_minreal(fhmx, ga, _state)/ae_maxreal(fhmx, ga, _state), _state), _state);
53443         }
53444     }
53445     else
53446     {
53447         if( ae_fp_less(ga,fhmx) )
53448         {
53449             aas = 1+fhmn/fhmx;
53450             at = (fhmx-fhmn)/fhmx;
53451             au = ae_sqr(ga/fhmx, _state);
53452             c = 2/(ae_sqrt(aas*aas+au, _state)+ae_sqrt(at*at+au, _state));
53453             *ssmin = fhmn*c;
53454             *ssmax = fhmx/c;
53455         }
53456         else
53457         {
53458             au = fhmx/ga;
53459             if( ae_fp_eq(au,(double)(0)) )
53460             {
53461 
53462                 /*
53463                  * Avoid possible harmful underflow if exponent range
53464                  * asymmetric (true SSMIN may not underflow even if
53465                  * AU underflows)
53466                  */
53467                 *ssmin = fhmn*fhmx/ga;
53468                 *ssmax = ga;
53469             }
53470             else
53471             {
53472                 aas = 1+fhmn/fhmx;
53473                 at = (fhmx-fhmn)/fhmx;
53474                 c = 1/(ae_sqrt(1+ae_sqr(aas*au, _state), _state)+ae_sqrt(1+ae_sqr(at*au, _state), _state));
53475                 *ssmin = fhmn*c*au;
53476                 *ssmin = *ssmin+(*ssmin);
53477                 *ssmax = ga/(c+c);
53478             }
53479         }
53480     }
53481 }
53482 
53483 
/*************************************************************************
Full SVD of the 2x2 upper triangular matrix [ F G; 0 H ] with singular
vectors (analog of LAPACK's DLASV2, which this code closely follows).
On exit:
    SSMax, SSMin  -  larger/smaller singular value; signs are chosen so
                     that the rotation identity below holds exactly
                     (SSMax and/or SSMin may come out negative).
    (CSL, SNL)    -  left rotation,  (CSR, SNR) - right rotation, with
                     [ CSL SNL] [F G] [CSR -SNR] = [SSMax 0; 0 SSMin]
                     [-SNL CSL] [0 H] [SNR  CSR]
                     (standard DLASV2 contract; NOTE(review): confirm
                     sign conventions against callers if they matter).
*************************************************************************/
static void bdsvd_svdv2x2(double f,
     double g,
     double h,
     double* ssmin,
     double* ssmax,
     double* snr,
     double* csr,
     double* snl,
     double* csl,
     ae_state *_state)
{
    ae_bool gasmal;
    ae_bool swp;
    ae_int_t pmax;
    double a;
    double clt;
    double crt;
    double d;
    double fa;
    double ft;
    double ga;
    double gt;
    double ha;
    double ht;
    double l;
    double m;
    double mm;
    double r;
    double s;
    double slt;
    double srt;
    double t;
    double temp;
    double tsign;
    double tt;
    double v;

    *ssmin = 0;
    *ssmax = 0;
    *snr = 0;
    *csr = 0;
    *snl = 0;
    *csl = 0;

    /*
     * Working (signed) copies FT/HT and their magnitudes FA/HA;
     * FT/HT may be swapped below so F/H themselves stay untouched
     * for the final sign correction.
     */
    ft = f;
    fa = ae_fabs(ft, _state);
    ht = h;
    ha = ae_fabs(h, _state);

    /*
     * these initializers are not really necessary,
     * but without them compiler complains about uninitialized locals
     */
    clt = (double)(0);
    crt = (double)(0);
    slt = (double)(0);
    srt = (double)(0);
    tsign = (double)(0);

    /*
     * PMAX points to the maximum absolute element of matrix
     *  PMAX = 1 if F largest in absolute values
     *  PMAX = 2 if G largest in absolute values
     *  PMAX = 3 if H largest in absolute values
     */
    pmax = 1;
    swp = ae_fp_greater(ha,fa);
    if( swp )
    {

        /*
         * Now FA .ge. HA
         */
        pmax = 3;
        temp = ft;
        ft = ht;
        ht = temp;
        temp = fa;
        fa = ha;
        ha = temp;
    }
    gt = g;
    ga = ae_fabs(gt, _state);
    if( ae_fp_eq(ga,(double)(0)) )
    {

        /*
         * Diagonal matrix
         */
        *ssmin = ha;
        *ssmax = fa;
        clt = (double)(1);
        crt = (double)(1);
        slt = (double)(0);
        srt = (double)(0);
    }
    else
    {
        gasmal = ae_true;
        if( ae_fp_greater(ga,fa) )
        {
            pmax = 2;
            if( ae_fp_less(fa/ga,ae_machineepsilon) )
            {

                /*
                 * Case of very large GA: the matrix is dominated by its
                 * off-diagonal; singular values and rotations are formed
                 * directly to avoid overflow in the normal-case formulas.
                 */
                gasmal = ae_false;
                *ssmax = ga;
                if( ae_fp_greater(ha,(double)(1)) )
                {
                    v = ga/ha;
                    *ssmin = fa/v;
                }
                else
                {
                    v = fa/ga;
                    *ssmin = v*ha;
                }
                clt = (double)(1);
                slt = ht/gt;
                srt = (double)(1);
                crt = ft/gt;
            }
        }
        if( gasmal )
        {

            /*
             * Normal case: compute L = (FA-HA)/FA guarding against the
             * case where the subtraction cancels to FA exactly.
             */
            d = fa-ha;
            if( ae_fp_eq(d,fa) )
            {

                /*
                 * Copes with infinite F or H (D==FA means HA is negligible)
                 */
                l = (double)(1);
            }
            else
            {
                l = d/fa;
            }
            m = gt/ft;
            t = 2-l;
            mm = m*m;
            tt = t*t;
            s = ae_sqrt(tt+mm, _state);
            if( ae_fp_eq(l,(double)(0)) )
            {
                r = ae_fabs(m, _state);
            }
            else
            {
                r = ae_sqrt(l*l+mm, _state);
            }
            a = 0.5*(s+r);
            *ssmin = ha/a;
            *ssmax = fa*a;
            if( ae_fp_eq(mm,(double)(0)) )
            {

                /*
                 * Note that M is very tiny
                 */
                if( ae_fp_eq(l,(double)(0)) )
                {
                    t = bdsvd_extsignbdsqr((double)(2), ft, _state)*bdsvd_extsignbdsqr((double)(1), gt, _state);
                }
                else
                {

                    /*
                     * T on the right-hand side still holds 2-L from above
                     */
                    t = gt/bdsvd_extsignbdsqr(d, ft, _state)+m/t;
                }
            }
            else
            {
                t = (m/(s+t)+m/(r+l))*(1+a);
            }
            l = ae_sqrt(t*t+4, _state);
            crt = 2/l;
            srt = t/l;
            clt = (crt+srt*m)/a;
            v = ht/ft;
            slt = v*srt/a;
        }
    }

    /*
     * Undo the initial swap: if FT/HT were exchanged, the left and right
     * rotations (and cos/sin roles) exchange as well.
     */
    if( swp )
    {
        *csl = srt;
        *snl = crt;
        *csr = slt;
        *snr = clt;
    }
    else
    {
        *csl = clt;
        *snl = slt;
        *csr = crt;
        *snr = srt;
    }

    /*
     * Correct signs of SSMAX and SSMIN so that the rotation identity
     * holds with the original (signed) F, G, H
     */
    if( pmax==1 )
    {
        tsign = bdsvd_extsignbdsqr((double)(1), *csr, _state)*bdsvd_extsignbdsqr((double)(1), *csl, _state)*bdsvd_extsignbdsqr((double)(1), f, _state);
    }
    if( pmax==2 )
    {
        tsign = bdsvd_extsignbdsqr((double)(1), *snr, _state)*bdsvd_extsignbdsqr((double)(1), *csl, _state)*bdsvd_extsignbdsqr((double)(1), g, _state);
    }
    if( pmax==3 )
    {
        tsign = bdsvd_extsignbdsqr((double)(1), *snr, _state)*bdsvd_extsignbdsqr((double)(1), *snl, _state)*bdsvd_extsignbdsqr((double)(1), h, _state);
    }
    *ssmax = bdsvd_extsignbdsqr(*ssmax, tsign, _state);
    *ssmin = bdsvd_extsignbdsqr(*ssmin, tsign*bdsvd_extsignbdsqr((double)(1), f, _state)*bdsvd_extsignbdsqr((double)(1), h, _state), _state);
}
53701 
53702 
53703 #endif
53704 #if defined(AE_COMPILE_SVD) || !defined(AE_PARTIAL_BUILD)
53705 
53706 
53707 /*************************************************************************
53708 Singular value decomposition of a rectangular matrix.
53709 
53710   ! COMMERCIAL EDITION OF ALGLIB:
53711   !
53712   ! Commercial Edition of ALGLIB includes following important improvements
53713   ! of this function:
53714   ! * high-performance native backend with same C# interface (C# version)
53715   ! * hardware vendor (Intel) implementations of linear algebra primitives
53716   !   (C++ and C# versions, x86/x64 platform)
53717   !
53718   ! We recommend you to read 'Working with commercial version' section  of
53719   ! ALGLIB Reference Manual in order to find out how to  use  performance-
53720   ! related features provided by commercial edition of ALGLIB.
53721 
53722 The algorithm calculates the singular value decomposition of a matrix of
53723 size MxN: A = U * S * V^T
53724 
53725 The algorithm finds the singular values and, optionally, matrices U and V^T.
53726 The algorithm can find both first min(M,N) columns of matrix U and rows of
53727 matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM
53728 and NxN respectively).
53729 
53730 Take into account that the subroutine does not return matrix V but V^T.
53731 
53732 Input parameters:
53733     A           -   matrix to be decomposed.
53734                     Array whose indexes range within [0..M-1, 0..N-1].
53735     M           -   number of rows in matrix A.
53736     N           -   number of columns in matrix A.
53737     UNeeded     -   0, 1 or 2. See the description of the parameter U.
53738     VTNeeded    -   0, 1 or 2. See the description of the parameter VT.
53739     AdditionalMemory -
53740                     If the parameter:
53741                      * equals 0, the algorithm doesn't use additional
53742                        memory (lower requirements, lower performance).
53743                      * equals 1, the algorithm uses additional
53744                        memory of size min(M,N)*min(M,N) of real numbers.
53745                        It often speeds up the algorithm.
53746                      * equals 2, the algorithm uses additional
53747                        memory of size M*min(M,N) of real numbers.
                       It allows you to achieve maximum performance.
53749                     The recommended value of the parameter is 2.
53750 
53751 Output parameters:
53752     W           -   contains singular values in descending order.
53753     U           -   if UNeeded=0, U isn't changed, the left singular vectors
53754                     are not calculated.
                    if UNeeded=1, U contains left singular vectors (first
53756                     min(M,N) columns of matrix U). Array whose indexes range
53757                     within [0..M-1, 0..Min(M,N)-1].
53758                     if UNeeded=2, U contains matrix U wholly. Array whose
53759                     indexes range within [0..M-1, 0..M-1].
53760     VT          -   if VTNeeded=0, VT isn't changed, the right singular vectors
53761                     are not calculated.
53762                     if VTNeeded=1, VT contains right singular vectors (first
53763                     min(M,N) rows of matrix V^T). Array whose indexes range
53764                     within [0..min(M,N)-1, 0..N-1].
53765                     if VTNeeded=2, VT contains matrix V^T wholly. Array whose
53766                     indexes range within [0..N-1, 0..N-1].
53767 
53768   -- ALGLIB --
53769      Copyright 2005 by Bochkanov Sergey
53770 *************************************************************************/
/*************************************************************************
Singular value decomposition of a rectangular M x N real matrix.

The matrix is first reduced to bidiagonal form (preceded by a QR or LQ
factorization when it is strongly rectangular, i.e. one dimension exceeds
1.6x the other), after which the bidiagonal SVD solver is invoked.

Input parameters:
    A       -   matrix to be decomposed (a local copy is made; the caller's
                matrix is not modified).
    M, N    -   number of rows / columns of A.
    UNeeded -   0, left singular vectors are not computed;
                1, U receives the first min(M,N) columns (M x min(M,N));
                2, U receives the full M x M matrix.
    VTNeeded-   0, right singular vectors are not computed;
                1, VT receives the first min(M,N) rows (min(M,N) x N);
                2, VT receives the full N x N matrix.
    AdditionalMemory -
                0..2; larger values allow the routine to allocate an
                intermediate matrix T2 (and work arrays) to speed up the
                accumulation of singular vectors.

Output parameters:
    W       -   singular values (vector of length min(M,N)+1).
    U, VT   -   singular vectors, as requested above.

Result:
    return value of the bidiagonal SVD solver (ae_true on convergence);
    ae_true is returned immediately for an empty matrix.
*************************************************************************/
ae_bool rmatrixsvd(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_int_t uneeded,
     ae_int_t vtneeded,
     ae_int_t additionalmemory,
     /* Real    */ ae_vector* w,
     /* Real    */ ae_matrix* u,
     /* Real    */ ae_matrix* vt,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tauq;      /* Householder scalars from bidiagonalization (Q side) */
    ae_vector taup;      /* Householder scalars from bidiagonalization (P side) */
    ae_vector tau;       /* Householder scalars from QR/LQ pre-factorization */
    ae_vector e;         /* off-diagonal of the bidiagonal matrix */
    ae_vector work;      /* scratch for in-place transposition */
    ae_matrix t2;        /* intermediate matrix used when extra memory is allowed */
    ae_bool isupper;
    ae_int_t minmn;
    ae_int_t ncu;
    ae_int_t nrvt;
    ae_int_t nru;
    ae_int_t ncvt;
    ae_int_t i;
    ae_int_t j;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tauq, 0, sizeof(tauq));
    memset(&taup, 0, sizeof(taup));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    memset(&work, 0, sizeof(work));
    memset(&t2, 0, sizeof(t2));
    /* work on a private copy so the caller's A is preserved */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(w);
    ae_matrix_clear(u);
    ae_matrix_clear(vt);
    ae_vector_init(&tauq, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taup, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&t2, 0, 0, DT_REAL, _state, ae_true);

    result = ae_true;
    if( m==0||n==0 )
    {
        /* empty matrix: nothing to decompose */
        ae_frame_leave(_state);
        return result;
    }
    ae_assert(uneeded>=0&&uneeded<=2, "SVDDecomposition: wrong parameters!", _state);
    ae_assert(vtneeded>=0&&vtneeded<=2, "SVDDecomposition: wrong parameters!", _state);
    ae_assert(additionalmemory>=0&&additionalmemory<=2, "SVDDecomposition: wrong parameters!", _state);

    /*
     * initialize: allocate W and, depending on UNeeded/VTNeeded,
     * size U (NRU x NCU) and VT (NRVT x NCVT)
     */
    minmn = ae_minint(m, n, _state);
    ae_vector_set_length(w, minmn+1, _state);
    ncu = 0;
    nru = 0;
    if( uneeded==1 )
    {
        nru = m;
        ncu = minmn;
        ae_matrix_set_length(u, nru-1+1, ncu-1+1, _state);
    }
    if( uneeded==2 )
    {
        nru = m;
        ncu = m;
        ae_matrix_set_length(u, nru-1+1, ncu-1+1, _state);
    }
    nrvt = 0;
    ncvt = 0;
    if( vtneeded==1 )
    {
        nrvt = minmn;
        ncvt = n;
        ae_matrix_set_length(vt, nrvt-1+1, ncvt-1+1, _state);
    }
    if( vtneeded==2 )
    {
        nrvt = n;
        ncvt = n;
        ae_matrix_set_length(vt, nrvt-1+1, ncvt-1+1, _state);
    }

    /*
     * M much larger than N
     * Use bidiagonal reduction with QR-decomposition
     */
    if( ae_fp_greater((double)(m),1.6*n) )
    {
        if( uneeded==0 )
        {

            /*
             * No left singular vectors to be computed:
             * factor A=Q*R, zero out R's strict lower triangle,
             * and run the SVD on the N x N factor only.
             */
            rmatrixqr(a, m, n, &tau, _state);
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
            rmatrixbd(a, n, n, &tauq, &taup, _state);
            rmatrixbdunpackpt(a, n, n, &taup, nrvt, vt, _state);
            rmatrixbdunpackdiagonals(a, n, n, &isupper, w, &e, _state);
            result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, 0, a, 0, vt, ncvt, _state);
            ae_frame_leave(_state);
            return result;
        }
        else
        {

            /*
             * Left singular vectors (may be full matrix U) to be computed
             */
            rmatrixqr(a, m, n, &tau, _state);
            rmatrixqrunpackq(a, m, n, &tau, ncu, u, _state);
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
            rmatrixbd(a, n, n, &tauq, &taup, _state);
            rmatrixbdunpackpt(a, n, n, &taup, nrvt, vt, _state);
            rmatrixbdunpackdiagonals(a, n, n, &isupper, w, &e, _state);
            if( additionalmemory<1 )
            {

                /*
                 * No additional memory can be used:
                 * apply the bidiagonalization's Q directly to U.
                 */
                rmatrixbdmultiplybyq(a, n, n, &tauq, u, m, n, ae_true, ae_false, _state);
                result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, m, a, 0, vt, ncvt, _state);
            }
            else
            {

                /*
                 * Large U. Transforming intermediate matrix T2:
                 * accumulate rotations into the small N x N matrix T2,
                 * then form U with a single GEMM.
                 */
                ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
                rmatrixbdunpackq(a, n, n, &tauq, n, &t2, _state);
                copymatrix(u, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1, _state);
                inplacetranspose(&t2, 0, n-1, 0, n-1, &work, _state);
                result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, 0, &t2, n, vt, ncvt, _state);
                rmatrixgemm(m, n, n, 1.0, a, 0, 0, 0, &t2, 0, 0, 1, 0.0, u, 0, 0, _state);
            }
            ae_frame_leave(_state);
            return result;
        }
    }

    /*
     * N much larger than M
     * Use bidiagonal reduction with LQ-decomposition
     */
    if( ae_fp_greater((double)(n),1.6*m) )
    {
        if( vtneeded==0 )
        {

            /*
             * No right singular vectors to be computed:
             * factor A=L*Q, zero out L's strict upper triangle,
             * and run the SVD on the M x M factor only.
             */
            rmatrixlq(a, m, n, &tau, _state);
            for(i=0; i<=m-1; i++)
            {
                for(j=i+1; j<=m-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
            rmatrixbd(a, m, m, &tauq, &taup, _state);
            rmatrixbdunpackq(a, m, m, &tauq, ncu, u, _state);
            rmatrixbdunpackdiagonals(a, m, m, &isupper, w, &e, _state);
            ae_vector_set_length(&work, m+1, _state);
            /* U is transposed around the bidiagonal SVD call so that the
               solver applies row (not column) operations to it */
            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
            result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, vt, 0, _state);
            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
            ae_frame_leave(_state);
            return result;
        }
        else
        {

            /*
             * Right singular vectors (may be full matrix VT) to be computed
             */
            rmatrixlq(a, m, n, &tau, _state);
            rmatrixlqunpackq(a, m, n, &tau, nrvt, vt, _state);
            for(i=0; i<=m-1; i++)
            {
                for(j=i+1; j<=m-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
            rmatrixbd(a, m, m, &tauq, &taup, _state);
            rmatrixbdunpackq(a, m, m, &tauq, ncu, u, _state);
            rmatrixbdunpackdiagonals(a, m, m, &isupper, w, &e, _state);
            ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
            /* keep U transposed while the bidiagonal SVD runs (row ops) */
            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
            if( additionalmemory<1 )
            {

                /*
                 * No additional memory available:
                 * apply the bidiagonalization's P^T directly to VT.
                 */
                rmatrixbdmultiplybyp(a, m, m, &taup, vt, m, n, ae_false, ae_true, _state);
                result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, vt, n, _state);
            }
            else
            {

                /*
                 * Large VT. Transforming intermediate matrix T2:
                 * accumulate rotations into the small M x M matrix T2,
                 * then form VT with a single GEMM.
                 */
                rmatrixbdunpackpt(a, m, m, &taup, m, &t2, _state);
                result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, &t2, m, _state);
                copymatrix(vt, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1, _state);
                rmatrixgemm(m, n, m, 1.0, &t2, 0, 0, 0, a, 0, 0, 0, 0.0, vt, 0, 0, _state);
            }
            /* restore U to its normal orientation */
            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
            ae_frame_leave(_state);
            return result;
        }
    }

    /*
     * M<=N
     * We can use inplace transposition of U to get rid of columnwise operations
     */
    if( m<=n )
    {
        rmatrixbd(a, m, n, &tauq, &taup, _state);
        rmatrixbdunpackq(a, m, n, &tauq, ncu, u, _state);
        rmatrixbdunpackpt(a, m, n, &taup, nrvt, vt, _state);
        rmatrixbdunpackdiagonals(a, m, n, &isupper, w, &e, _state);
        ae_vector_set_length(&work, m+1, _state);
        inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
        result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, a, 0, u, nru, vt, ncvt, _state);
        inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Simple bidiagonal reduction (moderately rectangular, M>N case)
     */
    rmatrixbd(a, m, n, &tauq, &taup, _state);
    rmatrixbdunpackq(a, m, n, &tauq, ncu, u, _state);
    rmatrixbdunpackpt(a, m, n, &taup, nrvt, vt, _state);
    rmatrixbdunpackdiagonals(a, m, n, &isupper, w, &e, _state);
    if( additionalmemory<2||uneeded==0 )
    {

        /*
         * We cant use additional memory or there is no need in such operations
         */
        result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, u, nru, a, 0, vt, ncvt, _state);
    }
    else
    {

        /*
         * We can use additional memory: work on a transposed copy of U
         * (T2) so the solver uses row operations, then transpose back.
         */
        ae_matrix_set_length(&t2, minmn-1+1, m-1+1, _state);
        copyandtranspose(u, 0, m-1, 0, minmn-1, &t2, 0, minmn-1, 0, m-1, _state);
        result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, u, 0, &t2, m, vt, ncvt, _state);
        copyandtranspose(&t2, 0, minmn-1, 0, m-1, u, 0, m-1, 0, minmn-1, _state);
    }
    ae_frame_leave(_state);
    return result;
}
54059 
54060 
54061 #endif
54062 #if defined(AE_COMPILE_RCOND) || !defined(AE_PARTIAL_BUILD)
54063 
54064 
54065 /*************************************************************************
54066 Estimate of a matrix condition number (1-norm)
54067 
54068 The algorithm calculates a lower bound of the condition number. In this case,
54069 the algorithm does not return a lower bound of the condition number, but an
54070 inverse number (to avoid an overflow in case of a singular matrix).
54071 
54072 Input parameters:
54073     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54074     N   -   size of matrix A.
54075 
54076 Result: 1/LowerBound(cond(A))
54077 
54078 NOTE:
54079     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54080     0.0 is returned in such cases.
54081 *************************************************************************/
double rmatrixrcond1(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector pivots;
    ae_vector t;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&pivots, 0, sizeof(pivots));
    memset(&t, 0, sizeof(t));
    /* Work on a private copy: LU factorization below destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=1, "RMatrixRCond1: N<1!", _state);

    /*
     * ||A||_1 = max over columns j of sum_i |A[i][j]|.
     * T accumulates the per-column absolute sums.
     */
    ae_vector_set_length(&t, n, _state);
    for(j=0; j<=n-1; j++)
    {
        t.ptr.p_double[j] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
        }
    }
    nrm = (double)(0);
    for(j=0; j<=n-1; j++)
    {
        if( ae_fp_greater(t.ptr.p_double[j],nrm) )
        {
            nrm = t.ptr.p_double[j];
        }
    }

    /*
     * LU-factorize the copy and hand the factors plus the norm to the
     * internal reciprocal-condition estimator (1-norm flavor).
     */
    rmatrixlu(a, n, n, &pivots, _state);
    rcond_rmatrixrcondluinternal(a, n, ae_true, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54129 
54130 
54131 /*************************************************************************
54132 Estimate of a matrix condition number (infinity-norm).
54133 
54134 The algorithm calculates a lower bound of the condition number. In this case,
54135 the algorithm does not return a lower bound of the condition number, but an
54136 inverse number (to avoid an overflow in case of a singular matrix).
54137 
54138 Input parameters:
54139     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54140     N   -   size of matrix A.
54141 
54142 Result: 1/LowerBound(cond(A))
54143 
54144 NOTE:
54145     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54146     0.0 is returned in such cases.
54147 *************************************************************************/
double rmatrixrcondinf(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector pivots;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&pivots, 0, sizeof(pivots));
    /* Work on a private copy: LU factorization below destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);

    ae_assert(n>=1, "RMatrixRCondInf: N<1!", _state);

    /*
     * ||A||_inf = max over rows i of sum_j |A[i][j]|.
     * Row sums need no scratch vector - accumulate and fold in one pass.
     */
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        v = (double)(0);
        for(j=0; j<=n-1; j++)
        {
            v = v+ae_fabs(a->ptr.pp_double[i][j], _state);
        }
        if( ae_fp_greater(v,nrm) )
        {
            nrm = v;
        }
    }

    /*
     * LU-factorize the copy and hand the factors plus the norm to the
     * internal reciprocal-condition estimator (inf-norm flavor).
     */
    rmatrixlu(a, n, n, &pivots, _state);
    rcond_rmatrixrcondluinternal(a, n, ae_false, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54185 
54186 
54187 /*************************************************************************
54188 Condition number estimate of a symmetric positive definite matrix.
54189 
54190 The algorithm calculates a lower bound of the condition number. In this case,
54191 the algorithm does not return a lower bound of the condition number, but an
54192 inverse number (to avoid an overflow in case of a singular matrix).
54193 
54194 It should be noted that 1-norm and inf-norm of condition numbers of symmetric
54195 matrices are equal, so the algorithm doesn't take into account the
54196 differences between these types of norms.
54197 
54198 Input parameters:
54199     A       -   symmetric positive definite matrix which is given by its
54200                 upper or lower triangle depending on the value of
54201                 IsUpper. Array with elements [0..N-1, 0..N-1].
54202     N       -   size of matrix A.
54203     IsUpper -   storage format.
54204 
54205 Result:
54206     1/LowerBound(cond(A)), if matrix A is positive definite,
54207    -1, if matrix A is not positive definite, and its condition number
54208     could not be found by this algorithm.
54209 
54210 NOTE:
54211     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54212     0.0 is returned in such cases.
54213 *************************************************************************/
double spdmatrixrcond(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    double v;
    double nrm;
    ae_vector t;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&t, 0, sizeof(t));
    /* Work on a private copy: Cholesky factorization destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    /*
     * Accumulate per-column absolute sums of the (implicitly symmetric)
     * matrix while visiting only the stored triangle: each off-diagonal
     * element A[i][j] contributes to both column i and column j.
     */
    ae_vector_set_length(&t, n, _state);
    for(i=0; i<=n-1; i++)
    {
        t.ptr.p_double[i] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i;
        }
        for(j=j1; j<=j2; j++)
        {
            v = ae_fabs(a->ptr.pp_double[i][j], _state);
            t.ptr.p_double[i] = t.ptr.p_double[i]+v;
            if( i!=j )
            {
                t.ptr.p_double[j] = t.ptr.p_double[j]+v;
            }
        }
    }
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
    }

    /*
     * Cholesky-factorize the copy; failure means the matrix is not
     * positive definite, in which case -1 is returned.
     */
    if( spdmatrixcholesky(a, n, isupper, _state) )
    {
        rcond_spdmatrixrcondcholeskyinternal(a, n, isupper, ae_true, nrm, &v, _state);
        result = v;
    }
    else
    {
        result = (double)(-1);
    }
    ae_frame_leave(_state);
    return result;
}
54284 
54285 
54286 /*************************************************************************
54287 Triangular matrix: estimate of a condition number (1-norm)
54288 
54289 The algorithm calculates a lower bound of the condition number. In this case,
54290 the algorithm does not return a lower bound of the condition number, but an
54291 inverse number (to avoid an overflow in case of a singular matrix).
54292 
54293 Input parameters:
54294     A       -   matrix. Array[0..N-1, 0..N-1].
54295     N       -   size of A.
54296     IsUpper -   True, if the matrix is upper triangular.
54297     IsUnit  -   True, if the matrix has a unit diagonal.
54298 
54299 Result: 1/LowerBound(cond(A))
54300 
54301 NOTE:
54302     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54303     0.0 is returned in such cases.
54304 *************************************************************************/
double rmatrixtrrcond1(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector t;
    ae_int_t j1;
    ae_int_t j2;
    double result;

    /*
     * NOTE: the original version also allocated and registered an integer
     * "pivots" vector which was never used (no LU is performed here);
     * that dead allocation has been removed.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&t, 0, sizeof(t));
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=1, "RMatrixTRRCond1: N<1!", _state);

    /*
     * Compute ||A||_1 = max over columns of the sum of absolute values,
     * accounting only for the stored triangle; the diagonal contributes
     * 1 per column when IsUnit is set, |A[i][i]| otherwise.
     */
    ae_vector_set_length(&t, n, _state);
    for(i=0; i<=n-1; i++)
    {
        t.ptr.p_double[i] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i+1;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i-1;
        }
        for(j=j1; j<=j2; j++)
        {
            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
        }
        if( isunit )
        {
            t.ptr.p_double[i] = t.ptr.p_double[i]+1;
        }
        else
        {
            t.ptr.p_double[i] = t.ptr.p_double[i]+ae_fabs(a->ptr.pp_double[i][i], _state);
        }
    }
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
    }

    /*
     * Triangular estimator works directly on A - no factorization needed.
     */
    rcond_rmatrixrcondtrinternal(a, n, isupper, isunit, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54369 
54370 
54371 /*************************************************************************
54372 Triangular matrix: estimate of a matrix condition number (infinity-norm).
54373 
54374 The algorithm calculates a lower bound of the condition number. In this case,
54375 the algorithm does not return a lower bound of the condition number, but an
54376 inverse number (to avoid an overflow in case of a singular matrix).
54377 
54378 Input parameters:
54379     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54380     N   -   size of matrix A.
54381     IsUpper -   True, if the matrix is upper triangular.
54382     IsUnit  -   True, if the matrix has a unit diagonal.
54383 
54384 Result: 1/LowerBound(cond(A))
54385 
54386 NOTE:
54387     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54388     0.0 is returned in such cases.
54389 *************************************************************************/
double rmatrixtrrcondinf(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_int_t j1;
    ae_int_t j2;
    double result;

    /*
     * NOTE: the original version created a dynamic frame solely to hold an
     * integer "pivots" vector which was never used (no LU is performed
     * here); both the dead allocation and the now-empty frame have been
     * removed, so this routine allocates nothing.
     */
    ae_assert(n>=1, "RMatrixTRRCondInf: N<1!", _state);

    /*
     * Compute ||A||_inf = max over rows of the sum of absolute values,
     * accounting only for the stored triangle; the diagonal contributes
     * 1 per row when IsUnit is set, |A[i][i]| otherwise.
     */
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i+1;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i-1;
        }
        v = (double)(0);
        for(j=j1; j<=j2; j++)
        {
            v = v+ae_fabs(a->ptr.pp_double[i][j], _state);
        }
        if( isunit )
        {
            v = v+1;
        }
        else
        {
            v = v+ae_fabs(a->ptr.pp_double[i][i], _state);
        }
        nrm = ae_maxreal(nrm, v, _state);
    }

    /*
     * Triangular estimator works directly on A - no factorization needed.
     */
    rcond_rmatrixrcondtrinternal(a, n, isupper, isunit, ae_false, nrm, &v, _state);
    result = v;
    return result;
}
54444 
54445 
54446 /*************************************************************************
54447 Condition number estimate of a Hermitian positive definite matrix.
54448 
54449 The algorithm calculates a lower bound of the condition number. In this case,
54450 the algorithm does not return a lower bound of the condition number, but an
54451 inverse number (to avoid an overflow in case of a singular matrix).
54452 
54453 It should be noted that 1-norm and inf-norm of condition numbers of symmetric
54454 matrices are equal, so the algorithm doesn't take into account the
54455 differences between these types of norms.
54456 
54457 Input parameters:
54458     A       -   Hermitian positive definite matrix which is given by its
54459                 upper or lower triangle depending on the value of
54460                 IsUpper. Array with elements [0..N-1, 0..N-1].
54461     N       -   size of matrix A.
54462     IsUpper -   storage format.
54463 
54464 Result:
54465     1/LowerBound(cond(A)), if matrix A is positive definite,
54466    -1, if matrix A is not positive definite, and its condition number
54467     could not be found by this algorithm.
54468 
54469 NOTE:
54470     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54471     0.0 is returned in such cases.
54472 *************************************************************************/
double hpdmatrixrcond(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    double v;
    double nrm;
    ae_vector t;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&t, 0, sizeof(t));
    /* Work on a private copy: Cholesky factorization destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    /*
     * Accumulate per-column absolute sums of the (implicitly Hermitian)
     * matrix while visiting only the stored triangle: each off-diagonal
     * element A[i][j] contributes to both column i and column j.
     */
    ae_vector_set_length(&t, n, _state);
    for(i=0; i<=n-1; i++)
    {
        t.ptr.p_double[i] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i;
        }
        for(j=j1; j<=j2; j++)
        {
            v = ae_c_abs(a->ptr.pp_complex[i][j], _state);
            t.ptr.p_double[i] = t.ptr.p_double[i]+v;
            if( i!=j )
            {
                t.ptr.p_double[j] = t.ptr.p_double[j]+v;
            }
        }
    }
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
    }

    /*
     * Cholesky-factorize the copy; failure means the matrix is not
     * positive definite, in which case -1 is returned.
     */
    if( hpdmatrixcholesky(a, n, isupper, _state) )
    {
        rcond_hpdmatrixrcondcholeskyinternal(a, n, isupper, ae_true, nrm, &v, _state);
        result = v;
    }
    else
    {
        result = (double)(-1);
    }
    ae_frame_leave(_state);
    return result;
}
54543 
54544 
54545 /*************************************************************************
54546 Estimate of a matrix condition number (1-norm)
54547 
54548 The algorithm calculates a lower bound of the condition number. In this case,
54549 the algorithm does not return a lower bound of the condition number, but an
54550 inverse number (to avoid an overflow in case of a singular matrix).
54551 
54552 Input parameters:
54553     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54554     N   -   size of matrix A.
54555 
54556 Result: 1/LowerBound(cond(A))
54557 
54558 NOTE:
54559     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54560     0.0 is returned in such cases.
54561 *************************************************************************/
double cmatrixrcond1(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector pivots;
    ae_vector t;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&pivots, 0, sizeof(pivots));
    memset(&t, 0, sizeof(t));
    /* Work on a private copy: LU factorization below destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=1, "CMatrixRCond1: N<1!", _state);

    /*
     * ||A||_1 = max over columns j of sum_i |A[i][j]|.
     * T accumulates the per-column absolute sums.
     */
    ae_vector_set_length(&t, n, _state);
    for(j=0; j<=n-1; j++)
    {
        t.ptr.p_double[j] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state);
        }
    }
    nrm = (double)(0);
    for(j=0; j<=n-1; j++)
    {
        if( ae_fp_greater(t.ptr.p_double[j],nrm) )
        {
            nrm = t.ptr.p_double[j];
        }
    }

    /*
     * LU-factorize the copy and hand the factors plus the norm to the
     * internal reciprocal-condition estimator (1-norm flavor).
     */
    cmatrixlu(a, n, n, &pivots, _state);
    rcond_cmatrixrcondluinternal(a, n, ae_true, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54609 
54610 
54611 /*************************************************************************
54612 Estimate of a matrix condition number (infinity-norm).
54613 
54614 The algorithm calculates a lower bound of the condition number. In this case,
54615 the algorithm does not return a lower bound of the condition number, but an
54616 inverse number (to avoid an overflow in case of a singular matrix).
54617 
54618 Input parameters:
54619     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54620     N   -   size of matrix A.
54621 
54622 Result: 1/LowerBound(cond(A))
54623 
54624 NOTE:
54625     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54626     0.0 is returned in such cases.
54627 *************************************************************************/
double cmatrixrcondinf(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector pivots;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&pivots, 0, sizeof(pivots));
    /* Work on a private copy: LU factorization below destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);

    ae_assert(n>=1, "CMatrixRCondInf: N<1!", _state);

    /*
     * ||A||_inf = max over rows i of sum_j |A[i][j]|.
     * Row sums need no scratch vector - accumulate and fold in one pass.
     */
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        v = (double)(0);
        for(j=0; j<=n-1; j++)
        {
            v = v+ae_c_abs(a->ptr.pp_complex[i][j], _state);
        }
        if( ae_fp_greater(v,nrm) )
        {
            nrm = v;
        }
    }

    /*
     * LU-factorize the copy and hand the factors plus the norm to the
     * internal reciprocal-condition estimator (inf-norm flavor).
     */
    cmatrixlu(a, n, n, &pivots, _state);
    rcond_cmatrixrcondluinternal(a, n, ae_false, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54665 
54666 
54667 /*************************************************************************
54668 Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
54669 
54670 The algorithm calculates a lower bound of the condition number. In this case,
54671 the algorithm does not return a lower bound of the condition number, but an
54672 inverse number (to avoid an overflow in case of a singular matrix).
54673 
54674 Input parameters:
54675     LUA         -   LU decomposition of a matrix in compact form. Output of
54676                     the RMatrixLU subroutine.
54677     N           -   size of matrix A.
54678 
54679 Result: 1/LowerBound(cond(A))
54680 
54681 NOTE:
54682     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54683     0.0 is returned in such cases.
54684 *************************************************************************/
double rmatrixlurcond1(/* Real    */ ae_matrix* lua,
     ae_int_t n,
     ae_state *_state)
{
    double rc;

    /*
     * LUA is already factorized, so the estimator is invoked directly with
     * the "norm not provided" flag (ae_false) and a dummy norm of zero;
     * the 1-norm flavor is selected by the ae_true argument.
     */
    rc = (double)(0);
    rcond_rmatrixrcondluinternal(lua, n, ae_true, ae_false, (double)(0), &rc, _state);
    return rc;
}
54697 
54698 
54699 /*************************************************************************
54700 Estimate of the condition number of a matrix given by its LU decomposition
54701 (infinity norm).
54702 
54703 The algorithm calculates a lower bound of the condition number. In this case,
54704 the algorithm does not return a lower bound of the condition number, but an
54705 inverse number (to avoid an overflow in case of a singular matrix).
54706 
54707 Input parameters:
54708     LUA     -   LU decomposition of a matrix in compact form. Output of
54709                 the RMatrixLU subroutine.
54710     N       -   size of matrix A.
54711 
54712 Result: 1/LowerBound(cond(A))
54713 
54714 NOTE:
54715     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54716     0.0 is returned in such cases.
54717 *************************************************************************/
rmatrixlurcondinf(ae_matrix * lua,ae_int_t n,ae_state * _state)54718 double rmatrixlurcondinf(/* Real    */ ae_matrix* lua,
54719      ae_int_t n,
54720      ae_state *_state)
54721 {
54722     double v;
54723     double result;
54724 
54725 
54726     rcond_rmatrixrcondluinternal(lua, n, ae_false, ae_false, (double)(0), &v, _state);
54727     result = v;
54728     return result;
54729 }
54730 
54731 
54732 /*************************************************************************
54733 Condition number estimate of a symmetric positive definite matrix given by
54734 Cholesky decomposition.
54735 
54736 The algorithm calculates a lower bound of the condition number. In this
54737 case, the algorithm does not return a lower bound of the condition number,
54738 but an inverse number (to avoid an overflow in case of a singular matrix).
54739 
54740 It should be noted that 1-norm and inf-norm condition numbers of symmetric
54741 matrices are equal, so the algorithm doesn't take into account the
54742 differences between these types of norms.
54743 
54744 Input parameters:
54745     CD  - Cholesky decomposition of matrix A,
54746           output of SMatrixCholesky subroutine.
54747     N   - size of matrix A.
54748 
54749 Result: 1/LowerBound(cond(A))
54750 
54751 NOTE:
54752     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54753     0.0 is returned in such cases.
54754 *************************************************************************/
spdmatrixcholeskyrcond(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)54755 double spdmatrixcholeskyrcond(/* Real    */ ae_matrix* a,
54756      ae_int_t n,
54757      ae_bool isupper,
54758      ae_state *_state)
54759 {
54760     double v;
54761     double result;
54762 
54763 
54764     rcond_spdmatrixrcondcholeskyinternal(a, n, isupper, ae_false, (double)(0), &v, _state);
54765     result = v;
54766     return result;
54767 }
54768 
54769 
54770 /*************************************************************************
54771 Condition number estimate of a Hermitian positive definite matrix given by
54772 Cholesky decomposition.
54773 
54774 The algorithm calculates a lower bound of the condition number. In this
54775 case, the algorithm does not return a lower bound of the condition number,
54776 but an inverse number (to avoid an overflow in case of a singular matrix).
54777 
54778 It should be noted that 1-norm and inf-norm condition numbers of symmetric
54779 matrices are equal, so the algorithm doesn't take into account the
54780 differences between these types of norms.
54781 
54782 Input parameters:
54783     CD  - Cholesky decomposition of matrix A,
54784           output of SMatrixCholesky subroutine.
54785     N   - size of matrix A.
54786 
54787 Result: 1/LowerBound(cond(A))
54788 
54789 NOTE:
54790     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54791     0.0 is returned in such cases.
54792 *************************************************************************/
hpdmatrixcholeskyrcond(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)54793 double hpdmatrixcholeskyrcond(/* Complex */ ae_matrix* a,
54794      ae_int_t n,
54795      ae_bool isupper,
54796      ae_state *_state)
54797 {
54798     double v;
54799     double result;
54800 
54801 
54802     rcond_hpdmatrixrcondcholeskyinternal(a, n, isupper, ae_false, (double)(0), &v, _state);
54803     result = v;
54804     return result;
54805 }
54806 
54807 
54808 /*************************************************************************
54809 Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
54810 
54811 The algorithm calculates a lower bound of the condition number. In this case,
54812 the algorithm does not return a lower bound of the condition number, but an
54813 inverse number (to avoid an overflow in case of a singular matrix).
54814 
54815 Input parameters:
54816     LUA         -   LU decomposition of a matrix in compact form. Output of
54817                     the CMatrixLU subroutine.
54818     N           -   size of matrix A.
54819 
54820 Result: 1/LowerBound(cond(A))
54821 
54822 NOTE:
54823     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54824     0.0 is returned in such cases.
54825 *************************************************************************/
cmatrixlurcond1(ae_matrix * lua,ae_int_t n,ae_state * _state)54826 double cmatrixlurcond1(/* Complex */ ae_matrix* lua,
54827      ae_int_t n,
54828      ae_state *_state)
54829 {
54830     double v;
54831     double result;
54832 
54833 
54834     ae_assert(n>=1, "CMatrixLURCond1: N<1!", _state);
54835     rcond_cmatrixrcondluinternal(lua, n, ae_true, ae_false, 0.0, &v, _state);
54836     result = v;
54837     return result;
54838 }
54839 
54840 
54841 /*************************************************************************
54842 Estimate of the condition number of a matrix given by its LU decomposition
54843 (infinity norm).
54844 
54845 The algorithm calculates a lower bound of the condition number. In this case,
54846 the algorithm does not return a lower bound of the condition number, but an
54847 inverse number (to avoid an overflow in case of a singular matrix).
54848 
54849 Input parameters:
54850     LUA     -   LU decomposition of a matrix in compact form. Output of
54851                 the CMatrixLU subroutine.
54852     N       -   size of matrix A.
54853 
54854 Result: 1/LowerBound(cond(A))
54855 
54856 NOTE:
54857     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54858     0.0 is returned in such cases.
54859 *************************************************************************/
cmatrixlurcondinf(ae_matrix * lua,ae_int_t n,ae_state * _state)54860 double cmatrixlurcondinf(/* Complex */ ae_matrix* lua,
54861      ae_int_t n,
54862      ae_state *_state)
54863 {
54864     double v;
54865     double result;
54866 
54867 
54868     ae_assert(n>=1, "CMatrixLURCondInf: N<1!", _state);
54869     rcond_cmatrixrcondluinternal(lua, n, ae_false, ae_false, 0.0, &v, _state);
54870     result = v;
54871     return result;
54872 }
54873 
54874 
54875 /*************************************************************************
54876 Triangular matrix: estimate of a condition number (1-norm)
54877 
54878 The algorithm calculates a lower bound of the condition number. In this case,
54879 the algorithm does not return a lower bound of the condition number, but an
54880 inverse number (to avoid an overflow in case of a singular matrix).
54881 
54882 Input parameters:
54883     A       -   matrix. Array[0..N-1, 0..N-1].
54884     N       -   size of A.
54885     IsUpper -   True, if the matrix is upper triangular.
54886     IsUnit  -   True, if the matrix has a unit diagonal.
54887 
54888 Result: 1/LowerBound(cond(A))
54889 
54890 NOTE:
54891     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54892     0.0 is returned in such cases.
54893 *************************************************************************/
cmatrixtrrcond1(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_bool isunit,ae_state * _state)54894 double cmatrixtrrcond1(/* Complex */ ae_matrix* a,
54895      ae_int_t n,
54896      ae_bool isupper,
54897      ae_bool isunit,
54898      ae_state *_state)
54899 {
54900     ae_frame _frame_block;
54901     ae_int_t i;
54902     ae_int_t j;
54903     double v;
54904     double nrm;
54905     ae_vector pivots;
54906     ae_vector t;
54907     ae_int_t j1;
54908     ae_int_t j2;
54909     double result;
54910 
54911     ae_frame_make(_state, &_frame_block);
54912     memset(&pivots, 0, sizeof(pivots));
54913     memset(&t, 0, sizeof(t));
54914     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
54915     ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
54916 
54917     ae_assert(n>=1, "RMatrixTRRCond1: N<1!", _state);
54918     ae_vector_set_length(&t, n, _state);
54919     for(i=0; i<=n-1; i++)
54920     {
54921         t.ptr.p_double[i] = (double)(0);
54922     }
54923     for(i=0; i<=n-1; i++)
54924     {
54925         if( isupper )
54926         {
54927             j1 = i+1;
54928             j2 = n-1;
54929         }
54930         else
54931         {
54932             j1 = 0;
54933             j2 = i-1;
54934         }
54935         for(j=j1; j<=j2; j++)
54936         {
54937             t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state);
54938         }
54939         if( isunit )
54940         {
54941             t.ptr.p_double[i] = t.ptr.p_double[i]+1;
54942         }
54943         else
54944         {
54945             t.ptr.p_double[i] = t.ptr.p_double[i]+ae_c_abs(a->ptr.pp_complex[i][i], _state);
54946         }
54947     }
54948     nrm = (double)(0);
54949     for(i=0; i<=n-1; i++)
54950     {
54951         nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
54952     }
54953     rcond_cmatrixrcondtrinternal(a, n, isupper, isunit, ae_true, nrm, &v, _state);
54954     result = v;
54955     ae_frame_leave(_state);
54956     return result;
54957 }
54958 
54959 
54960 /*************************************************************************
54961 Triangular matrix: estimate of a matrix condition number (infinity-norm).
54962 
54963 The algorithm calculates a lower bound of the condition number. In this case,
54964 the algorithm does not return a lower bound of the condition number, but an
54965 inverse number (to avoid an overflow in case of a singular matrix).
54966 
54967 Input parameters:
54968     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54969     N   -   size of matrix A.
54970     IsUpper -   True, if the matrix is upper triangular.
54971     IsUnit  -   True, if the matrix has a unit diagonal.
54972 
54973 Result: 1/LowerBound(cond(A))
54974 
54975 NOTE:
54976     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54977     0.0 is returned in such cases.
54978 *************************************************************************/
cmatrixtrrcondinf(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_bool isunit,ae_state * _state)54979 double cmatrixtrrcondinf(/* Complex */ ae_matrix* a,
54980      ae_int_t n,
54981      ae_bool isupper,
54982      ae_bool isunit,
54983      ae_state *_state)
54984 {
54985     ae_frame _frame_block;
54986     ae_int_t i;
54987     ae_int_t j;
54988     double v;
54989     double nrm;
54990     ae_vector pivots;
54991     ae_int_t j1;
54992     ae_int_t j2;
54993     double result;
54994 
54995     ae_frame_make(_state, &_frame_block);
54996     memset(&pivots, 0, sizeof(pivots));
54997     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
54998 
54999     ae_assert(n>=1, "RMatrixTRRCondInf: N<1!", _state);
55000     nrm = (double)(0);
55001     for(i=0; i<=n-1; i++)
55002     {
55003         if( isupper )
55004         {
55005             j1 = i+1;
55006             j2 = n-1;
55007         }
55008         else
55009         {
55010             j1 = 0;
55011             j2 = i-1;
55012         }
55013         v = (double)(0);
55014         for(j=j1; j<=j2; j++)
55015         {
55016             v = v+ae_c_abs(a->ptr.pp_complex[i][j], _state);
55017         }
55018         if( isunit )
55019         {
55020             v = v+1;
55021         }
55022         else
55023         {
55024             v = v+ae_c_abs(a->ptr.pp_complex[i][i], _state);
55025         }
55026         nrm = ae_maxreal(nrm, v, _state);
55027     }
55028     rcond_cmatrixrcondtrinternal(a, n, isupper, isunit, ae_false, nrm, &v, _state);
55029     result = v;
55030     ae_frame_leave(_state);
55031     return result;
55032 }
55033 
55034 
55035 /*************************************************************************
55036 Threshold for rcond: matrices with condition number beyond this  threshold
55037 are considered singular.
55038 
55039 Threshold must be far enough from underflow, at least Sqr(Threshold)  must
55040 be greater than underflow.
55041 *************************************************************************/
rcondthreshold(ae_state * _state)55042 double rcondthreshold(ae_state *_state)
55043 {
55044     double result;
55045 
55046 
55047     result = ae_sqrt(ae_sqrt(ae_minrealnumber, _state), _state);
55048     return result;
55049 }
55050 
55051 
55052 /*************************************************************************
55053 Internal subroutine for condition number estimation
55054 
55055   -- LAPACK routine (version 3.0) --
55056      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55057      Courant Institute, Argonne National Lab, and Rice University
55058      February 29, 1992
55059 *************************************************************************/
static void rcond_rmatrixrcondtrinternal(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_bool onenorm,
     double anorm,
     double* rc,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector ex;       /* 1-based workspace vector exchanged with the norm estimator */
    ae_vector ev;       /* auxiliary workspace for rcond_rmatrixestimatenorm */
    ae_vector iwork;    /* integer workspace for rcond_rmatrixestimatenorm */
    ae_vector tmp;
    ae_int_t i;
    ae_int_t j;
    ae_int_t kase;      /* reverse-communication request code; 0 means "done" */
    ae_int_t kase1;     /* request code that means "multiply by inv(A)" (1-norm vs inf-norm) */
    ae_int_t j1;
    ae_int_t j2;
    double ainvnm;      /* estimated norm of inv(A) */
    double maxgrowth;   /* growth limit passed to the scaled triangular solver */
    double s;           /* scaling: 1/max|A[i][j]| over the stored triangle */

    ae_frame_make(_state, &_frame_block);
    memset(&ex, 0, sizeof(ex));
    memset(&ev, 0, sizeof(ev));
    memset(&iwork, 0, sizeof(iwork));
    memset(&tmp, 0, sizeof(tmp));
    *rc = 0;
    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ev, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);


    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * init
     */
    if( onenorm )
    {
        kase1 = 1;
    }
    else
    {
        kase1 = 2;
    }
    ae_vector_set_length(&iwork, n+1, _state);
    ae_vector_set_length(&tmp, n, _state);

    /*
     * prepare parameters for triangular solver
     */
    maxgrowth = 1/rcondthreshold(_state);
    s = (double)(0);
    /* S = max absolute element of the stored triangle (diagonal counted
       as 1 when IsUnit); used to scale A so the solver works near unity. */
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i+1;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i-1;
        }
        for(j=j1; j<=j2; j++)
        {
            s = ae_maxreal(s, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
        }
        if( isunit )
        {
            s = ae_maxreal(s, (double)(1), _state);
        }
        else
        {
            s = ae_maxreal(s, ae_fabs(a->ptr.pp_double[i][i], _state), _state);
        }
    }
    /* guard against an all-zero matrix: avoid division by zero below */
    if( ae_fp_eq(s,(double)(0)) )
    {
        s = (double)(1);
    }
    s = 1/s;

    /*
     * Scale according to S
     */
    anorm = anorm*s;

    /*
     * Quick return if possible
     * We assume that ANORM<>0 after this block
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the norm of inv(A).
     */
    ainvnm = (double)(0);
    kase = 0;
    /* Reverse-communication loop: the estimator hands back a vector in EX
       and a request code in KASE; we apply inv(A) or inv(A') and resume. */
    for(;;)
    {
        rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state);
        if( kase==0 )
        {
            break;
        }

        /*
         * from 1-based array to 0-based
         */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_double[i] = ex.ptr.p_double[i+1];
        }

        /*
         * multiply by inv(A) or inv(A')
         */
        if( kase==kase1 )
        {

            /*
             * multiply by inv(A)
             */
            /* TransA=0; failure means growth exceeded MaxGrowth => treat as singular (RC stays 0) */
            if( !rmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 0, isunit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * multiply by inv(A')
             */
            /* TransA=1 (transposed solve) */
            if( !rmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 1, isunit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }

        /*
         * from 0-based array to 1-based
         */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_double[i+1] = ex.ptr.p_double[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        *rc = 1/ainvnm;
        *rc = *rc/anorm;
        /* below the threshold the matrix is declared numerically singular */
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
55244 
55245 
55246 /*************************************************************************
55247 Condition number estimation
55248 
55249   -- LAPACK routine (version 3.0) --
55250      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55251      Courant Institute, Argonne National Lab, and Rice University
55252      March 31, 1993
55253 *************************************************************************/
static void rcond_cmatrixrcondtrinternal(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_bool onenorm,
     double anorm,
     double* rc,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector ex;       /* 1-based workspace exchanged with the norm estimator */
    ae_vector cwork2;
    ae_vector cwork3;
    ae_vector cwork4;
    ae_vector isave;    /* integer state saved across estimator calls */
    ae_vector rsave;    /* real state saved across estimator calls */
    ae_int_t kase;      /* reverse-communication request code; 0 means "done" */
    ae_int_t kase1;     /* request code meaning "multiply by inv(A)" */
    double ainvnm;      /* estimated norm of inv(A) */
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    double s;           /* scaling: 1/max|A[i][j]| over the stored triangle */
    double maxgrowth;   /* growth limit for the scaled triangular solver */

    ae_frame_make(_state, &_frame_block);
    memset(&ex, 0, sizeof(ex));
    memset(&cwork2, 0, sizeof(cwork2));
    memset(&cwork3, 0, sizeof(cwork3));
    memset(&cwork4, 0, sizeof(cwork4));
    memset(&isave, 0, sizeof(isave));
    memset(&rsave, 0, sizeof(rsave));
    *rc = 0;
    ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&cwork2, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&cwork3, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&cwork4, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&isave, 0, DT_INT, _state, ae_true);
    ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true);


    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * init
     */
    if( n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    /* NOTE(review): this branch is unreachable — n<=0 above already covers
       n==0, so the n==0 case returns RC=0, not RC=1. Left as-is to preserve
       behavior; callers assert N>=1 anyway. */
    if( n==0 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }
    ae_vector_set_length(&cwork2, n+1, _state);

    /*
     * prepare parameters for triangular solver
     */
    maxgrowth = 1/rcondthreshold(_state);
    s = (double)(0);
    /* S = max absolute element of the stored triangle (diagonal counted
       as 1 when IsUnit); used to scale A towards unity. */
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i+1;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i-1;
        }
        for(j=j1; j<=j2; j++)
        {
            s = ae_maxreal(s, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state);
        }
        if( isunit )
        {
            s = ae_maxreal(s, (double)(1), _state);
        }
        else
        {
            s = ae_maxreal(s, ae_c_abs(a->ptr.pp_complex[i][i], _state), _state);
        }
    }
    /* guard against an all-zero matrix: avoid division by zero below */
    if( ae_fp_eq(s,(double)(0)) )
    {
        s = (double)(1);
    }
    s = 1/s;

    /*
     * Scale according to S
     */
    anorm = anorm*s;

    /*
     * Quick return if possible
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the norm of inv(A).
     */
    ainvnm = (double)(0);
    if( onenorm )
    {
        kase1 = 1;
    }
    else
    {
        kase1 = 2;
    }
    kase = 0;
    /* Reverse-communication loop: apply inv(A) or inv(A') to the vector
       returned in EX, then resume the estimator. */
    for(;;)
    {
        rcond_cmatrixestimatenorm(n, &cwork4, &ex, &ainvnm, &kase, &isave, &rsave, _state);
        if( kase==0 )
        {
            break;
        }

        /*
         * From 1-based to 0-based
         */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1];
        }

        /*
         * multiply by inv(A) or inv(A')
         */
        if( kase==kase1 )
        {

            /*
             * multiply by inv(A)
             */
            /* TransA=0; failure => growth exceeded MaxGrowth => singular, RC stays 0 */
            if( !cmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 0, isunit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * multiply by inv(A')
             */
            /* TransA=2 — presumably the conjugate-transpose solve mode;
               confirm against cmatrixscaledtrsafesolve's declaration */
            if( !cmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 2, isunit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }

        /*
         * from 0-based to 1-based
         */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        *rc = 1/ainvnm;
        *rc = *rc/anorm;
        /* below the threshold the matrix is declared numerically singular */
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
55447 
55448 
55449 /*************************************************************************
55450 Internal subroutine for condition number estimation
55451 
55452   -- LAPACK routine (version 3.0) --
55453      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55454      Courant Institute, Argonne National Lab, and Rice University
55455      February 29, 1992
55456 *************************************************************************/
static void rcond_spdmatrixrcondcholeskyinternal(/* Real    */ ae_matrix* cha,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isnormprovided,
     double anorm,
     double* rc,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t kase;      /* reverse-communication request code; 0 means "done" */
    double ainvnm;      /* estimated norm of inv(A) */
    ae_vector ex;       /* 1-based workspace exchanged with the norm estimator */
    ae_vector ev;
    ae_vector tmp;
    ae_vector iwork;
    double sa;          /* scaling: 1/max|CHA[i][j]| over the stored triangle */
    double v;
    double maxgrowth;   /* growth limit for the scaled triangular solver */

    ae_frame_make(_state, &_frame_block);
    memset(&ex, 0, sizeof(ex));
    memset(&ev, 0, sizeof(ev));
    memset(&tmp, 0, sizeof(tmp));
    memset(&iwork, 0, sizeof(iwork));
    *rc = 0;
    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ev, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);

    ae_assert(n>=1, "Assertion failed", _state);
    ae_vector_set_length(&tmp, n, _state);

    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * prepare parameters for triangular solver
     */
    maxgrowth = 1/rcondthreshold(_state);
    sa = (double)(0);
    /* SA = max absolute element of the stored triangle of the Cholesky
       factor (real values wrapped into complex purely to reuse ae_c_abs) */
    if( isupper )
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=i; j<=n-1; j++)
            {
                sa = ae_maxreal(sa, ae_c_abs(ae_complex_from_d(cha->ptr.pp_double[i][j]), _state), _state);
            }
        }
    }
    else
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=i; j++)
            {
                sa = ae_maxreal(sa, ae_c_abs(ae_complex_from_d(cha->ptr.pp_double[i][j]), _state), _state);
            }
        }
    }
    /* guard against an all-zero factor: avoid division by zero below */
    if( ae_fp_eq(sa,(double)(0)) )
    {
        sa = (double)(1);
    }
    sa = 1/sa;

    /*
     * Estimate the norm of A.
     */
    if( !isnormprovided )
    {
        /* A = U'*U (or L*L'), so ||A|| is estimated by multiplying the
           estimator's vector by both Cholesky factors in turn. EX is
           1-based; all products are scaled by SA. */
        kase = 0;
        anorm = (double)(0);
        for(;;)
        {
            rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &anorm, &kase, _state);
            if( kase==0 )
            {
                break;
            }
            if( isupper )
            {

                /*
                 * Multiply by U
                 */
                for(i=1; i<=n; i++)
                {
                    v = ae_v_dotproduct(&cha->ptr.pp_double[i-1][i-1], 1, &ex.ptr.p_double[i], 1, ae_v_len(i-1,n-1));
                    ex.ptr.p_double[i] = v;
                }
                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);

                /*
                 * Multiply by U'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_double[i] = (double)(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    v = ex.ptr.p_double[i+1];
                    ae_v_addd(&tmp.ptr.p_double[i], 1, &cha->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v);
                }
                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);
            }
            else
            {

                /*
                 * Multiply by L'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_double[i] = (double)(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    v = ex.ptr.p_double[i+1];
                    ae_v_addd(&tmp.ptr.p_double[0], 1, &cha->ptr.pp_double[i][0], 1, ae_v_len(0,i), v);
                }
                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);

                /*
                 * Multiply by L
                 */
                for(i=n; i>=1; i--)
                {
                    v = ae_v_dotproduct(&cha->ptr.pp_double[i-1][0], 1, &ex.ptr.p_double[1], 1, ae_v_len(0,i-1));
                    ex.ptr.p_double[i] = v;
                }
                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);
            }
        }
    }

    /*
     * Quick return if possible
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the 1-norm of inv(A).
     */
    /* inv(A) = inv(U)*inv(U') (or inv(L')*inv(L)): apply both inverse
       factors via the scaled safe triangular solver. */
    kase = 0;
    for(;;)
    {
        rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state);
        if( kase==0 )
        {
            break;
        }
        /* shift EX from 1-based to 0-based for the solver */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_double[i] = ex.ptr.p_double[i+1];
        }
        if( isupper )
        {

            /*
             * Multiply by inv(U').
             */
            /* solver failure => growth exceeded MaxGrowth => singular, RC stays 0 */
            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 1, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(U).
             */
            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * Multiply by inv(L).
             */
            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(L').
             */
            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 1, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        /* shift EX back from 0-based to 1-based for the estimator */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_double[i+1] = ex.ptr.p_double[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        v = 1/ainvnm;
        *rc = v/anorm;
        /* below the threshold the matrix is declared numerically singular */
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
55693 
55694 
55695 /*************************************************************************
55696 Internal subroutine for condition number estimation
55697 
55698   -- LAPACK routine (version 3.0) --
55699      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55700      Courant Institute, Argonne National Lab, and Rice University
55701      February 29, 1992
55702 *************************************************************************/
static void rcond_hpdmatrixrcondcholeskyinternal(/* Complex */ ae_matrix* cha,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isnormprovided,
     double anorm,
     double* rc,
     ae_state *_state)
{
    /*
     * Estimates the reciprocal condition number of a Hermitian positive
     * definite matrix A given by its Cholesky factor CHA.
     *
     * Inputs:
     *     CHA            -   Cholesky factor; only the triangle selected by
     *                        IsUpper is referenced (U for A=U'*U, L for A=L*L')
     *     N              -   matrix size, must be >=1 (asserted below)
     *     IsUpper        -   which triangle of CHA is stored
     *     IsNormProvided -   if true, ANorm already holds the norm of A and
     *                        the estimation pass below is skipped
     *     ANorm          -   norm of A (used only when IsNormProvided)
     *
     * Output:
     *     RC             -   reciprocal condition number estimate; left at 0
     *                        when ANorm==0, when a scaled triangular solve
     *                        fails, or when the estimate falls below
     *                        rcondthreshold().
     */
    ae_frame _frame_block;
    ae_vector isave;       /* integer state of the iterative norm estimator */
    ae_vector rsave;       /* real state of the iterative norm estimator */
    ae_vector ex;          /* exchange vector for reverse communication (1-based) */
    ae_vector ev;          /* work vector of the norm estimator */
    ae_vector tmp;         /* scratch for the U'/L' multiplications (0-based) */
    ae_int_t kase;
    double ainvnm;
    ae_complex v;
    ae_int_t i;
    ae_int_t j;
    double sa;
    double maxgrowth;

    ae_frame_make(_state, &_frame_block);
    memset(&isave, 0, sizeof(isave));
    memset(&rsave, 0, sizeof(rsave));
    memset(&ex, 0, sizeof(ex));
    memset(&ev, 0, sizeof(ev));
    memset(&tmp, 0, sizeof(tmp));
    *rc = 0;
    ae_vector_init(&isave, 0, DT_INT, _state, ae_true);
    ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&ev, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);

    ae_assert(n>=1, "Assertion failed", _state);
    ae_vector_set_length(&tmp, n, _state);

    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * prepare parameters for triangular solver:
     * MaxGrowth bounds the solution growth allowed by the scaled solver;
     * SA = 1/max|CHA[i][j]| over the stored triangle, so SA*CHA has
     * unit-scale entries (SA falls back to 1 for an all-zero factor).
     */
    maxgrowth = 1/rcondthreshold(_state);
    sa = (double)(0);
    if( isupper )
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=i; j<=n-1; j++)
            {
                sa = ae_maxreal(sa, ae_c_abs(cha->ptr.pp_complex[i][j], _state), _state);
            }
        }
    }
    else
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=i; j++)
            {
                sa = ae_maxreal(sa, ae_c_abs(cha->ptr.pp_complex[i][j], _state), _state);
            }
        }
    }
    if( ae_fp_eq(sa,(double)(0)) )
    {
        sa = (double)(1);
    }
    sa = 1/sa;

    /*
     * Estimate the norm of A
     */
    if( !isnormprovided )
    {
        anorm = (double)(0);
        kase = 0;
        for(;;)
        {
            /*
             * Reverse communication: while KASE!=0 the estimator asks us to
             * overwrite EX (1-based) with a matrix-vector product.  Since A
             * is Hermitian the same product A*EX serves every request, so no
             * branching on the KASE value is needed here.
             */
            rcond_cmatrixestimatenorm(n, &ev, &ex, &anorm, &kase, &isave, &rsave, _state);
            if( kase==0 )
            {
                break;
            }
            if( isupper )
            {

                /*
                 * Multiply by U
                 */
                for(i=1; i<=n; i++)
                {
                    /* row i-1 of U dotted with EX[i..n] (both tails start at the diagonal) */
                    v = ae_v_cdotproduct(&cha->ptr.pp_complex[i-1][i-1], 1, "N", &ex.ptr.p_complex[i], 1, "N", ae_v_len(i-1,n-1));
                    ex.ptr.p_complex[i] = v;
                }
                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);

                /*
                 * Multiply by U'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_complex[i] = ae_complex_from_i(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    /* accumulate EX[i+1]*conj(U[i][i..n-1]) into TMP[i..n-1] */
                    v = ex.ptr.p_complex[i+1];
                    ae_v_caddc(&tmp.ptr.p_complex[i], 1, &cha->ptr.pp_complex[i][i], 1, "Conj", ae_v_len(i,n-1), v);
                }
                ae_v_cmove(&ex.ptr.p_complex[1], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(1,n));
                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);
            }
            else
            {

                /*
                 * Multiply by L'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_complex[i] = ae_complex_from_i(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    /* accumulate EX[i+1]*conj(L[i][0..i]) into TMP[0..i] */
                    v = ex.ptr.p_complex[i+1];
                    ae_v_caddc(&tmp.ptr.p_complex[0], 1, &cha->ptr.pp_complex[i][0], 1, "Conj", ae_v_len(0,i), v);
                }
                ae_v_cmove(&ex.ptr.p_complex[1], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(1,n));
                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);

                /*
                 * Multiply by L
                 */
                for(i=n; i>=1; i--)
                {
                    v = ae_v_cdotproduct(&cha->ptr.pp_complex[i-1][0], 1, "N", &ex.ptr.p_complex[1], 1, "N", ae_v_len(0,i-1));
                    ex.ptr.p_complex[i] = v;
                }
                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);
            }
        }
    }

    /*
     * Quick return if possible
     * After this block we assume that ANORM<>0
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the norm of inv(A).
     */
    ainvnm = (double)(0);
    kase = 0;
    for(;;)
    {
        rcond_cmatrixestimatenorm(n, &ev, &ex, &ainvnm, &kase, &isave, &rsave, _state);
        if( kase==0 )
        {
            break;
        }
        /* estimator works with 1-based EX; shift to 0-based for the solver */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1];
        }
        if( isupper )
        {

            /*
             * Multiply by inv(U').
             * (trans-code 2 requests the conjugate-transpose solve; any
             * failure of the growth-bounded solver leaves RC=0)
             */
            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 2, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(U).
             */
            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * Multiply by inv(L).
             */
            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(L').
             */
            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 2, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        /* shift back to the estimator's 1-based indexing */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        *rc = 1/ainvnm;
        *rc = *rc/anorm;
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
55944 
55945 
55946 /*************************************************************************
55947 Internal subroutine for condition number estimation
55948 
55949   -- LAPACK routine (version 3.0) --
55950      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55951      Courant Institute, Argonne National Lab, and Rice University
55952      February 29, 1992
55953 *************************************************************************/
static void rcond_rmatrixrcondluinternal(/* Real    */ ae_matrix* lua,
     ae_int_t n,
     ae_bool onenorm,
     ae_bool isanormprovided,
     double anorm,
     double* rc,
     ae_state *_state)
{
    /*
     * Estimates the reciprocal condition number of a real general matrix A
     * given by its LU decomposition stored compactly in LUA (unit lower
     * triangular L below the diagonal, upper triangular U on and above it).
     *
     * Inputs:
     *     LUA             -   combined LU factors of A
     *     N               -   matrix size
     *     OneNorm         -   true: use the 1-norm, false: use the inf-norm
     *     IsANormProvided -   if true, ANorm already holds the norm of A
     *     ANorm           -   norm of A (used only when IsANormProvided)
     *
     * Output:
     *     RC              -   reciprocal condition number estimate; left at 0
     *                         when ANorm==0, when a scaled triangular solve
     *                         fails, or when the estimate falls below
     *                         rcondthreshold().
     */
    ae_frame _frame_block;
    ae_vector ex;          /* exchange vector for reverse communication (1-based) */
    ae_vector ev;          /* work vector of the norm estimator */
    ae_vector iwork;       /* integer work area of the norm estimator */
    ae_vector tmp;         /* scratch for the transposed multiplications (0-based) */
    double v;
    ae_int_t i;
    ae_int_t j;
    ae_int_t kase;
    ae_int_t kase1;        /* KASE value that requests A*x (the other requests A'*x) */
    double ainvnm;
    double maxgrowth;
    double su;
    double sl;
    ae_bool mupper;
    ae_bool munit;

    ae_frame_make(_state, &_frame_block);
    memset(&ex, 0, sizeof(ex));
    memset(&ev, 0, sizeof(ev));
    memset(&iwork, 0, sizeof(iwork));
    memset(&tmp, 0, sizeof(tmp));
    *rc = 0;
    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ev, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);


    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * init
     */
    if( onenorm )
    {
        kase1 = 1;
    }
    else
    {
        kase1 = 2;
    }
    /* MUpper/MUnit describe the U and L factors for the solver calls below:
     * U is upper non-unit (mupper, !munit), L is lower unit (!mupper, munit) */
    mupper = ae_true;
    munit = ae_true;
    ae_vector_set_length(&iwork, n+1, _state);
    ae_vector_set_length(&tmp, n, _state);

    /*
     * prepare parameters for triangular solver:
     * SU = 1/max|U entries|, SL = 1/max(1, |strict L entries|); SL starts
     * at 1 because L has an implicit unit diagonal.
     */
    maxgrowth = 1/rcondthreshold(_state);
    su = (double)(0);
    sl = (double)(1);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=i-1; j++)
        {
            sl = ae_maxreal(sl, ae_fabs(lua->ptr.pp_double[i][j], _state), _state);
        }
        for(j=i; j<=n-1; j++)
        {
            su = ae_maxreal(su, ae_fabs(lua->ptr.pp_double[i][j], _state), _state);
        }
    }
    if( ae_fp_eq(su,(double)(0)) )
    {
        su = (double)(1);
    }
    su = 1/su;
    sl = 1/sl;

    /*
     * Estimate the norm of A.
     */
    if( !isanormprovided )
    {
        kase = 0;
        anorm = (double)(0);
        for(;;)
        {
            /* reverse communication: KASE==KASE1 requests EX <- A*EX,
             * otherwise EX <- A'*EX; KASE==0 means the estimate is ready */
            rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &anorm, &kase, _state);
            if( kase==0 )
            {
                break;
            }
            if( kase==kase1 )
            {

                /*
                 * Multiply by U
                 */
                for(i=1; i<=n; i++)
                {
                    v = ae_v_dotproduct(&lua->ptr.pp_double[i-1][i-1], 1, &ex.ptr.p_double[i], 1, ae_v_len(i-1,n-1));
                    ex.ptr.p_double[i] = v;
                }

                /*
                 * Multiply by L
                 * (unit diagonal: EX[i] keeps its own term, the strict
                 * lower row contributes the dot product)
                 */
                for(i=n; i>=1; i--)
                {
                    if( i>1 )
                    {
                        v = ae_v_dotproduct(&lua->ptr.pp_double[i-1][0], 1, &ex.ptr.p_double[1], 1, ae_v_len(0,i-2));
                    }
                    else
                    {
                        v = (double)(0);
                    }
                    ex.ptr.p_double[i] = ex.ptr.p_double[i]+v;
                }
            }
            else
            {

                /*
                 * Multiply by L'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_double[i] = (double)(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    v = ex.ptr.p_double[i+1];
                    if( i>=1 )
                    {
                        /* strict lower row i spread over TMP[0..i-1] */
                        ae_v_addd(&tmp.ptr.p_double[0], 1, &lua->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), v);
                    }
                    /* implicit unit diagonal of L */
                    tmp.ptr.p_double[i] = tmp.ptr.p_double[i]+v;
                }
                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));

                /*
                 * Multiply by U'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_double[i] = (double)(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    v = ex.ptr.p_double[i+1];
                    ae_v_addd(&tmp.ptr.p_double[i], 1, &lua->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v);
                }
                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
            }
        }
    }

    /*
     * Scale according to SU/SL
     */
    anorm = anorm*su*sl;

    /*
     * Quick return if possible
     * We assume that ANORM<>0 after this block
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the norm of inv(A).
     */
    ainvnm = (double)(0);
    kase = 0;
    for(;;)
    {
        rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state);
        if( kase==0 )
        {
            break;
        }

        /*
         * from 1-based array to 0-based
         */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_double[i] = ex.ptr.p_double[i+1];
        }

        /*
         * multiply by inv(A) or inv(A')
         * (any failure of the growth-bounded solver leaves RC=0)
         */
        if( kase==kase1 )
        {

            /*
             * Multiply by inv(L).
             */
            if( !rmatrixscaledtrsafesolve(lua, sl, n, &ex, !mupper, 0, munit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(U).
             */
            if( !rmatrixscaledtrsafesolve(lua, su, n, &ex, mupper, 0, !munit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * Multiply by inv(U').
             */
            if( !rmatrixscaledtrsafesolve(lua, su, n, &ex, mupper, 1, !munit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(L').
             */
            if( !rmatrixscaledtrsafesolve(lua, sl, n, &ex, !mupper, 1, munit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }

        /*
         * from 0-based array to 1-based
         */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_double[i+1] = ex.ptr.p_double[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        *rc = 1/ainvnm;
        *rc = *rc/anorm;
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
56227 
56228 
56229 /*************************************************************************
56230 Condition number estimation
56231 
56232   -- LAPACK routine (version 3.0) --
56233      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
56234      Courant Institute, Argonne National Lab, and Rice University
56235      March 31, 1993
56236 *************************************************************************/
rcond_cmatrixrcondluinternal(ae_matrix * lua,ae_int_t n,ae_bool onenorm,ae_bool isanormprovided,double anorm,double * rc,ae_state * _state)56237 static void rcond_cmatrixrcondluinternal(/* Complex */ ae_matrix* lua,
56238      ae_int_t n,
56239      ae_bool onenorm,
56240      ae_bool isanormprovided,
56241      double anorm,
56242      double* rc,
56243      ae_state *_state)
56244 {
56245     ae_frame _frame_block;
56246     ae_vector ex;
56247     ae_vector cwork2;
56248     ae_vector cwork3;
56249     ae_vector cwork4;
56250     ae_vector isave;
56251     ae_vector rsave;
56252     ae_int_t kase;
56253     ae_int_t kase1;
56254     double ainvnm;
56255     ae_complex v;
56256     ae_int_t i;
56257     ae_int_t j;
56258     double su;
56259     double sl;
56260     double maxgrowth;
56261 
56262     ae_frame_make(_state, &_frame_block);
56263     memset(&ex, 0, sizeof(ex));
56264     memset(&cwork2, 0, sizeof(cwork2));
56265     memset(&cwork3, 0, sizeof(cwork3));
56266     memset(&cwork4, 0, sizeof(cwork4));
56267     memset(&isave, 0, sizeof(isave));
56268     memset(&rsave, 0, sizeof(rsave));
56269     *rc = 0;
56270     ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true);
56271     ae_vector_init(&cwork2, 0, DT_COMPLEX, _state, ae_true);
56272     ae_vector_init(&cwork3, 0, DT_COMPLEX, _state, ae_true);
56273     ae_vector_init(&cwork4, 0, DT_COMPLEX, _state, ae_true);
56274     ae_vector_init(&isave, 0, DT_INT, _state, ae_true);
56275     ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true);
56276 
56277     if( n<=0 )
56278     {
56279         ae_frame_leave(_state);
56280         return;
56281     }
56282     ae_vector_set_length(&cwork2, n+1, _state);
56283     *rc = (double)(0);
56284     if( n==0 )
56285     {
56286         *rc = (double)(1);
56287         ae_frame_leave(_state);
56288         return;
56289     }
56290 
56291     /*
56292      * prepare parameters for triangular solver
56293      */
56294     maxgrowth = 1/rcondthreshold(_state);
56295     su = (double)(0);
56296     sl = (double)(1);
56297     for(i=0; i<=n-1; i++)
56298     {
56299         for(j=0; j<=i-1; j++)
56300         {
56301             sl = ae_maxreal(sl, ae_c_abs(lua->ptr.pp_complex[i][j], _state), _state);
56302         }
56303         for(j=i; j<=n-1; j++)
56304         {
56305             su = ae_maxreal(su, ae_c_abs(lua->ptr.pp_complex[i][j], _state), _state);
56306         }
56307     }
56308     if( ae_fp_eq(su,(double)(0)) )
56309     {
56310         su = (double)(1);
56311     }
56312     su = 1/su;
56313     sl = 1/sl;
56314 
56315     /*
56316      * Estimate the norm of SU*SL*A.
56317      */
56318     if( !isanormprovided )
56319     {
56320         anorm = (double)(0);
56321         if( onenorm )
56322         {
56323             kase1 = 1;
56324         }
56325         else
56326         {
56327             kase1 = 2;
56328         }
56329         kase = 0;
56330         do
56331         {
56332             rcond_cmatrixestimatenorm(n, &cwork4, &ex, &anorm, &kase, &isave, &rsave, _state);
56333             if( kase!=0 )
56334             {
56335                 if( kase==kase1 )
56336                 {
56337 
56338                     /*
56339                      * Multiply by U
56340                      */
56341                     for(i=1; i<=n; i++)
56342                     {
56343                         v = ae_v_cdotproduct(&lua->ptr.pp_complex[i-1][i-1], 1, "N", &ex.ptr.p_complex[i], 1, "N", ae_v_len(i-1,n-1));
56344                         ex.ptr.p_complex[i] = v;
56345                     }
56346 
56347                     /*
56348                      * Multiply by L
56349                      */
56350                     for(i=n; i>=1; i--)
56351                     {
56352                         v = ae_complex_from_i(0);
56353                         if( i>1 )
56354                         {
56355                             v = ae_v_cdotproduct(&lua->ptr.pp_complex[i-1][0], 1, "N", &ex.ptr.p_complex[1], 1, "N", ae_v_len(0,i-2));
56356                         }
56357                         ex.ptr.p_complex[i] = ae_c_add(v,ex.ptr.p_complex[i]);
56358                     }
56359                 }
56360                 else
56361                 {
56362 
56363                     /*
56364                      * Multiply by L'
56365                      */
56366                     for(i=1; i<=n; i++)
56367                     {
56368                         cwork2.ptr.p_complex[i] = ae_complex_from_i(0);
56369                     }
56370                     for(i=1; i<=n; i++)
56371                     {
56372                         v = ex.ptr.p_complex[i];
56373                         if( i>1 )
56374                         {
56375                             ae_v_caddc(&cwork2.ptr.p_complex[1], 1, &lua->ptr.pp_complex[i-1][0], 1, "Conj", ae_v_len(1,i-1), v);
56376                         }
56377                         cwork2.ptr.p_complex[i] = ae_c_add(cwork2.ptr.p_complex[i],v);
56378                     }
56379 
56380                     /*
56381                      * Multiply by U'
56382                      */
56383                     for(i=1; i<=n; i++)
56384                     {
56385                         ex.ptr.p_complex[i] = ae_complex_from_i(0);
56386                     }
56387                     for(i=1; i<=n; i++)
56388                     {
56389                         v = cwork2.ptr.p_complex[i];
56390                         ae_v_caddc(&ex.ptr.p_complex[i], 1, &lua->ptr.pp_complex[i-1][i-1], 1, "Conj", ae_v_len(i,n), v);
56391                     }
56392                 }
56393             }
56394         }
56395         while(kase!=0);
56396     }
56397 
56398     /*
56399      * Scale according to SU/SL
56400      */
56401     anorm = anorm*su*sl;
56402 
56403     /*
56404      * Quick return if possible
56405      */
56406     if( ae_fp_eq(anorm,(double)(0)) )
56407     {
56408         ae_frame_leave(_state);
56409         return;
56410     }
56411 
56412     /*
56413      * Estimate the norm of inv(A).
56414      */
56415     ainvnm = (double)(0);
56416     if( onenorm )
56417     {
56418         kase1 = 1;
56419     }
56420     else
56421     {
56422         kase1 = 2;
56423     }
56424     kase = 0;
56425     for(;;)
56426     {
56427         rcond_cmatrixestimatenorm(n, &cwork4, &ex, &ainvnm, &kase, &isave, &rsave, _state);
56428         if( kase==0 )
56429         {
56430             break;
56431         }
56432 
56433         /*
56434          * From 1-based to 0-based
56435          */
56436         for(i=0; i<=n-1; i++)
56437         {
56438             ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1];
56439         }
56440 
56441         /*
56442          * multiply by inv(A) or inv(A')
56443          */
56444         if( kase==kase1 )
56445         {
56446 
56447             /*
56448              * Multiply by inv(L).
56449              */
56450             if( !cmatrixscaledtrsafesolve(lua, sl, n, &ex, ae_false, 0, ae_true, maxgrowth, _state) )
56451             {
56452                 *rc = (double)(0);
56453                 ae_frame_leave(_state);
56454                 return;
56455             }
56456 
56457             /*
56458              * Multiply by inv(U).
56459              */
56460             if( !cmatrixscaledtrsafesolve(lua, su, n, &ex, ae_true, 0, ae_false, maxgrowth, _state) )
56461             {
56462                 *rc = (double)(0);
56463                 ae_frame_leave(_state);
56464                 return;
56465             }
56466         }
56467         else
56468         {
56469 
56470             /*
56471              * Multiply by inv(U').
56472              */
56473             if( !cmatrixscaledtrsafesolve(lua, su, n, &ex, ae_true, 2, ae_false, maxgrowth, _state) )
56474             {
56475                 *rc = (double)(0);
56476                 ae_frame_leave(_state);
56477                 return;
56478             }
56479 
56480             /*
56481              * Multiply by inv(L').
56482              */
56483             if( !cmatrixscaledtrsafesolve(lua, sl, n, &ex, ae_false, 2, ae_true, maxgrowth, _state) )
56484             {
56485                 *rc = (double)(0);
56486                 ae_frame_leave(_state);
56487                 return;
56488             }
56489         }
56490 
56491         /*
56492          * from 0-based to 1-based
56493          */
56494         for(i=n-1; i>=0; i--)
56495         {
56496             ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i];
56497         }
56498     }
56499 
56500     /*
56501      * Compute the estimate of the reciprocal condition number.
56502      */
56503     if( ae_fp_neq(ainvnm,(double)(0)) )
56504     {
56505         *rc = 1/ainvnm;
56506         *rc = *rc/anorm;
56507         if( ae_fp_less(*rc,rcondthreshold(_state)) )
56508         {
56509             *rc = (double)(0);
56510         }
56511     }
56512     ae_frame_leave(_state);
56513 }
56514 
56515 
56516 /*************************************************************************
56517 Internal subroutine for matrix norm estimation
56518 
56519   -- LAPACK auxiliary routine (version 3.0) --
56520      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
56521      Courant Institute, Argonne National Lab, and Rice University
56522      February 29, 1992
56523 *************************************************************************/
rcond_rmatrixestimatenorm(ae_int_t n,ae_vector * v,ae_vector * x,ae_vector * isgn,double * est,ae_int_t * kase,ae_state * _state)56524 static void rcond_rmatrixestimatenorm(ae_int_t n,
56525      /* Real    */ ae_vector* v,
56526      /* Real    */ ae_vector* x,
56527      /* Integer */ ae_vector* isgn,
56528      double* est,
56529      ae_int_t* kase,
56530      ae_state *_state)
56531 {
56532     ae_int_t itmax;
56533     ae_int_t i;
56534     double t;
56535     ae_bool flg;
56536     ae_int_t positer;
56537     ae_int_t posj;
56538     ae_int_t posjlast;
56539     ae_int_t posjump;
56540     ae_int_t posaltsgn;
56541     ae_int_t posestold;
56542     ae_int_t postemp;
56543 
56544 
56545     itmax = 5;
56546     posaltsgn = n+1;
56547     posestold = n+2;
56548     postemp = n+3;
56549     positer = n+1;
56550     posj = n+2;
56551     posjlast = n+3;
56552     posjump = n+4;
56553     if( *kase==0 )
56554     {
56555         ae_vector_set_length(v, n+4, _state);
56556         ae_vector_set_length(x, n+1, _state);
56557         ae_vector_set_length(isgn, n+5, _state);
56558         t = (double)1/(double)n;
56559         for(i=1; i<=n; i++)
56560         {
56561             x->ptr.p_double[i] = t;
56562         }
56563         *kase = 1;
56564         isgn->ptr.p_int[posjump] = 1;
56565         return;
56566     }
56567 
56568     /*
56569      *     ................ ENTRY   (JUMP = 1)
56570      *     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
56571      */
56572     if( isgn->ptr.p_int[posjump]==1 )
56573     {
56574         if( n==1 )
56575         {
56576             v->ptr.p_double[1] = x->ptr.p_double[1];
56577             *est = ae_fabs(v->ptr.p_double[1], _state);
56578             *kase = 0;
56579             return;
56580         }
56581         *est = (double)(0);
56582         for(i=1; i<=n; i++)
56583         {
56584             *est = *est+ae_fabs(x->ptr.p_double[i], _state);
56585         }
56586         for(i=1; i<=n; i++)
56587         {
56588             if( ae_fp_greater_eq(x->ptr.p_double[i],(double)(0)) )
56589             {
56590                 x->ptr.p_double[i] = (double)(1);
56591             }
56592             else
56593             {
56594                 x->ptr.p_double[i] = (double)(-1);
56595             }
56596             isgn->ptr.p_int[i] = ae_sign(x->ptr.p_double[i], _state);
56597         }
56598         *kase = 2;
56599         isgn->ptr.p_int[posjump] = 2;
56600         return;
56601     }
56602 
56603     /*
56604      *     ................ ENTRY   (JUMP = 2)
     *     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
56606      */
56607     if( isgn->ptr.p_int[posjump]==2 )
56608     {
56609         isgn->ptr.p_int[posj] = 1;
56610         for(i=2; i<=n; i++)
56611         {
56612             if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state)) )
56613             {
56614                 isgn->ptr.p_int[posj] = i;
56615             }
56616         }
56617         isgn->ptr.p_int[positer] = 2;
56618 
56619         /*
56620          * MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
56621          */
56622         for(i=1; i<=n; i++)
56623         {
56624             x->ptr.p_double[i] = (double)(0);
56625         }
56626         x->ptr.p_double[isgn->ptr.p_int[posj]] = (double)(1);
56627         *kase = 1;
56628         isgn->ptr.p_int[posjump] = 3;
56629         return;
56630     }
56631 
56632     /*
56633      *     ................ ENTRY   (JUMP = 3)
56634      *     X HAS BEEN OVERWRITTEN BY A*X.
56635      */
56636     if( isgn->ptr.p_int[posjump]==3 )
56637     {
56638         ae_v_move(&v->ptr.p_double[1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,n));
56639         v->ptr.p_double[posestold] = *est;
56640         *est = (double)(0);
56641         for(i=1; i<=n; i++)
56642         {
56643             *est = *est+ae_fabs(v->ptr.p_double[i], _state);
56644         }
56645         flg = ae_false;
56646         for(i=1; i<=n; i++)
56647         {
56648             if( (ae_fp_greater_eq(x->ptr.p_double[i],(double)(0))&&isgn->ptr.p_int[i]<0)||(ae_fp_less(x->ptr.p_double[i],(double)(0))&&isgn->ptr.p_int[i]>=0) )
56649             {
56650                 flg = ae_true;
56651             }
56652         }
56653 
56654         /*
56655          * REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.
56656          * OR MAY BE CYCLING.
56657          */
56658         if( !flg||ae_fp_less_eq(*est,v->ptr.p_double[posestold]) )
56659         {
56660             v->ptr.p_double[posaltsgn] = (double)(1);
56661             for(i=1; i<=n; i++)
56662             {
56663                 x->ptr.p_double[i] = v->ptr.p_double[posaltsgn]*(1+(double)(i-1)/(double)(n-1));
56664                 v->ptr.p_double[posaltsgn] = -v->ptr.p_double[posaltsgn];
56665             }
56666             *kase = 1;
56667             isgn->ptr.p_int[posjump] = 5;
56668             return;
56669         }
56670         for(i=1; i<=n; i++)
56671         {
56672             if( ae_fp_greater_eq(x->ptr.p_double[i],(double)(0)) )
56673             {
56674                 x->ptr.p_double[i] = (double)(1);
56675                 isgn->ptr.p_int[i] = 1;
56676             }
56677             else
56678             {
56679                 x->ptr.p_double[i] = (double)(-1);
56680                 isgn->ptr.p_int[i] = -1;
56681             }
56682         }
56683         *kase = 2;
56684         isgn->ptr.p_int[posjump] = 4;
56685         return;
56686     }
56687 
56688     /*
56689      *     ................ ENTRY   (JUMP = 4)
     *     X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
56691      */
56692     if( isgn->ptr.p_int[posjump]==4 )
56693     {
56694         isgn->ptr.p_int[posjlast] = isgn->ptr.p_int[posj];
56695         isgn->ptr.p_int[posj] = 1;
56696         for(i=2; i<=n; i++)
56697         {
56698             if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state)) )
56699             {
56700                 isgn->ptr.p_int[posj] = i;
56701             }
56702         }
56703         if( ae_fp_neq(x->ptr.p_double[isgn->ptr.p_int[posjlast]],ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state))&&isgn->ptr.p_int[positer]<itmax )
56704         {
56705             isgn->ptr.p_int[positer] = isgn->ptr.p_int[positer]+1;
56706             for(i=1; i<=n; i++)
56707             {
56708                 x->ptr.p_double[i] = (double)(0);
56709             }
56710             x->ptr.p_double[isgn->ptr.p_int[posj]] = (double)(1);
56711             *kase = 1;
56712             isgn->ptr.p_int[posjump] = 3;
56713             return;
56714         }
56715 
56716         /*
56717          * ITERATION COMPLETE.  FINAL STAGE.
56718          */
56719         v->ptr.p_double[posaltsgn] = (double)(1);
56720         for(i=1; i<=n; i++)
56721         {
56722             x->ptr.p_double[i] = v->ptr.p_double[posaltsgn]*(1+(double)(i-1)/(double)(n-1));
56723             v->ptr.p_double[posaltsgn] = -v->ptr.p_double[posaltsgn];
56724         }
56725         *kase = 1;
56726         isgn->ptr.p_int[posjump] = 5;
56727         return;
56728     }
56729 
56730     /*
56731      *     ................ ENTRY   (JUMP = 5)
56732      *     X HAS BEEN OVERWRITTEN BY A*X.
56733      */
56734     if( isgn->ptr.p_int[posjump]==5 )
56735     {
56736         v->ptr.p_double[postemp] = (double)(0);
56737         for(i=1; i<=n; i++)
56738         {
56739             v->ptr.p_double[postemp] = v->ptr.p_double[postemp]+ae_fabs(x->ptr.p_double[i], _state);
56740         }
56741         v->ptr.p_double[postemp] = 2*v->ptr.p_double[postemp]/(3*n);
56742         if( ae_fp_greater(v->ptr.p_double[postemp],*est) )
56743         {
56744             ae_v_move(&v->ptr.p_double[1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,n));
56745             *est = v->ptr.p_double[postemp];
56746         }
56747         *kase = 0;
56748         return;
56749     }
56750 }
56751 
56752 
/*************************************************************************
Complex 1-norm estimator driven by reverse communication (the control
flow mirrors LAPACK-style CLACON estimators — NOTE(review): confirm
against the original LAPACK source if exact provenance matters).

Protocol:
* caller sets *Kase=0 and calls; the routine allocates V, X (1-based,
  length N+1) and the state arrays, fills X with 1/N, and returns with
  *Kase=1
* when the routine returns with *Kase=1, the caller must overwrite X by
  A*X and call again; with *Kase=2, the caller must overwrite X by
  CTRANS(A)*X and call again (these meanings are stated by the per-jump
  comments below)
* iteration ends when the routine returns with *Kase=0; *Est then holds
  the resulting 1-norm estimate

All locals that must survive between calls (I, Iter, J, JLast, Jump,
AbsXi, AltSgn, EstOld, Temp) are packed into ISave/RSave by the
companion save/load helpers on every exit/entry, which makes the
routine restartable across reverse-communication round trips.
*************************************************************************/
static void rcond_cmatrixestimatenorm(ae_int_t n,
     /* Complex */ ae_vector* v,
     /* Complex */ ae_vector* x,
     double* est,
     ae_int_t* kase,
     /* Integer */ ae_vector* isave,
     /* Real    */ ae_vector* rsave,
     ae_state *_state)
{
    ae_int_t itmax;
    ae_int_t i;
    ae_int_t iter;
    ae_int_t j;
    ae_int_t jlast;
    ae_int_t jump;
    double absxi;
    double altsgn;
    double estold;
    double safmin;
    double temp;



    /*
     * Executable statements.
     * ItMax bounds the number of power-method-like refinement sweeps;
     * SafMin guards the normalization divisions below.
     */
    itmax = 5;
    safmin = ae_minrealnumber;
    if( *kase==0 )
    {

        /*
         * Initialization call: allocate work areas (1-based storage,
         * hence N+1 elements) and start with the uniform vector X=e/N.
         */
        ae_vector_set_length(v, n+1, _state);
        ae_vector_set_length(x, n+1, _state);
        ae_vector_set_length(isave, 5, _state);
        ae_vector_set_length(rsave, 4, _state);
        for(i=1; i<=n; i++)
        {
            x->ptr.p_complex[i] = ae_complex_from_d((double)1/(double)n);
        }
        *kase = 1;
        jump = 1;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * Subsequent call: restore locals saved on the previous exit.
     */
    rcond_internalcomplexrcondloadall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);

    /*
     * ENTRY   (JUMP = 1)
     * FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
     */
    if( jump==1 )
    {
        if( n==1 )
        {

            /*
             * 1x1 case: |A*x| with x=1 is the exact norm, finish at once.
             */
            v->ptr.p_complex[1] = x->ptr.p_complex[1];
            *est = ae_c_abs(v->ptr.p_complex[1], _state);
            *kase = 0;
            rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
            return;
        }
        *est = rcond_internalcomplexrcondscsum1(x, n, _state);

        /*
         * Normalize each component to unit modulus (X[i]/|X[i]|);
         * components smaller than SafMin are replaced by 1 to avoid
         * dividing by a (near-)zero magnitude.
         */
        for(i=1; i<=n; i++)
        {
            absxi = ae_c_abs(x->ptr.p_complex[i], _state);
            if( ae_fp_greater(absxi,safmin) )
            {
                x->ptr.p_complex[i] = ae_c_div_d(x->ptr.p_complex[i],absxi);
            }
            else
            {
                x->ptr.p_complex[i] = ae_complex_from_i(1);
            }
        }
        *kase = 2;
        jump = 2;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * ENTRY   (JUMP = 2)
     * FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY CTRANS(A)*X.
     */
    if( jump==2 )
    {

        /*
         * Pick the index of the largest-magnitude component and probe
         * with the corresponding unit basis vector.
         */
        j = rcond_internalcomplexrcondicmax1(x, n, _state);
        iter = 2;

        /*
         * MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
         */
        for(i=1; i<=n; i++)
        {
            x->ptr.p_complex[i] = ae_complex_from_i(0);
        }
        x->ptr.p_complex[j] = ae_complex_from_i(1);
        *kase = 1;
        jump = 3;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * ENTRY   (JUMP = 3)
     * X HAS BEEN OVERWRITTEN BY A*X.
     */
    if( jump==3 )
    {
        ae_v_cmove(&v->ptr.p_complex[1], 1, &x->ptr.p_complex[1], 1, "N", ae_v_len(1,n));
        estold = *est;
        *est = rcond_internalcomplexrcondscsum1(v, n, _state);

        /*
         * TEST FOR CYCLING.
         */
        if( ae_fp_less_eq(*est,estold) )
        {

            /*
             * ITERATION COMPLETE.  FINAL STAGE.
             * Switch to the alternating-sign ramp test vector
             * (+-(1+(i-1)/(n-1))) used for the last refinement pass.
             */
            altsgn = (double)(1);
            for(i=1; i<=n; i++)
            {
                x->ptr.p_complex[i] = ae_complex_from_d(altsgn*(1+(double)(i-1)/(double)(n-1)));
                altsgn = -altsgn;
            }
            *kase = 1;
            jump = 5;
            rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
            return;
        }

        /*
         * Estimate improved: renormalize X to unit-modulus components
         * (same SafMin guard as in the first iteration) and go for
         * another CTRANS(A)*X round trip.
         */
        for(i=1; i<=n; i++)
        {
            absxi = ae_c_abs(x->ptr.p_complex[i], _state);
            if( ae_fp_greater(absxi,safmin) )
            {
                x->ptr.p_complex[i] = ae_c_div_d(x->ptr.p_complex[i],absxi);
            }
            else
            {
                x->ptr.p_complex[i] = ae_complex_from_i(1);
            }
        }
        *kase = 2;
        jump = 4;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * ENTRY   (JUMP = 4)
     * X HAS BEEN OVERWRITTEN BY CTRANS(A)*X.
     */
    if( jump==4 )
    {
        jlast = j;
        j = rcond_internalcomplexrcondicmax1(x, n, _state);

        /*
         * Continue iterating only if the dominant component moved to a
         * new position (magnitudes differ) and the sweep budget is not
         * exhausted.
         */
        if( ae_fp_neq(ae_c_abs(x->ptr.p_complex[jlast], _state),ae_c_abs(x->ptr.p_complex[j], _state))&&iter<itmax )
        {
            iter = iter+1;

            /*
             * MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
             */
            for(i=1; i<=n; i++)
            {
                x->ptr.p_complex[i] = ae_complex_from_i(0);
            }
            x->ptr.p_complex[j] = ae_complex_from_i(1);
            *kase = 1;
            jump = 3;
            rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
            return;
        }

        /*
         * ITERATION COMPLETE.  FINAL STAGE.
         * Same alternating-sign ramp vector as in the JUMP=3 exit.
         */
        altsgn = (double)(1);
        for(i=1; i<=n; i++)
        {
            x->ptr.p_complex[i] = ae_complex_from_d(altsgn*(1+(double)(i-1)/(double)(n-1)));
            altsgn = -altsgn;
        }
        *kase = 1;
        jump = 5;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * ENTRY   (JUMP = 5)
     * X HAS BEEN OVERWRITTEN BY A*X.
     */
    if( jump==5 )
    {

        /*
         * Scaled 1-norm of A applied to the ramp vector; keep it only
         * if it beats the estimate obtained by the main loop.
         */
        temp = 2*(rcond_internalcomplexrcondscsum1(x, n, _state)/(3*n));
        if( ae_fp_greater(temp,*est) )
        {
            ae_v_cmove(&v->ptr.p_complex[1], 1, &x->ptr.p_complex[1], 1, "N", ae_v_len(1,n));
            *est = temp;
        }
        *kase = 0;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }
}
56960 
56961 
rcond_internalcomplexrcondscsum1(ae_vector * x,ae_int_t n,ae_state * _state)56962 static double rcond_internalcomplexrcondscsum1(/* Complex */ ae_vector* x,
56963      ae_int_t n,
56964      ae_state *_state)
56965 {
56966     ae_int_t i;
56967     double result;
56968 
56969 
56970     result = (double)(0);
56971     for(i=1; i<=n; i++)
56972     {
56973         result = result+ae_c_abs(x->ptr.p_complex[i], _state);
56974     }
56975     return result;
56976 }
56977 
56978 
rcond_internalcomplexrcondicmax1(ae_vector * x,ae_int_t n,ae_state * _state)56979 static ae_int_t rcond_internalcomplexrcondicmax1(/* Complex */ ae_vector* x,
56980      ae_int_t n,
56981      ae_state *_state)
56982 {
56983     ae_int_t i;
56984     double m;
56985     ae_int_t result;
56986 
56987 
56988     result = 1;
56989     m = ae_c_abs(x->ptr.p_complex[1], _state);
56990     for(i=2; i<=n; i++)
56991     {
56992         if( ae_fp_greater(ae_c_abs(x->ptr.p_complex[i], _state),m) )
56993         {
56994             result = i;
56995             m = ae_c_abs(x->ptr.p_complex[i], _state);
56996         }
56997     }
56998     return result;
56999 }
57000 
57001 
rcond_internalcomplexrcondsaveall(ae_vector * isave,ae_vector * rsave,ae_int_t * i,ae_int_t * iter,ae_int_t * j,ae_int_t * jlast,ae_int_t * jump,double * absxi,double * altsgn,double * estold,double * temp,ae_state * _state)57002 static void rcond_internalcomplexrcondsaveall(/* Integer */ ae_vector* isave,
57003      /* Real    */ ae_vector* rsave,
57004      ae_int_t* i,
57005      ae_int_t* iter,
57006      ae_int_t* j,
57007      ae_int_t* jlast,
57008      ae_int_t* jump,
57009      double* absxi,
57010      double* altsgn,
57011      double* estold,
57012      double* temp,
57013      ae_state *_state)
57014 {
57015 
57016 
57017     isave->ptr.p_int[0] = *i;
57018     isave->ptr.p_int[1] = *iter;
57019     isave->ptr.p_int[2] = *j;
57020     isave->ptr.p_int[3] = *jlast;
57021     isave->ptr.p_int[4] = *jump;
57022     rsave->ptr.p_double[0] = *absxi;
57023     rsave->ptr.p_double[1] = *altsgn;
57024     rsave->ptr.p_double[2] = *estold;
57025     rsave->ptr.p_double[3] = *temp;
57026 }
57027 
57028 
rcond_internalcomplexrcondloadall(ae_vector * isave,ae_vector * rsave,ae_int_t * i,ae_int_t * iter,ae_int_t * j,ae_int_t * jlast,ae_int_t * jump,double * absxi,double * altsgn,double * estold,double * temp,ae_state * _state)57029 static void rcond_internalcomplexrcondloadall(/* Integer */ ae_vector* isave,
57030      /* Real    */ ae_vector* rsave,
57031      ae_int_t* i,
57032      ae_int_t* iter,
57033      ae_int_t* j,
57034      ae_int_t* jlast,
57035      ae_int_t* jump,
57036      double* absxi,
57037      double* altsgn,
57038      double* estold,
57039      double* temp,
57040      ae_state *_state)
57041 {
57042 
57043 
57044     *i = isave->ptr.p_int[0];
57045     *iter = isave->ptr.p_int[1];
57046     *j = isave->ptr.p_int[2];
57047     *jlast = isave->ptr.p_int[3];
57048     *jump = isave->ptr.p_int[4];
57049     *absxi = rsave->ptr.p_double[0];
57050     *altsgn = rsave->ptr.p_double[1];
57051     *estold = rsave->ptr.p_double[2];
57052     *temp = rsave->ptr.p_double[3];
57053 }
57054 
57055 
57056 #endif
57057 #if defined(AE_COMPILE_FBLS) || !defined(AE_PARTIAL_BUILD)
57058 
57059 
57060 /*************************************************************************
57061 Basic Cholesky solver for ScaleA*Cholesky(A)'*x = y.
57062 
57063 This subroutine assumes that:
57064 * A*ScaleA is well scaled
57065 * A is well-conditioned, so no zero divisions or overflow may occur
57066 
57067 INPUT PARAMETERS:
57068     CHA     -   Cholesky decomposition of A
57069     SqrtScaleA- square root of scale factor ScaleA
57070     N       -   matrix size, N>=0.
57071     IsUpper -   storage type
57072     XB      -   right part
57073     Tmp     -   buffer; function automatically allocates it, if it is  too
57074                 small.  It  can  be  reused  if function is called several
57075                 times.
57076 
57077 OUTPUT PARAMETERS:
57078     XB      -   solution
57079 
57080 NOTE 1: no assertion or tests are done during algorithm operation
57081 NOTE 2: N=0 will force algorithm to silently return
57082 
57083   -- ALGLIB --
57084      Copyright 13.10.2010 by Bochkanov Sergey
57085 *************************************************************************/
fblscholeskysolve(ae_matrix * cha,double sqrtscalea,ae_int_t n,ae_bool isupper,ae_vector * xb,ae_vector * tmp,ae_state * _state)57086 void fblscholeskysolve(/* Real    */ ae_matrix* cha,
57087      double sqrtscalea,
57088      ae_int_t n,
57089      ae_bool isupper,
57090      /* Real    */ ae_vector* xb,
57091      /* Real    */ ae_vector* tmp,
57092      ae_state *_state)
57093 {
57094     double v;
57095 
57096 
57097     if( n<=0 )
57098     {
57099         return;
57100     }
57101     if( tmp->cnt<n )
57102     {
57103         ae_vector_set_length(tmp, n, _state);
57104     }
57105 
57106     /*
57107      * Scale right part
57108      */
57109     v = 1/ae_sqr(sqrtscalea, _state);
57110     ae_v_muld(&xb->ptr.p_double[0], 1, ae_v_len(0,n-1), v);
57111 
57112     /*
57113      * Solve A = L*L' or A=U'*U
57114      */
57115     if( isupper )
57116     {
57117 
57118         /*
57119          * Solve U'*y=b first.
57120          */
57121         rmatrixtrsv(n, cha, 0, 0, ae_true, ae_false, 1, xb, 0, _state);
57122 
57123         /*
57124          * Solve U*x=y then.
57125          */
57126         rmatrixtrsv(n, cha, 0, 0, ae_true, ae_false, 0, xb, 0, _state);
57127     }
57128     else
57129     {
57130 
57131         /*
57132          * Solve L*y=b first
57133          */
57134         rmatrixtrsv(n, cha, 0, 0, ae_false, ae_false, 0, xb, 0, _state);
57135 
57136         /*
57137          * Solve L'*x=y then.
57138          */
57139         rmatrixtrsv(n, cha, 0, 0, ae_false, ae_false, 1, xb, 0, _state);
57140     }
57141 }
57142 
57143 
57144 /*************************************************************************
57145 Fast basic linear solver: linear SPD CG
57146 
57147 Solves (A^T*A + alpha*I)*x = b where:
57148 * A is MxN matrix
57149 * alpha>0 is a scalar
57150 * I is NxN identity matrix
57151 * b is Nx1 vector
57152 * X is Nx1 unknown vector.
57153 
57154 N iterations of linear conjugate gradient are used to solve problem.
57155 
57156 INPUT PARAMETERS:
57157     A   -   array[M,N], matrix
57158     M   -   number of rows
57159     N   -   number of unknowns
57160     B   -   array[N], right part
    X   -   initial approximation, array[N]
57162     Buf -   buffer; function automatically allocates it, if it is too
57163             small. It can be reused if function is called several times
57164             with same M and N.
57165 
57166 OUTPUT PARAMETERS:
57167     X   -   improved solution
57168 
57169 NOTES:
57170 *   solver checks quality of improved solution. If (because of problem
57171     condition number, numerical noise, etc.) new solution is WORSE than
57172     original approximation, then original approximation is returned.
57173 *   solver assumes that both A, B, Alpha are well scaled (i.e. they are
57174     less than sqrt(overflow) and greater than sqrt(underflow)).
57175 
57176   -- ALGLIB --
57177      Copyright 20.08.2009 by Bochkanov Sergey
57178 *************************************************************************/
void fblssolvecgx(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     double alpha,
     /* Real    */ ae_vector* b,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* buf,
     ae_state *_state)
{
    ae_int_t k;
    ae_int_t offsrk;      /* offset of r(k)   in Buf, length N */
    ae_int_t offsrk1;     /* offset of r(k+1) in Buf, length N */
    ae_int_t offsxk;      /* offset of x(k)   in Buf, length N */
    ae_int_t offsxk1;     /* offset of x(k+1) in Buf, length N */
    ae_int_t offspk;      /* offset of p(k)   in Buf, length N */
    ae_int_t offspk1;     /* offset of p(k+1) in Buf, length N */
    ae_int_t offstmp1;    /* offset of Tmp1=A*v      in Buf, length M */
    ae_int_t offstmp2;    /* offset of Tmp2=A'*Tmp1  in Buf, length N */
    ae_int_t bs;          /* total buffer size required */
    double e1;            /* residual norm of the initial approximation */
    double e2;            /* residual norm of the CG result */
    double rk2;           /* r(k)'*r(k) */
    double rk12;          /* r(k+1)'*r(k+1) */
    double pap;           /* p(k)'*(A'*A+alpha*I)*p(k) */
    double s;             /* CG step length */
    double betak;         /* CG direction update coefficient */
    double v1;
    double v2;



    /*
     * Test for special case: B=0
     * (zero right part => zero solution, returned immediately)
     */
    v1 = ae_v_dotproduct(&b->ptr.p_double[0], 1, &b->ptr.p_double[0], 1, ae_v_len(0,n-1));
    if( ae_fp_eq(v1,(double)(0)) )
    {
        for(k=0; k<=n-1; k++)
        {
            x->ptr.p_double[k] = (double)(0);
        }
        return;
    }

    /*
     * Offsets inside Buf for:
     * * R[K], R[K+1]
     * * X[K], X[K+1]
     * * P[K], P[K+1]
     * * Tmp1 - array[M], Tmp2 - array[N]
     * All working vectors live in one flat buffer which is grown only
     * when too small, so repeated calls with same M/N do not allocate.
     */
    offsrk = 0;
    offsrk1 = offsrk+n;
    offsxk = offsrk1+n;
    offsxk1 = offsxk+n;
    offspk = offsxk1+n;
    offspk1 = offspk+n;
    offstmp1 = offspk1+n;
    offstmp2 = offstmp1+m;
    bs = offstmp2+n;
    if( buf->cnt<bs )
    {
        ae_vector_set_length(buf, bs, _state);
    }

    /*
     * x(0) = x
     */
    ae_v_move(&buf->ptr.p_double[offsxk], 1, &x->ptr.p_double[0], 1, ae_v_len(offsxk,offsxk+n-1));

    /*
     * r(0) = b-A*x(0)
     * RK2 = r(0)'*r(0)
     * Here "A" of the CG system is (A'*A+alpha*I): the two rmatrixmv()
     * calls compute Tmp1=A*x (opcode 0, MxN) then Tmp2=A'*Tmp1
     * (opcode 1, NxM — dimensions imply transposed product), and the
     * alpha*x term is folded in with ae_v_addd().
     */
    rmatrixmv(m, n, a, 0, 0, 0, buf, offsxk, buf, offstmp1, _state);
    rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state);
    ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha);
    ae_v_move(&buf->ptr.p_double[offsrk], 1, &b->ptr.p_double[0], 1, ae_v_len(offsrk,offsrk+n-1));
    ae_v_sub(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk,offsrk+n-1));
    rk2 = ae_v_dotproduct(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk,offsrk+n-1));
    ae_v_move(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offspk,offspk+n-1));
    e1 = ae_sqrt(rk2, _state);

    /*
     * Cycle: at most N CG iterations (exact-arithmetic termination bound)
     */
    for(k=0; k<=n-1; k++)
    {

        /*
         * Calculate A*p(k) - store in Buf[OffsTmp2:OffsTmp2+N-1]
         * and p(k)'*A*p(k)  - store in PAP
         *
         * If PAP=0, break (iteration is over)
         * PAP is computed as |A*p|^2 + alpha*|p|^2, which equals
         * p'*(A'*A+alpha*I)*p without forming the product explicitly.
         */
        rmatrixmv(m, n, a, 0, 0, 0, buf, offspk, buf, offstmp1, _state);
        v1 = ae_v_dotproduct(&buf->ptr.p_double[offstmp1], 1, &buf->ptr.p_double[offstmp1], 1, ae_v_len(offstmp1,offstmp1+m-1));
        v2 = ae_v_dotproduct(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offspk,offspk+n-1));
        pap = v1+alpha*v2;
        rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state);
        ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha);
        if( ae_fp_eq(pap,(double)(0)) )
        {
            break;
        }

        /*
         * S = (r(k)'*r(k))/(p(k)'*A*p(k))
         */
        s = rk2/pap;

        /*
         * x(k+1) = x(k) + S*p(k)
         */
        ae_v_move(&buf->ptr.p_double[offsxk1], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offsxk1,offsxk1+n-1));
        ae_v_addd(&buf->ptr.p_double[offsxk1], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offsxk1,offsxk1+n-1), s);

        /*
         * r(k+1) = r(k) - S*A*p(k)
         * RK12 = r(k+1)'*r(k+1)
         *
         * Break if r(k+1) small enough (when compared to r(k))
         */
        ae_v_move(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk1,offsrk1+n-1));
        ae_v_subd(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk1,offsrk1+n-1), s);
        rk12 = ae_v_dotproduct(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offsrk1,offsrk1+n-1));
        if( ae_fp_less_eq(ae_sqrt(rk12, _state),100*ae_machineepsilon*ae_sqrt(rk2, _state)) )
        {

            /*
             * X(k) = x(k+1) before exit -
             * - because we expect to find solution at x(k)
             */
            ae_v_move(&buf->ptr.p_double[offsxk], 1, &buf->ptr.p_double[offsxk1], 1, ae_v_len(offsxk,offsxk+n-1));
            break;
        }

        /*
         * BetaK = RK12/RK2
         * p(k+1) = r(k+1)+betak*p(k)
         */
        betak = rk12/rk2;
        ae_v_move(&buf->ptr.p_double[offspk1], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offspk1,offspk1+n-1));
        ae_v_addd(&buf->ptr.p_double[offspk1], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offspk1,offspk1+n-1), betak);

        /*
         * r(k) := r(k+1)
         * x(k) := x(k+1)
         * p(k) := p(k+1)
         */
        ae_v_move(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offsrk,offsrk+n-1));
        ae_v_move(&buf->ptr.p_double[offsxk], 1, &buf->ptr.p_double[offsxk1], 1, ae_v_len(offsxk,offsxk+n-1));
        ae_v_move(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offspk1], 1, ae_v_len(offspk,offspk+n-1));
        rk2 = rk12;
    }

    /*
     * Calculate E2: residual norm of the candidate solution x(k),
     * computed the same way as E1 (recompute b-(A'*A+alpha*I)*x).
     */
    rmatrixmv(m, n, a, 0, 0, 0, buf, offsxk, buf, offstmp1, _state);
    rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state);
    ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha);
    ae_v_move(&buf->ptr.p_double[offsrk], 1, &b->ptr.p_double[0], 1, ae_v_len(offsrk,offsrk+n-1));
    ae_v_sub(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk,offsrk+n-1));
    v1 = ae_v_dotproduct(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk,offsrk+n-1));
    e2 = ae_sqrt(v1, _state);

    /*
     * Output result (if it was improved)
     * Otherwise X keeps the caller's original approximation.
     */
    if( ae_fp_less(e2,e1) )
    {
        ae_v_move(&x->ptr.p_double[0], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(0,n-1));
    }
}
57354 
57355 
57356 /*************************************************************************
57357 Construction of linear conjugate gradient solver.
57358 
57359 State parameter passed using "shared" semantics (i.e. previous state is NOT
erased). When it is already initialized, we can reuse previously allocated
57361 memory.
57362 
57363 INPUT PARAMETERS:
57364     X       -   initial solution
57365     B       -   right part
57366     N       -   system size
57367     State   -   structure; may be preallocated, if we want to reuse memory
57368 
57369 OUTPUT PARAMETERS:
57370     State   -   structure which is used by FBLSCGIteration() to store
57371                 algorithm state between subsequent calls.
57372 
57373 NOTE: no error checking is done; caller must check all parameters, prevent
57374       overflows, and so on.
57375 
57376   -- ALGLIB --
57377      Copyright 22.10.2009 by Bochkanov Sergey
57378 *************************************************************************/
fblscgcreate(ae_vector * x,ae_vector * b,ae_int_t n,fblslincgstate * state,ae_state * _state)57379 void fblscgcreate(/* Real    */ ae_vector* x,
57380      /* Real    */ ae_vector* b,
57381      ae_int_t n,
57382      fblslincgstate* state,
57383      ae_state *_state)
57384 {
57385 
57386 
57387     if( state->b.cnt<n )
57388     {
57389         ae_vector_set_length(&state->b, n, _state);
57390     }
57391     if( state->rk.cnt<n )
57392     {
57393         ae_vector_set_length(&state->rk, n, _state);
57394     }
57395     if( state->rk1.cnt<n )
57396     {
57397         ae_vector_set_length(&state->rk1, n, _state);
57398     }
57399     if( state->xk.cnt<n )
57400     {
57401         ae_vector_set_length(&state->xk, n, _state);
57402     }
57403     if( state->xk1.cnt<n )
57404     {
57405         ae_vector_set_length(&state->xk1, n, _state);
57406     }
57407     if( state->pk.cnt<n )
57408     {
57409         ae_vector_set_length(&state->pk, n, _state);
57410     }
57411     if( state->pk1.cnt<n )
57412     {
57413         ae_vector_set_length(&state->pk1, n, _state);
57414     }
57415     if( state->tmp2.cnt<n )
57416     {
57417         ae_vector_set_length(&state->tmp2, n, _state);
57418     }
57419     if( state->x.cnt<n )
57420     {
57421         ae_vector_set_length(&state->x, n, _state);
57422     }
57423     if( state->ax.cnt<n )
57424     {
57425         ae_vector_set_length(&state->ax, n, _state);
57426     }
57427     state->n = n;
57428     ae_v_move(&state->xk.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
57429     ae_v_move(&state->b.ptr.p_double[0], 1, &b->ptr.p_double[0], 1, ae_v_len(0,n-1));
57430     ae_vector_set_length(&state->rstate.ia, 1+1, _state);
57431     ae_vector_set_length(&state->rstate.ra, 6+1, _state);
57432     state->rstate.stage = -1;
57433 }
57434 
57435 
57436 /*************************************************************************
57437 Linear CG solver, function relying on reverse communication to calculate
57438 matrix-vector products.
57439 
57440 See comments for FBLSLinCGState structure for more info.
57441 
57442   -- ALGLIB --
57443      Copyright 22.10.2009 by Bochkanov Sergey
57444 *************************************************************************/
fblscgiteration(fblslincgstate * state,ae_state * _state)57445 ae_bool fblscgiteration(fblslincgstate* state, ae_state *_state)
57446 {
57447     ae_int_t n;
57448     ae_int_t k;
57449     double rk2;
57450     double rk12;
57451     double pap;
57452     double s;
57453     double betak;
57454     double v1;
57455     double v2;
57456     ae_bool result;
57457 
57458 
57459 
57460     /*
57461      * Reverse communication preparations
57462      * I know it looks ugly, but it works the same way
57463      * anywhere from C++ to Python.
57464      *
57465      * This code initializes locals by:
57466      * * random values determined during code
57467      *   generation - on first subroutine call
57468      * * values from previous call - on subsequent calls
57469      */
57470     if( state->rstate.stage>=0 )
57471     {
57472         n = state->rstate.ia.ptr.p_int[0];
57473         k = state->rstate.ia.ptr.p_int[1];
57474         rk2 = state->rstate.ra.ptr.p_double[0];
57475         rk12 = state->rstate.ra.ptr.p_double[1];
57476         pap = state->rstate.ra.ptr.p_double[2];
57477         s = state->rstate.ra.ptr.p_double[3];
57478         betak = state->rstate.ra.ptr.p_double[4];
57479         v1 = state->rstate.ra.ptr.p_double[5];
57480         v2 = state->rstate.ra.ptr.p_double[6];
57481     }
57482     else
57483     {
57484         n = 359;
57485         k = -58;
57486         rk2 = -919;
57487         rk12 = -909;
57488         pap = 81;
57489         s = 255;
57490         betak = 74;
57491         v1 = -788;
57492         v2 = 809;
57493     }
57494     if( state->rstate.stage==0 )
57495     {
57496         goto lbl_0;
57497     }
57498     if( state->rstate.stage==1 )
57499     {
57500         goto lbl_1;
57501     }
57502     if( state->rstate.stage==2 )
57503     {
57504         goto lbl_2;
57505     }
57506 
57507     /*
57508      * Routine body
57509      */
57510 
57511     /*
57512      * prepare locals
57513      */
57514     n = state->n;
57515 
57516     /*
57517      * Test for special case: B=0
57518      */
57519     v1 = ae_v_dotproduct(&state->b.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1));
57520     if( ae_fp_eq(v1,(double)(0)) )
57521     {
57522         for(k=0; k<=n-1; k++)
57523         {
57524             state->xk.ptr.p_double[k] = (double)(0);
57525         }
57526         result = ae_false;
57527         return result;
57528     }
57529 
57530     /*
57531      * r(0) = b-A*x(0)
57532      * RK2 = r(0)'*r(0)
57533      */
57534     ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57535     state->rstate.stage = 0;
57536     goto lbl_rcomm;
57537 lbl_0:
57538     ae_v_move(&state->rk.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1));
57539     ae_v_sub(&state->rk.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1));
57540     rk2 = ae_v_dotproduct(&state->rk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57541     ae_v_move(&state->pk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57542     state->e1 = ae_sqrt(rk2, _state);
57543 
57544     /*
57545      * Cycle
57546      */
57547     k = 0;
57548 lbl_3:
57549     if( k>n-1 )
57550     {
57551         goto lbl_5;
57552     }
57553 
57554     /*
57555      * Calculate A*p(k) - store in State.Tmp2
57556      * and p(k)'*A*p(k)  - store in PAP
57557      *
57558      * If PAP=0, break (iteration is over)
57559      */
57560     ae_v_move(&state->x.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57561     state->rstate.stage = 1;
57562     goto lbl_rcomm;
57563 lbl_1:
57564     ae_v_move(&state->tmp2.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1));
57565     pap = state->xax;
57566     if( !ae_isfinite(pap, _state) )
57567     {
57568         goto lbl_5;
57569     }
57570     if( ae_fp_less_eq(pap,(double)(0)) )
57571     {
57572         goto lbl_5;
57573     }
57574 
57575     /*
57576      * S = (r(k)'*r(k))/(p(k)'*A*p(k))
57577      */
57578     s = rk2/pap;
57579 
57580     /*
57581      * x(k+1) = x(k) + S*p(k)
57582      */
57583     ae_v_move(&state->xk1.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57584     ae_v_addd(&state->xk1.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1), s);
57585 
57586     /*
57587      * r(k+1) = r(k) - S*A*p(k)
57588      * RK12 = r(k+1)'*r(k+1)
57589      *
57590      * Break if r(k+1) small enough (when compared to r(k))
57591      */
57592     ae_v_move(&state->rk1.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57593     ae_v_subd(&state->rk1.ptr.p_double[0], 1, &state->tmp2.ptr.p_double[0], 1, ae_v_len(0,n-1), s);
57594     rk12 = ae_v_dotproduct(&state->rk1.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57595     if( ae_fp_less_eq(ae_sqrt(rk12, _state),100*ae_machineepsilon*state->e1) )
57596     {
57597 
57598         /*
57599          * X(k) = x(k+1) before exit -
57600          * - because we expect to find solution at x(k)
57601          */
57602         ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57603         goto lbl_5;
57604     }
57605 
57606     /*
57607      * BetaK = RK12/RK2
57608      * p(k+1) = r(k+1)+betak*p(k)
57609      *
57610      * NOTE: we expect that BetaK won't overflow because of
57611      * "Sqrt(RK12)<=100*MachineEpsilon*E1" test above.
57612      */
57613     betak = rk12/rk2;
57614     ae_v_move(&state->pk1.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57615     ae_v_addd(&state->pk1.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1), betak);
57616 
57617     /*
57618      * r(k) := r(k+1)
57619      * x(k) := x(k+1)
57620      * p(k) := p(k+1)
57621      */
57622     ae_v_move(&state->rk.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57623     ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57624     ae_v_move(&state->pk.ptr.p_double[0], 1, &state->pk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57625     rk2 = rk12;
57626     k = k+1;
57627     goto lbl_3;
57628 lbl_5:
57629 
57630     /*
57631      * Calculate E2
57632      */
57633     ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57634     state->rstate.stage = 2;
57635     goto lbl_rcomm;
57636 lbl_2:
57637     ae_v_move(&state->rk.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1));
57638     ae_v_sub(&state->rk.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1));
57639     v1 = ae_v_dotproduct(&state->rk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57640     state->e2 = ae_sqrt(v1, _state);
57641     result = ae_false;
57642     return result;
57643 
57644     /*
57645      * Saving state
57646      */
57647 lbl_rcomm:
57648     result = ae_true;
57649     state->rstate.ia.ptr.p_int[0] = n;
57650     state->rstate.ia.ptr.p_int[1] = k;
57651     state->rstate.ra.ptr.p_double[0] = rk2;
57652     state->rstate.ra.ptr.p_double[1] = rk12;
57653     state->rstate.ra.ptr.p_double[2] = pap;
57654     state->rstate.ra.ptr.p_double[3] = s;
57655     state->rstate.ra.ptr.p_double[4] = betak;
57656     state->rstate.ra.ptr.p_double[5] = v1;
57657     state->rstate.ra.ptr.p_double[6] = v2;
57658     return result;
57659 }
57660 
57661 
57662 /*************************************************************************
57663 Construction of GMRES(k) solver.
57664 
State parameter passed using "shared" semantics (i.e. previous state is NOT
erased). When it is already initialized, we can reuse previously allocated
memory.

After (but not before!) initialization you can tweak following fields (they
are initialized by default values, but you can change it):
* State.EpsOrt - stop if norm of new candidate for orthogonalization is below EpsOrt
* State.EpsRes - stop if residual decreased below EpsRes*|B|
* State.EpsRed - stop if relative reduction of residual |R(k+1)|/|R(k)|>EpsRed
57674 
57675 INPUT PARAMETERS:
57676     B       -   right part
57677     N       -   system size
57678     K       -   iterations count, K>=1
57679     State   -   structure; may be preallocated, if we want to reuse memory
57680 
57681 OUTPUT PARAMETERS:
57682     State   -   structure which is used by FBLSGMRESIteration() to store
57683                 algorithm state between subsequent calls.
57684 
57685 NOTE: no error checking is done; caller must check all parameters, prevent
57686       overflows, and so on.
57687 
57688   -- ALGLIB --
57689      Copyright 18.11.2020 by Bochkanov Sergey
57690 *************************************************************************/
fblsgmrescreate(ae_vector * b,ae_int_t n,ae_int_t k,fblsgmresstate * state,ae_state * _state)57691 void fblsgmrescreate(/* Real    */ ae_vector* b,
57692      ae_int_t n,
57693      ae_int_t k,
57694      fblsgmresstate* state,
57695      ae_state *_state)
57696 {
57697 
57698 
57699     ae_assert((n>0&&k>0)&&k<=n, "FBLSGMRESCreate: incorrect params", _state);
57700     state->n = n;
57701     state->itscnt = k;
57702     state->epsort = (1000+ae_sqrt((double)(n), _state))*ae_machineepsilon;
57703     state->epsres = (1000+ae_sqrt((double)(n), _state))*ae_machineepsilon;
57704     state->epsred = 1.0;
57705     state->epsdiag = (10000+n)*ae_machineepsilon;
57706     state->itsperformed = 0;
57707     state->retcode = 0;
57708     rcopyallocv(n, b, &state->b, _state);
57709     rallocv(n, &state->x, _state);
57710     rallocv(n, &state->ax, _state);
57711     ae_vector_set_length(&state->rstate.ia, 4+1, _state);
57712     ae_vector_set_length(&state->rstate.ra, 10+1, _state);
57713     state->rstate.stage = -1;
57714 }
57715 
57716 
57717 /*************************************************************************
GMRES(k) solver, function relying on reverse communication to calculate
matrix-vector products.

See comments for FBLSGMRESState structure for more info.
57722 
57723   -- ALGLIB --
57724      Copyright 22.10.2009 by Bochkanov Sergey
57725 *************************************************************************/
ae_bool fblsgmresiteration(fblsgmresstate* state, ae_state *_state)
{
    ae_int_t n;
    ae_int_t itidx;
    ae_int_t kdim;
    double rmax;
    double rmindiag;
    double cs;
    double sn;
    double v;
    double vv;
    double anrm;
    double qnrm;
    double bnrm;
    double resnrm;
    double prevresnrm;
    ae_int_t i;
    ae_int_t j;
    ae_bool result;



    /*
     * Reverse communication preparations
     * I know it looks ugly, but it works the same way
     * anywhere from C++ to Python.
     *
     * This code initializes locals by:
     * * random values determined during code
     *   generation - on first subroutine call
     * * values from previous call - on subsequent calls
     */
    if( state->rstate.stage>=0 )
    {
        n = state->rstate.ia.ptr.p_int[0];
        itidx = state->rstate.ia.ptr.p_int[1];
        kdim = state->rstate.ia.ptr.p_int[2];
        i = state->rstate.ia.ptr.p_int[3];
        j = state->rstate.ia.ptr.p_int[4];
        rmax = state->rstate.ra.ptr.p_double[0];
        rmindiag = state->rstate.ra.ptr.p_double[1];
        cs = state->rstate.ra.ptr.p_double[2];
        sn = state->rstate.ra.ptr.p_double[3];
        v = state->rstate.ra.ptr.p_double[4];
        vv = state->rstate.ra.ptr.p_double[5];
        anrm = state->rstate.ra.ptr.p_double[6];
        qnrm = state->rstate.ra.ptr.p_double[7];
        bnrm = state->rstate.ra.ptr.p_double[8];
        resnrm = state->rstate.ra.ptr.p_double[9];
        prevresnrm = state->rstate.ra.ptr.p_double[10];
    }
    else
    {
        n = 205;
        itidx = -838;
        kdim = 939;
        i = -526;
        j = 763;
        rmax = -541;
        rmindiag = -698;
        cs = -900;
        sn = -318;
        v = -940;
        vv = 1016;
        anrm = -229;
        qnrm = -536;
        bnrm = 487;
        resnrm = -115;
        prevresnrm = 886;
    }
    if( state->rstate.stage==0 )
    {
        goto lbl_0;
    }

    /*
     * Routine body
     *
     * NOTE (review): this function returns ae_true every time the caller
     * has to compute State.Ax := A*State.X and call it again; it returns
     * ae_false when iterations are over (solution is stored in State.XS,
     * termination reason in State.RetCode).
     */
    n = state->n;
    state->retcode = 1;   /* RetCode=1: default - iteration budget (ItsCnt) exhausted */

    /*
     * Set up Q0
     */
    rsetallocv(n, 0.0, &state->xs, _state);
    bnrm = ae_sqrt(rdotv2(n, &state->b, _state), _state);
    if( ae_fp_eq(bnrm,(double)(0)) )
    {

        /*
         * Zero right part: XS=0 is an exact solution, no iterations needed
         */
        result = ae_false;
        return result;
    }
    rallocm(state->itscnt+1, n, &state->qi, _state);
    rallocm(state->itscnt, n, &state->aqi, _state);
    rcopymulvr(n, 1/bnrm, &state->b, &state->qi, 0, _state);
    rsetallocm(state->itscnt+1, state->itscnt, 0.0, &state->h, _state);
    rsetallocm(state->itscnt+1, state->itscnt, 0.0, &state->hr, _state);
    rsetallocm(state->itscnt+1, state->itscnt+1, 0.0, &state->hq, _state);
    for(i=0; i<=state->itscnt; i++)
    {
        state->hq.ptr.pp_double[i][i] = (double)(1);
    }
    rsetallocv(state->itscnt+1, 0.0, &state->hqb, _state);
    state->hqb.ptr.p_double[0] = bnrm;

    /*
     * Perform iteration
     */
    resnrm = bnrm;
    kdim = 0;
    rmax = (double)(0);
    rmindiag = 1.0E99;
    rsetallocv(state->itscnt, 0.0, &state->ys, _state);
    rallocv(ae_maxint(n, state->itscnt+2, _state), &state->tmp0, _state);
    rallocv(ae_maxint(n, state->itscnt+2, _state), &state->tmp1, _state);
    itidx = 0;
lbl_1:
    if( itidx>state->itscnt-1 )
    {
        goto lbl_3;
    }
    prevresnrm = resnrm;

    /*
     * Compute A*Qi[ItIdx], then compute Qi[ItIdx+1]
     */
    rcopyrv(n, &state->qi, itidx, &state->x, _state);
    state->rstate.stage = 0;
    goto lbl_rcomm;
lbl_0:
    rcopyvr(n, &state->ax, &state->aqi, itidx, _state);
    anrm = ae_sqrt(rdotv2(n, &state->ax, _state), _state);
    if( ae_fp_eq(anrm,(double)(0)) )
    {

        /*
         * RetCode=2: A*Qi[ItIdx] is exactly zero
         */
        state->retcode = 2;
        goto lbl_3;
    }

    /*
     * NOTE (review): AX is deliberately orthogonalized against the first
     * ItIdx+1 rows of Qi TWICE in a row (two-pass reorthogonalization for
     * better numerical orthogonality); projection coefficients from BOTH
     * passes are accumulated into column ItIdx of H.
     */
    rowwisegramschmidt(&state->qi, itidx+1, n, &state->ax, &state->tmp0, ae_true, _state);
    rowwisegramschmidt(&state->qi, itidx+1, n, &state->ax, &state->tmp1, ae_true, _state);
    raddvc(itidx+1, 1.0, &state->tmp0, &state->h, itidx, _state);
    raddvc(itidx+1, 1.0, &state->tmp1, &state->h, itidx, _state);
    qnrm = ae_sqrt(rdotv2(n, &state->ax, _state), _state);
    state->h.ptr.pp_double[itidx+1][itidx] = qnrm;
    rmulv(n, 1/coalesce(qnrm, (double)(1), _state), &state->ax, _state);   /* coalesce() guards against division by zero */
    rcopyvr(n, &state->ax, &state->qi, itidx+1, _state);

    /*
     * We have QR decomposition of H from the previous iteration:
     * * (ItIdx+1)*(ItIdx+1) orthogonal HQ embedded into larger (ItIdx+2)*(ItIdx+2) identity matrix
     * * (ItIdx+1)*ItIdx     triangular HR embedded into larger (ItIdx+2)*(ItIdx+1) zero matrix
     *
     * We just have to update QR decomposition after one more column is added to H:
     * * multiply this column by HQ to obtain (ItIdx+2)-dimensional vector X
     * * generate rotation to nullify last element of X to obtain (ItIdx+1)-dimensional vector Y
     *   that is copied into (ItIdx+1)-th column of HR
     * * apply same rotation to HQ
     * * apply same rotation to HQB - current right-hand side
     */
    rcopycv(itidx+2, &state->h, itidx, &state->tmp0, _state);
    rmatrixgemv(itidx+2, itidx+2, 1.0, &state->hq, 0, 0, 0, &state->tmp0, 0, 0.0, &state->tmp1, 0, _state);
    generaterotation(state->tmp1.ptr.p_double[itidx], state->tmp1.ptr.p_double[itidx+1], &cs, &sn, &v, _state);
    state->tmp1.ptr.p_double[itidx] = v;
    state->tmp1.ptr.p_double[itidx+1] = (double)(0);
    rmax = ae_maxreal(rmax, rmaxabsv(itidx+2, &state->tmp1, _state), _state);
    rmindiag = ae_minreal(rmindiag, ae_fabs(v, _state), _state);
    if( ae_fp_less_eq(rmindiag,rmax*state->epsdiag) )
    {

        /*
         * RetCode=3: diagonal element of R became too small relative to the
         * largest element seen so far - triangular solve would be unstable
         */
        state->retcode = 3;
        goto lbl_3;
    }
    rcopyvc(itidx+2, &state->tmp1, &state->hr, itidx, _state);
    for(j=0; j<=itidx+1; j++)
    {
        v = state->hq.ptr.pp_double[itidx+0][j];
        vv = state->hq.ptr.pp_double[itidx+1][j];
        state->hq.ptr.pp_double[itidx+0][j] = cs*v+sn*vv;
        state->hq.ptr.pp_double[itidx+1][j] = -sn*v+cs*vv;
    }
    v = state->hqb.ptr.p_double[itidx+0];
    vv = state->hqb.ptr.p_double[itidx+1];
    state->hqb.ptr.p_double[itidx+0] = cs*v+sn*vv;
    state->hqb.ptr.p_double[itidx+1] = -sn*v+cs*vv;
    resnrm = ae_fabs(state->hqb.ptr.p_double[itidx+1], _state);   /* last transformed RHS entry = current residual norm */

    /*
     * Previous attempt to extend R was successful (no small diagonal elements).
     * Increase Krylov subspace dimensionality.
     */
    kdim = kdim+1;

    /*
     * Iteration is over.
     * Terminate if:
     * * last Qi was nearly zero after orthogonalization.
     * * sufficient decrease of residual
     * * stagnation of residual
     */
    state->itsperformed = state->itsperformed+1;
    if( ae_fp_less_eq(qnrm,state->epsort*anrm)||ae_fp_eq(qnrm,(double)(0)) )
    {

        /*
         * RetCode=4: happy breakdown - new direction vanished after orthogonalization
         */
        state->retcode = 4;
        goto lbl_3;
    }
    if( ae_fp_less_eq(resnrm,state->epsres*bnrm) )
    {

        /*
         * RetCode=5: residual decreased below EpsRes*|B|
         */
        state->retcode = 5;
        goto lbl_3;
    }
    if( ae_fp_greater(resnrm/prevresnrm,state->epsred) )
    {

        /*
         * RetCode=6: insufficient reduction of the residual (stagnation)
         */
        state->retcode = 6;
        goto lbl_3;
    }
    itidx = itidx+1;
    goto lbl_1;
lbl_3:

    /*
     * Post-solve: solve triangular system HR*YS=HQB for the first KDim
     * coefficients, then combine the KDim basis vectors stored in rows
     * of Qi with these coefficients to produce XS
     */
    if( kdim>0 )
    {
        rcopyv(kdim, &state->hqb, &state->ys, _state);
        rmatrixtrsv(kdim, &state->hr, 0, 0, ae_true, ae_false, 0, &state->ys, 0, _state);
        rmatrixmv(n, kdim, &state->qi, 0, 0, 1, &state->ys, 0, &state->xs, 0, _state);
    }
    result = ae_false;
    return result;

    /*
     * Saving state
     */
lbl_rcomm:
    result = ae_true;
    state->rstate.ia.ptr.p_int[0] = n;
    state->rstate.ia.ptr.p_int[1] = itidx;
    state->rstate.ia.ptr.p_int[2] = kdim;
    state->rstate.ia.ptr.p_int[3] = i;
    state->rstate.ia.ptr.p_int[4] = j;
    state->rstate.ra.ptr.p_double[0] = rmax;
    state->rstate.ra.ptr.p_double[1] = rmindiag;
    state->rstate.ra.ptr.p_double[2] = cs;
    state->rstate.ra.ptr.p_double[3] = sn;
    state->rstate.ra.ptr.p_double[4] = v;
    state->rstate.ra.ptr.p_double[5] = vv;
    state->rstate.ra.ptr.p_double[6] = anrm;
    state->rstate.ra.ptr.p_double[7] = qnrm;
    state->rstate.ra.ptr.p_double[8] = bnrm;
    state->rstate.ra.ptr.p_double[9] = resnrm;
    state->rstate.ra.ptr.p_double[10] = prevresnrm;
    return result;
}
57977 
57978 
57979 /*************************************************************************
57980 Fast  least  squares  solver,  solves  well  conditioned  system   without
57981 performing  any  checks  for  degeneracy,  and using user-provided buffers
57982 (which are automatically reallocated if too small).
57983 
57984 This  function  is  intended  for solution of moderately sized systems. It
57985 uses factorization algorithms based on Level 2 BLAS  operations,  thus  it
57986 won't work efficiently on large scale systems.
57987 
57988 INPUT PARAMETERS:
57989     A       -   array[M,N], system matrix.
57990                 Contents of A is destroyed during solution.
57991     B       -   array[M], right part
57992     M       -   number of equations
57993     N       -   number of variables, N<=M
57994     Tmp0, Tmp1, Tmp2-
57995                 buffers; function automatically allocates them, if they are
57996                 too  small. They can  be  reused  if  function  is   called
57997                 several times.
57998 
57999 OUTPUT PARAMETERS:
58000     B       -   solution (first N components, next M-N are zero)
58001 
58002   -- ALGLIB --
58003      Copyright 20.01.2012 by Bochkanov Sergey
58004 *************************************************************************/
fblssolvels(ae_matrix * a,ae_vector * b,ae_int_t m,ae_int_t n,ae_vector * tmp0,ae_vector * tmp1,ae_vector * tmp2,ae_state * _state)58005 void fblssolvels(/* Real    */ ae_matrix* a,
58006      /* Real    */ ae_vector* b,
58007      ae_int_t m,
58008      ae_int_t n,
58009      /* Real    */ ae_vector* tmp0,
58010      /* Real    */ ae_vector* tmp1,
58011      /* Real    */ ae_vector* tmp2,
58012      ae_state *_state)
58013 {
58014     ae_int_t i;
58015     ae_int_t k;
58016     double v;
58017 
58018 
58019     ae_assert(n>0, "FBLSSolveLS: N<=0", _state);
58020     ae_assert(m>=n, "FBLSSolveLS: M<N", _state);
58021     ae_assert(a->rows>=m, "FBLSSolveLS: Rows(A)<M", _state);
58022     ae_assert(a->cols>=n, "FBLSSolveLS: Cols(A)<N", _state);
58023     ae_assert(b->cnt>=m, "FBLSSolveLS: Length(B)<M", _state);
58024 
58025     /*
58026      * Allocate temporaries
58027      */
58028     rvectorsetlengthatleast(tmp0, ae_maxint(m, n, _state)+1, _state);
58029     rvectorsetlengthatleast(tmp1, ae_maxint(m, n, _state)+1, _state);
58030     rvectorsetlengthatleast(tmp2, ae_minint(m, n, _state), _state);
58031 
58032     /*
58033      * Call basecase QR
58034      */
58035     rmatrixqrbasecase(a, m, n, tmp0, tmp1, tmp2, _state);
58036 
58037     /*
58038      * Multiply B by Q'
58039      */
58040     for(k=0; k<=n-1; k++)
58041     {
58042         for(i=0; i<=k-1; i++)
58043         {
58044             tmp0->ptr.p_double[i] = (double)(0);
58045         }
58046         ae_v_move(&tmp0->ptr.p_double[k], 1, &a->ptr.pp_double[k][k], a->stride, ae_v_len(k,m-1));
58047         tmp0->ptr.p_double[k] = (double)(1);
58048         v = ae_v_dotproduct(&tmp0->ptr.p_double[k], 1, &b->ptr.p_double[k], 1, ae_v_len(k,m-1));
58049         v = v*tmp2->ptr.p_double[k];
58050         ae_v_subd(&b->ptr.p_double[k], 1, &tmp0->ptr.p_double[k], 1, ae_v_len(k,m-1), v);
58051     }
58052 
58053     /*
58054      * Solve triangular system
58055      */
58056     b->ptr.p_double[n-1] = b->ptr.p_double[n-1]/a->ptr.pp_double[n-1][n-1];
58057     for(i=n-2; i>=0; i--)
58058     {
58059         v = ae_v_dotproduct(&a->ptr.pp_double[i][i+1], 1, &b->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1));
58060         b->ptr.p_double[i] = (b->ptr.p_double[i]-v)/a->ptr.pp_double[i][i];
58061     }
58062     for(i=n; i<=m-1; i++)
58063     {
58064         b->ptr.p_double[i] = 0.0;
58065     }
58066 }
58067 
58068 
_fblslincgstate_init(void * _p,ae_state * _state,ae_bool make_automatic)58069 void _fblslincgstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
58070 {
58071     fblslincgstate *p = (fblslincgstate*)_p;
58072     ae_touch_ptr((void*)p);
58073     ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
58074     ae_vector_init(&p->ax, 0, DT_REAL, _state, make_automatic);
58075     ae_vector_init(&p->rk, 0, DT_REAL, _state, make_automatic);
58076     ae_vector_init(&p->rk1, 0, DT_REAL, _state, make_automatic);
58077     ae_vector_init(&p->xk, 0, DT_REAL, _state, make_automatic);
58078     ae_vector_init(&p->xk1, 0, DT_REAL, _state, make_automatic);
58079     ae_vector_init(&p->pk, 0, DT_REAL, _state, make_automatic);
58080     ae_vector_init(&p->pk1, 0, DT_REAL, _state, make_automatic);
58081     ae_vector_init(&p->b, 0, DT_REAL, _state, make_automatic);
58082     _rcommstate_init(&p->rstate, _state, make_automatic);
58083     ae_vector_init(&p->tmp2, 0, DT_REAL, _state, make_automatic);
58084 }
58085 
58086 
_fblslincgstate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)58087 void _fblslincgstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
58088 {
58089     fblslincgstate *dst = (fblslincgstate*)_dst;
58090     fblslincgstate *src = (fblslincgstate*)_src;
58091     dst->e1 = src->e1;
58092     dst->e2 = src->e2;
58093     ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
58094     ae_vector_init_copy(&dst->ax, &src->ax, _state, make_automatic);
58095     dst->xax = src->xax;
58096     dst->n = src->n;
58097     ae_vector_init_copy(&dst->rk, &src->rk, _state, make_automatic);
58098     ae_vector_init_copy(&dst->rk1, &src->rk1, _state, make_automatic);
58099     ae_vector_init_copy(&dst->xk, &src->xk, _state, make_automatic);
58100     ae_vector_init_copy(&dst->xk1, &src->xk1, _state, make_automatic);
58101     ae_vector_init_copy(&dst->pk, &src->pk, _state, make_automatic);
58102     ae_vector_init_copy(&dst->pk1, &src->pk1, _state, make_automatic);
58103     ae_vector_init_copy(&dst->b, &src->b, _state, make_automatic);
58104     _rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
58105     ae_vector_init_copy(&dst->tmp2, &src->tmp2, _state, make_automatic);
58106 }
58107 
58108 
_fblslincgstate_clear(void * _p)58109 void _fblslincgstate_clear(void* _p)
58110 {
58111     fblslincgstate *p = (fblslincgstate*)_p;
58112     ae_touch_ptr((void*)p);
58113     ae_vector_clear(&p->x);
58114     ae_vector_clear(&p->ax);
58115     ae_vector_clear(&p->rk);
58116     ae_vector_clear(&p->rk1);
58117     ae_vector_clear(&p->xk);
58118     ae_vector_clear(&p->xk1);
58119     ae_vector_clear(&p->pk);
58120     ae_vector_clear(&p->pk1);
58121     ae_vector_clear(&p->b);
58122     _rcommstate_clear(&p->rstate);
58123     ae_vector_clear(&p->tmp2);
58124 }
58125 
58126 
_fblslincgstate_destroy(void * _p)58127 void _fblslincgstate_destroy(void* _p)
58128 {
58129     fblslincgstate *p = (fblslincgstate*)_p;
58130     ae_touch_ptr((void*)p);
58131     ae_vector_destroy(&p->x);
58132     ae_vector_destroy(&p->ax);
58133     ae_vector_destroy(&p->rk);
58134     ae_vector_destroy(&p->rk1);
58135     ae_vector_destroy(&p->xk);
58136     ae_vector_destroy(&p->xk1);
58137     ae_vector_destroy(&p->pk);
58138     ae_vector_destroy(&p->pk1);
58139     ae_vector_destroy(&p->b);
58140     _rcommstate_destroy(&p->rstate);
58141     ae_vector_destroy(&p->tmp2);
58142 }
58143 
58144 
_fblsgmresstate_init(void * _p,ae_state * _state,ae_bool make_automatic)58145 void _fblsgmresstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
58146 {
58147     fblsgmresstate *p = (fblsgmresstate*)_p;
58148     ae_touch_ptr((void*)p);
58149     ae_vector_init(&p->b, 0, DT_REAL, _state, make_automatic);
58150     ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
58151     ae_vector_init(&p->ax, 0, DT_REAL, _state, make_automatic);
58152     ae_vector_init(&p->xs, 0, DT_REAL, _state, make_automatic);
58153     ae_matrix_init(&p->qi, 0, 0, DT_REAL, _state, make_automatic);
58154     ae_matrix_init(&p->aqi, 0, 0, DT_REAL, _state, make_automatic);
58155     ae_matrix_init(&p->h, 0, 0, DT_REAL, _state, make_automatic);
58156     ae_matrix_init(&p->hq, 0, 0, DT_REAL, _state, make_automatic);
58157     ae_matrix_init(&p->hr, 0, 0, DT_REAL, _state, make_automatic);
58158     ae_vector_init(&p->hqb, 0, DT_REAL, _state, make_automatic);
58159     ae_vector_init(&p->ys, 0, DT_REAL, _state, make_automatic);
58160     ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
58161     ae_vector_init(&p->tmp1, 0, DT_REAL, _state, make_automatic);
58162     _rcommstate_init(&p->rstate, _state, make_automatic);
58163 }
58164 
58165 
_fblsgmresstate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)58166 void _fblsgmresstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
58167 {
58168     fblsgmresstate *dst = (fblsgmresstate*)_dst;
58169     fblsgmresstate *src = (fblsgmresstate*)_src;
58170     ae_vector_init_copy(&dst->b, &src->b, _state, make_automatic);
58171     ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
58172     ae_vector_init_copy(&dst->ax, &src->ax, _state, make_automatic);
58173     ae_vector_init_copy(&dst->xs, &src->xs, _state, make_automatic);
58174     ae_matrix_init_copy(&dst->qi, &src->qi, _state, make_automatic);
58175     ae_matrix_init_copy(&dst->aqi, &src->aqi, _state, make_automatic);
58176     ae_matrix_init_copy(&dst->h, &src->h, _state, make_automatic);
58177     ae_matrix_init_copy(&dst->hq, &src->hq, _state, make_automatic);
58178     ae_matrix_init_copy(&dst->hr, &src->hr, _state, make_automatic);
58179     ae_vector_init_copy(&dst->hqb, &src->hqb, _state, make_automatic);
58180     ae_vector_init_copy(&dst->ys, &src->ys, _state, make_automatic);
58181     ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
58182     ae_vector_init_copy(&dst->tmp1, &src->tmp1, _state, make_automatic);
58183     dst->n = src->n;
58184     dst->itscnt = src->itscnt;
58185     dst->epsort = src->epsort;
58186     dst->epsres = src->epsres;
58187     dst->epsred = src->epsred;
58188     dst->epsdiag = src->epsdiag;
58189     dst->itsperformed = src->itsperformed;
58190     dst->retcode = src->retcode;
58191     _rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
58192 }
58193 
58194 
_fblsgmresstate_clear(void * _p)58195 void _fblsgmresstate_clear(void* _p)
58196 {
58197     fblsgmresstate *p = (fblsgmresstate*)_p;
58198     ae_touch_ptr((void*)p);
58199     ae_vector_clear(&p->b);
58200     ae_vector_clear(&p->x);
58201     ae_vector_clear(&p->ax);
58202     ae_vector_clear(&p->xs);
58203     ae_matrix_clear(&p->qi);
58204     ae_matrix_clear(&p->aqi);
58205     ae_matrix_clear(&p->h);
58206     ae_matrix_clear(&p->hq);
58207     ae_matrix_clear(&p->hr);
58208     ae_vector_clear(&p->hqb);
58209     ae_vector_clear(&p->ys);
58210     ae_vector_clear(&p->tmp0);
58211     ae_vector_clear(&p->tmp1);
58212     _rcommstate_clear(&p->rstate);
58213 }
58214 
58215 
_fblsgmresstate_destroy(void * _p)58216 void _fblsgmresstate_destroy(void* _p)
58217 {
58218     fblsgmresstate *p = (fblsgmresstate*)_p;
58219     ae_touch_ptr((void*)p);
58220     ae_vector_destroy(&p->b);
58221     ae_vector_destroy(&p->x);
58222     ae_vector_destroy(&p->ax);
58223     ae_vector_destroy(&p->xs);
58224     ae_matrix_destroy(&p->qi);
58225     ae_matrix_destroy(&p->aqi);
58226     ae_matrix_destroy(&p->h);
58227     ae_matrix_destroy(&p->hq);
58228     ae_matrix_destroy(&p->hr);
58229     ae_vector_destroy(&p->hqb);
58230     ae_vector_destroy(&p->ys);
58231     ae_vector_destroy(&p->tmp0);
58232     ae_vector_destroy(&p->tmp1);
58233     _rcommstate_destroy(&p->rstate);
58234 }
58235 
58236 
58237 #endif
58238 #if defined(AE_COMPILE_NORMESTIMATOR) || !defined(AE_PARTIAL_BUILD)
58239 
58240 
58241 /*************************************************************************
58242 This procedure initializes matrix norm estimator.
58243 
58244 USAGE:
58245 1. User initializes algorithm state with NormEstimatorCreate() call
58246 2. User calls NormEstimatorEstimateSparse() (or NormEstimatorIteration())
58247 3. User calls NormEstimatorResults() to get solution.
58248 
58249 INPUT PARAMETERS:
58250     M       -   number of rows in the matrix being estimated, M>0
58251     N       -   number of columns in the matrix being estimated, N>0
58252     NStart  -   number of random starting vectors
58253                 recommended value - at least 5.
58254     NIts    -   number of iterations to do with best starting vector
58255                 recommended value - at least 5.
58256 
58257 OUTPUT PARAMETERS:
58258     State   -   structure which stores algorithm state
58259 
58260 
58261 NOTE: this algorithm is effectively deterministic, i.e. it always  returns
58262 same result when repeatedly called for the same matrix. In fact, algorithm
58263 uses randomized starting vectors, but internal  random  numbers  generator
58264 always generates same sequence of the random values (it is a  feature, not
58265 bug).
58266 
58267 Algorithm can be made non-deterministic with NormEstimatorSetSeed(0) call.
58268 
58269   -- ALGLIB --
58270      Copyright 06.12.2011 by Bochkanov Sergey
58271 *************************************************************************/
normestimatorcreate(ae_int_t m,ae_int_t n,ae_int_t nstart,ae_int_t nits,normestimatorstate * state,ae_state * _state)58272 void normestimatorcreate(ae_int_t m,
58273      ae_int_t n,
58274      ae_int_t nstart,
58275      ae_int_t nits,
58276      normestimatorstate* state,
58277      ae_state *_state)
58278 {
58279 
58280     _normestimatorstate_clear(state);
58281 
58282     ae_assert(m>0, "NormEstimatorCreate: M<=0", _state);
58283     ae_assert(n>0, "NormEstimatorCreate: N<=0", _state);
58284     ae_assert(nstart>0, "NormEstimatorCreate: NStart<=0", _state);
58285     ae_assert(nits>0, "NormEstimatorCreate: NIts<=0", _state);
58286     state->m = m;
58287     state->n = n;
58288     state->nstart = nstart;
58289     state->nits = nits;
58290     state->seedval = 11;
58291     hqrndrandomize(&state->r, _state);
58292     ae_vector_set_length(&state->x0, state->n, _state);
58293     ae_vector_set_length(&state->t, state->m, _state);
58294     ae_vector_set_length(&state->x1, state->n, _state);
58295     ae_vector_set_length(&state->xbest, state->n, _state);
58296     ae_vector_set_length(&state->x, ae_maxint(state->n, state->m, _state), _state);
58297     ae_vector_set_length(&state->mv, state->m, _state);
58298     ae_vector_set_length(&state->mtv, state->n, _state);
58299     ae_vector_set_length(&state->rstate.ia, 3+1, _state);
58300     ae_vector_set_length(&state->rstate.ra, 2+1, _state);
58301     state->rstate.stage = -1;
58302 }
58303 
58304 
58305 /*************************************************************************
58306 This function changes seed value used by algorithm. In some cases we  need
58307 deterministic processing, i.e. subsequent calls must return equal results,
58308 in other cases we need non-deterministic algorithm which returns different
58309 results for the same matrix on every pass.
58310 
58311 Setting zero seed will lead to non-deterministic algorithm, while non-zero
58312 value will make our algorithm deterministic.
58313 
58314 INPUT PARAMETERS:
58315     State       -   norm estimator state, must be initialized with a  call
58316                     to NormEstimatorCreate()
58317     SeedVal     -   seed value, >=0. Zero value = non-deterministic algo.
58318 
58319   -- ALGLIB --
58320      Copyright 06.12.2011 by Bochkanov Sergey
58321 *************************************************************************/
normestimatorsetseed(normestimatorstate * state,ae_int_t seedval,ae_state * _state)58322 void normestimatorsetseed(normestimatorstate* state,
58323      ae_int_t seedval,
58324      ae_state *_state)
58325 {
58326 
58327 
58328     ae_assert(seedval>=0, "NormEstimatorSetSeed: SeedVal<0", _state);
58329     state->seedval = seedval;
58330 }
58331 
58332 
58333 /*************************************************************************
58334 
58335   -- ALGLIB --
58336      Copyright 06.12.2011 by Bochkanov Sergey
58337 *************************************************************************/
ae_bool normestimatoriteration(normestimatorstate* state,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t m;
    ae_int_t i;
    ae_int_t itcnt;
    double v;
    double growth;
    double bestgrowth;
    ae_bool result;



    /*
     * Reverse communication preparations
     * I know it looks ugly, but it works the same way
     * anywhere from C++ to Python.
     *
     * This code initializes locals by:
     * * random values determined during code
     *   generation - on first subroutine call
     * * values from previous call - on subsequent calls
     */
    if( state->rstate.stage>=0 )
    {
        /* resuming: restore all locals saved at the last lbl_rcomm exit */
        n = state->rstate.ia.ptr.p_int[0];
        m = state->rstate.ia.ptr.p_int[1];
        i = state->rstate.ia.ptr.p_int[2];
        itcnt = state->rstate.ia.ptr.p_int[3];
        v = state->rstate.ra.ptr.p_double[0];
        growth = state->rstate.ra.ptr.p_double[1];
        bestgrowth = state->rstate.ra.ptr.p_double[2];
    }
    else
    {
        /* first call: arbitrary generated values, overwritten before use */
        n = 359;
        m = -58;
        i = -919;
        itcnt = -909;
        v = 81;
        growth = 255;
        bestgrowth = 74;
    }
    /* dispatch to the point right after the request we suspended on */
    if( state->rstate.stage==0 )
    {
        goto lbl_0;
    }
    if( state->rstate.stage==1 )
    {
        goto lbl_1;
    }
    if( state->rstate.stage==2 )
    {
        goto lbl_2;
    }
    if( state->rstate.stage==3 )
    {
        goto lbl_3;
    }

    /*
     * Routine body
     */
    n = state->n;
    m = state->m;
    if( state->seedval>0 )
    {
        /* positive seed => reseed RNG for deterministic results */
        hqrndseed(state->seedval, state->seedval+2, &state->r, _state);
    }
    bestgrowth = (double)(0);
    state->xbest.ptr.p_double[0] = (double)(1);
    for(i=1; i<=n-1; i++)
    {
        state->xbest.ptr.p_double[i] = (double)(0);
    }
    itcnt = 0;
    /*
     * Phase 1 (lbl_4..lbl_6): try NStart random unit vectors, measuring the
     * growth |A'(A*x)|^(1/2) of each; keep the best-growing direction.
     */
lbl_4:
    if( itcnt>state->nstart-1 )
    {
        goto lbl_6;
    }
    /* draw a random Gaussian vector, rejecting the (improbable) zero vector */
    do
    {
        v = (double)(0);
        for(i=0; i<=n-1; i++)
        {
            state->x0.ptr.p_double[i] = hqrndnormal(&state->r, _state);
            v = v+ae_sqr(state->x0.ptr.p_double[i], _state);
        }
    }
    while(ae_fp_eq(v,(double)(0)));
    /* normalize to unit length */
    v = 1/ae_sqrt(v, _state);
    ae_v_muld(&state->x0.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
    /* request MV = A*x from the caller */
    ae_v_move(&state->x.ptr.p_double[0], 1, &state->x0.ptr.p_double[0], 1, ae_v_len(0,n-1));
    state->needmv = ae_true;
    state->needmtv = ae_false;
    state->rstate.stage = 0;
    goto lbl_rcomm;
lbl_0:
    /* request MTV = A'*(A*x) from the caller */
    ae_v_move(&state->x.ptr.p_double[0], 1, &state->mv.ptr.p_double[0], 1, ae_v_len(0,m-1));
    state->needmv = ae_false;
    state->needmtv = ae_true;
    state->rstate.stage = 1;
    goto lbl_rcomm;
lbl_1:
    ae_v_move(&state->x1.ptr.p_double[0], 1, &state->mtv.ptr.p_double[0], 1, ae_v_len(0,n-1));
    v = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        v = v+ae_sqr(state->x1.ptr.p_double[i], _state);
    }
    /* growth = |A'*A*x|^(1/2) approximates |A| for unit x */
    growth = ae_sqrt(ae_sqrt(v, _state), _state);
    if( ae_fp_greater(growth,bestgrowth) )
    {
        /* new best starting direction: store normalized A'*A*x */
        v = 1/ae_sqrt(v, _state);
        ae_v_moved(&state->xbest.ptr.p_double[0], 1, &state->x1.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
        bestgrowth = growth;
    }
    itcnt = itcnt+1;
    goto lbl_4;
lbl_6:
    /*
     * Phase 2 (lbl_7..lbl_9): NIts rounds of power iteration on A'*A,
     * starting from the best vector found in phase 1.
     */
    ae_v_move(&state->x0.ptr.p_double[0], 1, &state->xbest.ptr.p_double[0], 1, ae_v_len(0,n-1));
    itcnt = 0;
lbl_7:
    if( itcnt>state->nits-1 )
    {
        goto lbl_9;
    }
    /* request MV = A*x0 */
    ae_v_move(&state->x.ptr.p_double[0], 1, &state->x0.ptr.p_double[0], 1, ae_v_len(0,n-1));
    state->needmv = ae_true;
    state->needmtv = ae_false;
    state->rstate.stage = 2;
    goto lbl_rcomm;
lbl_2:
    /* request MTV = A'*(A*x0) */
    ae_v_move(&state->x.ptr.p_double[0], 1, &state->mv.ptr.p_double[0], 1, ae_v_len(0,m-1));
    state->needmv = ae_false;
    state->needmtv = ae_true;
    state->rstate.stage = 3;
    goto lbl_rcomm;
lbl_3:
    ae_v_move(&state->x1.ptr.p_double[0], 1, &state->mtv.ptr.p_double[0], 1, ae_v_len(0,n-1));
    v = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        v = v+ae_sqr(state->x1.ptr.p_double[i], _state);
    }
    /* current norm estimate: |A'*A*x0|^(1/2) for unit x0 */
    state->repnorm = ae_sqrt(ae_sqrt(v, _state), _state);
    if( ae_fp_neq(v,(double)(0)) )
    {
        /* normalize and continue iterating (skip if vector degenerated to zero) */
        v = 1/ae_sqrt(v, _state);
        ae_v_moved(&state->x0.ptr.p_double[0], 1, &state->x1.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
    }
    itcnt = itcnt+1;
    goto lbl_7;
lbl_9:
    /* done: ae_false tells the caller no more products are needed */
    result = ae_false;
    return result;

    /*
     * Saving state
     */
lbl_rcomm:
    result = ae_true;
    state->rstate.ia.ptr.p_int[0] = n;
    state->rstate.ia.ptr.p_int[1] = m;
    state->rstate.ia.ptr.p_int[2] = i;
    state->rstate.ia.ptr.p_int[3] = itcnt;
    state->rstate.ra.ptr.p_double[0] = v;
    state->rstate.ra.ptr.p_double[1] = growth;
    state->rstate.ra.ptr.p_double[2] = bestgrowth;
    return result;
}
58511 
58512 
58513 /*************************************************************************
58514 This function estimates norm of the sparse M*N matrix A.
58515 
58516 INPUT PARAMETERS:
58517     State       -   norm estimator state, must be initialized with a  call
58518                     to NormEstimatorCreate()
58519     A           -   sparse M*N matrix, must be converted to CRS format
58520                     prior to calling this function.
58521 
58522 After this function  is  over  you can call NormEstimatorResults() to get
58523 estimate of the norm(A).
58524 
58525   -- ALGLIB --
58526      Copyright 06.12.2011 by Bochkanov Sergey
58527 *************************************************************************/
normestimatorestimatesparse(normestimatorstate * state,sparsematrix * a,ae_state * _state)58528 void normestimatorestimatesparse(normestimatorstate* state,
58529      sparsematrix* a,
58530      ae_state *_state)
58531 {
58532 
58533 
58534     normestimatorrestart(state, _state);
58535     while(normestimatoriteration(state, _state))
58536     {
58537         if( state->needmv )
58538         {
58539             sparsemv(a, &state->x, &state->mv, _state);
58540             continue;
58541         }
58542         if( state->needmtv )
58543         {
58544             sparsemtv(a, &state->x, &state->mtv, _state);
58545             continue;
58546         }
58547     }
58548 }
58549 
58550 
58551 /*************************************************************************
58552 Matrix norm estimation results
58553 
58554 INPUT PARAMETERS:
58555     State   -   algorithm state
58556 
58557 OUTPUT PARAMETERS:
58558     Nrm     -   estimate of the matrix norm, Nrm>=0
58559 
58560   -- ALGLIB --
58561      Copyright 06.12.2011 by Bochkanov Sergey
58562 *************************************************************************/
normestimatorresults(normestimatorstate * state,double * nrm,ae_state * _state)58563 void normestimatorresults(normestimatorstate* state,
58564      double* nrm,
58565      ae_state *_state)
58566 {
58567 
58568     *nrm = 0;
58569 
58570     *nrm = state->repnorm;
58571 }
58572 
58573 
58574 /*************************************************************************
58575 This  function  restarts estimator and prepares it for the next estimation
58576 round.
58577 
58578 INPUT PARAMETERS:
58579     State   -   algorithm state
58580   -- ALGLIB --
58581      Copyright 06.12.2011 by Bochkanov Sergey
58582 *************************************************************************/
normestimatorrestart(normestimatorstate * state,ae_state * _state)58583 void normestimatorrestart(normestimatorstate* state, ae_state *_state)
58584 {
58585 
58586 
58587     ae_vector_set_length(&state->rstate.ia, 3+1, _state);
58588     ae_vector_set_length(&state->rstate.ra, 2+1, _state);
58589     state->rstate.stage = -1;
58590 }
58591 
58592 
_normestimatorstate_init(void * _p,ae_state * _state,ae_bool make_automatic)58593 void _normestimatorstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
58594 {
58595     normestimatorstate *p = (normestimatorstate*)_p;
58596     ae_touch_ptr((void*)p);
58597     ae_vector_init(&p->x0, 0, DT_REAL, _state, make_automatic);
58598     ae_vector_init(&p->x1, 0, DT_REAL, _state, make_automatic);
58599     ae_vector_init(&p->t, 0, DT_REAL, _state, make_automatic);
58600     ae_vector_init(&p->xbest, 0, DT_REAL, _state, make_automatic);
58601     _hqrndstate_init(&p->r, _state, make_automatic);
58602     ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
58603     ae_vector_init(&p->mv, 0, DT_REAL, _state, make_automatic);
58604     ae_vector_init(&p->mtv, 0, DT_REAL, _state, make_automatic);
58605     _rcommstate_init(&p->rstate, _state, make_automatic);
58606 }
58607 
58608 
_normestimatorstate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)58609 void _normestimatorstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
58610 {
58611     normestimatorstate *dst = (normestimatorstate*)_dst;
58612     normestimatorstate *src = (normestimatorstate*)_src;
58613     dst->n = src->n;
58614     dst->m = src->m;
58615     dst->nstart = src->nstart;
58616     dst->nits = src->nits;
58617     dst->seedval = src->seedval;
58618     ae_vector_init_copy(&dst->x0, &src->x0, _state, make_automatic);
58619     ae_vector_init_copy(&dst->x1, &src->x1, _state, make_automatic);
58620     ae_vector_init_copy(&dst->t, &src->t, _state, make_automatic);
58621     ae_vector_init_copy(&dst->xbest, &src->xbest, _state, make_automatic);
58622     _hqrndstate_init_copy(&dst->r, &src->r, _state, make_automatic);
58623     ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
58624     ae_vector_init_copy(&dst->mv, &src->mv, _state, make_automatic);
58625     ae_vector_init_copy(&dst->mtv, &src->mtv, _state, make_automatic);
58626     dst->needmv = src->needmv;
58627     dst->needmtv = src->needmtv;
58628     dst->repnorm = src->repnorm;
58629     _rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
58630 }
58631 
58632 
_normestimatorstate_clear(void * _p)58633 void _normestimatorstate_clear(void* _p)
58634 {
58635     normestimatorstate *p = (normestimatorstate*)_p;
58636     ae_touch_ptr((void*)p);
58637     ae_vector_clear(&p->x0);
58638     ae_vector_clear(&p->x1);
58639     ae_vector_clear(&p->t);
58640     ae_vector_clear(&p->xbest);
58641     _hqrndstate_clear(&p->r);
58642     ae_vector_clear(&p->x);
58643     ae_vector_clear(&p->mv);
58644     ae_vector_clear(&p->mtv);
58645     _rcommstate_clear(&p->rstate);
58646 }
58647 
58648 
_normestimatorstate_destroy(void * _p)58649 void _normestimatorstate_destroy(void* _p)
58650 {
58651     normestimatorstate *p = (normestimatorstate*)_p;
58652     ae_touch_ptr((void*)p);
58653     ae_vector_destroy(&p->x0);
58654     ae_vector_destroy(&p->x1);
58655     ae_vector_destroy(&p->t);
58656     ae_vector_destroy(&p->xbest);
58657     _hqrndstate_destroy(&p->r);
58658     ae_vector_destroy(&p->x);
58659     ae_vector_destroy(&p->mv);
58660     ae_vector_destroy(&p->mtv);
58661     _rcommstate_destroy(&p->rstate);
58662 }
58663 
58664 
58665 #endif
58666 #if defined(AE_COMPILE_MATINV) || !defined(AE_PARTIAL_BUILD)
58667 
58668 
58669 /*************************************************************************
58670 Inversion of a matrix given by its LU decomposition.
58671 
58672 INPUT PARAMETERS:
58673     A       -   LU decomposition of the matrix
58674                 (output of RMatrixLU subroutine).
58675     Pivots  -   table of permutations
58676                 (the output of RMatrixLU subroutine).
58677     N       -   size of matrix A (optional) :
58678                 * if given, only principal NxN submatrix is processed  and
58679                   overwritten. other elements are unchanged.
58680                 * if not given,  size  is  automatically  determined  from
58681                   matrix size (A must be square matrix)
58682 
58683 OUTPUT PARAMETERS:
58684     Info    -   return code:
58685                 * -3    A is singular, or VERY close to singular.
58686                         it is filled by zeros in such cases.
58687                 *  1    task is solved (but matrix A may be ill-conditioned,
58688                         check R1/RInf parameters for condition numbers).
58689     Rep     -   solver report, see below for more info
58690     A       -   inverse of matrix A.
58691                 Array whose indexes range within [0..N-1, 0..N-1].
58692 
58693 SOLVER REPORT
58694 
58695 Subroutine sets following fields of the Rep structure:
58696 * R1        reciprocal of condition number: 1/cond(A), 1-norm.
58697 * RInf      reciprocal of condition number: 1/cond(A), inf-norm.
58698 
58699   ! FREE EDITION OF ALGLIB:
58700   !
58701   ! Free Edition of ALGLIB supports following important features for  this
58702   ! function:
58703   ! * C++ version: x64 SIMD support using C++ intrinsics
58704   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
58705   !
58706   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
58707   ! Reference Manual in order  to  find  out  how to activate SIMD support
58708   ! in ALGLIB.
58709 
58710   ! COMMERCIAL EDITION OF ALGLIB:
58711   !
58712   ! Commercial Edition of ALGLIB includes following important improvements
58713   ! of this function:
58714   ! * high-performance native backend with same C# interface (C# version)
58715   ! * multithreading support (C++ and C# versions)
58716   ! * hardware vendor (Intel) implementations of linear algebra primitives
58717   !   (C++ and C# versions, x86/x64 platform)
58718   !
58719   ! We recommend you to read 'Working with commercial version' section  of
58720   ! ALGLIB Reference Manual in order to find out how to  use  performance-
58721   ! related features provided by commercial edition of ALGLIB.
58722 
58723   -- ALGLIB routine --
58724      05.02.2010
58725      Bochkanov Sergey
58726 *************************************************************************/
void rmatrixluinverse(/* Real    */ ae_matrix* a,
     /* Integer */ ae_vector* pivots,
     ae_int_t n,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;
    sinteger sinfo;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&sinfo, 0, sizeof(sinfo));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    _sinteger_init(&sinfo, _state, ae_true);

    /* validate dimensions and finiteness of the LU factors */
    ae_assert(n>0, "RMatrixLUInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "RMatrixLUInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "RMatrixLUInverse: rows(A)<N!", _state);
    ae_assert(pivots->cnt>=n, "RMatrixLUInverse: len(Pivots)<N!", _state);
    ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixLUInverse: A contains infinite or NaN values!", _state);
    *info = 1;
    /* each pivot index must satisfy i <= Pivots[i] <= n-1 (partial-pivoting invariant) */
    for(i=0; i<=n-1; i++)
    {
        if( pivots->ptr.p_int[i]>n-1||pivots->ptr.p_int[i]<i )
        {
            *info = -1;
        }
    }
    ae_assert(*info>0, "RMatrixLUInverse: incorrect Pivots array!", _state);

    /*
     * calculate condition numbers
     */
    rep->r1 = rmatrixlurcond1(a, n, _state);
    rep->rinf = rmatrixlurcondinf(a, n, _state);
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {
        /* matrix is (nearly) singular: zero-fill output and report Info=-3 */
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                a->ptr.pp_double[i][j] = (double)(0);
            }
        }
        rep->r1 = (double)(0);
        rep->rinf = (double)(0);
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Call cache-oblivious code
     */
    ae_vector_set_length(&work, n, _state);
    sinfo.val = 1;
    matinv_rmatrixluinverserec(a, 0, n, &work, &sinfo, rep, _state);
    *info = sinfo.val;

    /*
     * apply permutations
     */
    /* undo the LU row swaps by swapping COLUMNS of inv(A), from last pivot to first */
    for(i=0; i<=n-1; i++)
    {
        for(j=n-2; j>=0; j--)
        {
            k = pivots->ptr.p_int[j];
            v = a->ptr.pp_double[i][j];
            a->ptr.pp_double[i][j] = a->ptr.pp_double[i][k];
            a->ptr.pp_double[i][k] = v;
        }
    }
    ae_frame_leave(_state);
}
58809 
58810 
58811 /*************************************************************************
58812 Inversion of a general matrix.
58813 
58814 Input parameters:
58815     A       -   matrix.
58816     N       -   size of matrix A (optional) :
58817                 * if given, only principal NxN submatrix is processed  and
58818                   overwritten. other elements are unchanged.
58819                 * if not given,  size  is  automatically  determined  from
58820                   matrix size (A must be square matrix)
58821 
58822 Output parameters:
58823     Info    -   return code, same as in RMatrixLUInverse
58824     Rep     -   solver report, same as in RMatrixLUInverse
58825     A       -   inverse of matrix A, same as in RMatrixLUInverse
58826 
58827 Result:
58828     True, if the matrix is not singular.
58829     False, if the matrix is singular.
58830 
58831   ! FREE EDITION OF ALGLIB:
58832   !
58833   ! Free Edition of ALGLIB supports following important features for  this
58834   ! function:
58835   ! * C++ version: x64 SIMD support using C++ intrinsics
58836   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
58837   !
58838   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
58839   ! Reference Manual in order  to  find  out  how to activate SIMD support
58840   ! in ALGLIB.
58841 
58842   ! COMMERCIAL EDITION OF ALGLIB:
58843   !
58844   ! Commercial Edition of ALGLIB includes following important improvements
58845   ! of this function:
58846   ! * high-performance native backend with same C# interface (C# version)
58847   ! * multithreading support (C++ and C# versions)
58848   ! * hardware vendor (Intel) implementations of linear algebra primitives
58849   !   (C++ and C# versions, x86/x64 platform)
58850   !
58851   ! We recommend you to read 'Working with commercial version' section  of
58852   ! ALGLIB Reference Manual in order to find out how to  use  performance-
58853   ! related features provided by commercial edition of ALGLIB.
58854 
58855   -- ALGLIB --
58856      Copyright 2005-2010 by Bochkanov Sergey
58857 *************************************************************************/
rmatrixinverse(ae_matrix * a,ae_int_t n,ae_int_t * info,matinvreport * rep,ae_state * _state)58858 void rmatrixinverse(/* Real    */ ae_matrix* a,
58859      ae_int_t n,
58860      ae_int_t* info,
58861      matinvreport* rep,
58862      ae_state *_state)
58863 {
58864     ae_frame _frame_block;
58865     ae_vector pivots;
58866 
58867     ae_frame_make(_state, &_frame_block);
58868     memset(&pivots, 0, sizeof(pivots));
58869     *info = 0;
58870     _matinvreport_clear(rep);
58871     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
58872 
58873     ae_assert(n>0, "RMatrixInverse: N<=0!", _state);
58874     ae_assert(a->cols>=n, "RMatrixInverse: cols(A)<N!", _state);
58875     ae_assert(a->rows>=n, "RMatrixInverse: rows(A)<N!", _state);
58876     ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixInverse: A contains infinite or NaN values!", _state);
58877     rmatrixlu(a, n, n, &pivots, _state);
58878     rmatrixluinverse(a, &pivots, n, info, rep, _state);
58879     ae_frame_leave(_state);
58880 }
58881 
58882 
58883 /*************************************************************************
58884 Inversion of a matrix given by its LU decomposition.
58885 
58886 INPUT PARAMETERS:
58887     A       -   LU decomposition of the matrix
58888                 (output of CMatrixLU subroutine).
58889     Pivots  -   table of permutations
58890                 (the output of CMatrixLU subroutine).
58891     N       -   size of matrix A (optional) :
58892                 * if given, only principal NxN submatrix is processed  and
58893                   overwritten. other elements are unchanged.
58894                 * if not given,  size  is  automatically  determined  from
58895                   matrix size (A must be square matrix)
58896 
58897 OUTPUT PARAMETERS:
58898     Info    -   return code, same as in RMatrixLUInverse
58899     Rep     -   solver report, same as in RMatrixLUInverse
58900     A       -   inverse of matrix A, same as in RMatrixLUInverse
58901 
58902   ! FREE EDITION OF ALGLIB:
58903   !
58904   ! Free Edition of ALGLIB supports following important features for  this
58905   ! function:
58906   ! * C++ version: x64 SIMD support using C++ intrinsics
58907   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
58908   !
58909   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
58910   ! Reference Manual in order  to  find  out  how to activate SIMD support
58911   ! in ALGLIB.
58912 
58913   ! COMMERCIAL EDITION OF ALGLIB:
58914   !
58915   ! Commercial Edition of ALGLIB includes following important improvements
58916   ! of this function:
58917   ! * high-performance native backend with same C# interface (C# version)
58918   ! * multithreading support (C++ and C# versions)
58919   ! * hardware vendor (Intel) implementations of linear algebra primitives
58920   !   (C++ and C# versions, x86/x64 platform)
58921   !
58922   ! We recommend you to read 'Working with commercial version' section  of
58923   ! ALGLIB Reference Manual in order to find out how to  use  performance-
58924   ! related features provided by commercial edition of ALGLIB.
58925 
58926   -- ALGLIB routine --
58927      05.02.2010
58928      Bochkanov Sergey
58929 *************************************************************************/
void cmatrixluinverse(/* Complex */ ae_matrix* a,
     /* Integer */ ae_vector* pivots,
     ae_int_t n,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_complex v;
    sinteger sinfo;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&sinfo, 0, sizeof(sinfo));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    _sinteger_init(&sinfo, _state, ae_true);

    /* validate dimensions and finiteness of the LU factors */
    ae_assert(n>0, "CMatrixLUInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "CMatrixLUInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "CMatrixLUInverse: rows(A)<N!", _state);
    ae_assert(pivots->cnt>=n, "CMatrixLUInverse: len(Pivots)<N!", _state);
    ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixLUInverse: A contains infinite or NaN values!", _state);
    *info = 1;
    /* each pivot index must satisfy i <= Pivots[i] <= n-1 (partial-pivoting invariant) */
    for(i=0; i<=n-1; i++)
    {
        if( pivots->ptr.p_int[i]>n-1||pivots->ptr.p_int[i]<i )
        {
            *info = -1;
        }
    }
    ae_assert(*info>0, "CMatrixLUInverse: incorrect Pivots array!", _state);

    /*
     * calculate condition numbers
     */
    rep->r1 = cmatrixlurcond1(a, n, _state);
    rep->rinf = cmatrixlurcondinf(a, n, _state);
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {
        /* matrix is (nearly) singular: zero-fill output and report Info=-3 */
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
        rep->r1 = (double)(0);
        rep->rinf = (double)(0);
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Call cache-oblivious code
     */
    ae_vector_set_length(&work, n, _state);
    sinfo.val = 1;
    matinv_cmatrixluinverserec(a, 0, n, &work, &sinfo, rep, _state);
    *info = sinfo.val;

    /*
     * apply permutations
     */
    /* undo the LU row swaps by swapping COLUMNS of inv(A), from last pivot to first */
    for(i=0; i<=n-1; i++)
    {
        for(j=n-2; j>=0; j--)
        {
            k = pivots->ptr.p_int[j];
            v = a->ptr.pp_complex[i][j];
            a->ptr.pp_complex[i][j] = a->ptr.pp_complex[i][k];
            a->ptr.pp_complex[i][k] = v;
        }
    }
    ae_frame_leave(_state);
}
59012 
59013 
59014 /*************************************************************************
59015 Inversion of a general matrix.
59016 
59017 Input parameters:
59018     A       -   matrix
59019     N       -   size of matrix A (optional) :
59020                 * if given, only principal NxN submatrix is processed  and
59021                   overwritten. other elements are unchanged.
59022                 * if not given,  size  is  automatically  determined  from
59023                   matrix size (A must be square matrix)
59024 
59025 Output parameters:
59026     Info    -   return code, same as in RMatrixLUInverse
59027     Rep     -   solver report, same as in RMatrixLUInverse
59028     A       -   inverse of matrix A, same as in RMatrixLUInverse
59029 
59030   ! FREE EDITION OF ALGLIB:
59031   !
59032   ! Free Edition of ALGLIB supports following important features for  this
59033   ! function:
59034   ! * C++ version: x64 SIMD support using C++ intrinsics
59035   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59036   !
59037   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59038   ! Reference Manual in order  to  find  out  how to activate SIMD support
59039   ! in ALGLIB.
59040 
59041   ! COMMERCIAL EDITION OF ALGLIB:
59042   !
59043   ! Commercial Edition of ALGLIB includes following important improvements
59044   ! of this function:
59045   ! * high-performance native backend with same C# interface (C# version)
59046   ! * multithreading support (C++ and C# versions)
59047   ! * hardware vendor (Intel) implementations of linear algebra primitives
59048   !   (C++ and C# versions, x86/x64 platform)
59049   !
59050   ! We recommend you to read 'Working with commercial version' section  of
59051   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59052   ! related features provided by commercial edition of ALGLIB.
59053 
59054   -- ALGLIB --
59055      Copyright 2005 by Bochkanov Sergey
59056 *************************************************************************/
cmatrixinverse(ae_matrix * a,ae_int_t n,ae_int_t * info,matinvreport * rep,ae_state * _state)59057 void cmatrixinverse(/* Complex */ ae_matrix* a,
59058      ae_int_t n,
59059      ae_int_t* info,
59060      matinvreport* rep,
59061      ae_state *_state)
59062 {
59063     ae_frame _frame_block;
59064     ae_vector pivots;
59065 
59066     ae_frame_make(_state, &_frame_block);
59067     memset(&pivots, 0, sizeof(pivots));
59068     *info = 0;
59069     _matinvreport_clear(rep);
59070     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
59071 
59072     ae_assert(n>0, "CRMatrixInverse: N<=0!", _state);
59073     ae_assert(a->cols>=n, "CRMatrixInverse: cols(A)<N!", _state);
59074     ae_assert(a->rows>=n, "CRMatrixInverse: rows(A)<N!", _state);
59075     ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixInverse: A contains infinite or NaN values!", _state);
59076     cmatrixlu(a, n, n, &pivots, _state);
59077     cmatrixluinverse(a, &pivots, n, info, rep, _state);
59078     ae_frame_leave(_state);
59079 }
59080 
59081 
59082 /*************************************************************************
59083 Inversion of a symmetric positive definite matrix which is given
59084 by Cholesky decomposition.
59085 
59086 Input parameters:
59087     A       -   Cholesky decomposition of the matrix to be inverted:
59088                 A=U'*U or A = L*L'.
59089                 Output of  SPDMatrixCholesky subroutine.
59090     N       -   size of matrix A (optional) :
59091                 * if given, only principal NxN submatrix is processed  and
59092                   overwritten. other elements are unchanged.
59093                 * if not given,  size  is  automatically  determined  from
59094                   matrix size (A must be square matrix)
59095     IsUpper -   storage type (optional):
59096                 * if True, symmetric  matrix  A  is  given  by  its  upper
59097                   triangle, and the lower triangle isn't  used/changed  by
59098                   function
59099                 * if False,  symmetric matrix  A  is  given  by  its lower
59100                   triangle, and the  upper triangle isn't used/changed  by
59101                   function
59102                 * if not given, lower half is used.
59103 
59104 Output parameters:
59105     Info    -   return code, same as in RMatrixLUInverse
59106     Rep     -   solver report, same as in RMatrixLUInverse
59107     A       -   inverse of matrix A, same as in RMatrixLUInverse
59108 
59109   ! FREE EDITION OF ALGLIB:
59110   !
59111   ! Free Edition of ALGLIB supports following important features for  this
59112   ! function:
59113   ! * C++ version: x64 SIMD support using C++ intrinsics
59114   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59115   !
59116   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59117   ! Reference Manual in order  to  find  out  how to activate SIMD support
59118   ! in ALGLIB.
59119 
59120   ! COMMERCIAL EDITION OF ALGLIB:
59121   !
59122   ! Commercial Edition of ALGLIB includes following important improvements
59123   ! of this function:
59124   ! * high-performance native backend with same C# interface (C# version)
59125   ! * multithreading support (C++ and C# versions)
59126   ! * hardware vendor (Intel) implementations of linear algebra primitives
59127   !   (C++ and C# versions, x86/x64 platform)
59128   !
59129   ! We recommend you to read 'Working with commercial version' section  of
59130   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59131   ! related features provided by commercial edition of ALGLIB.
59132 
59133   -- ALGLIB routine --
59134      10.02.2010
59135      Bochkanov Sergey
59136 *************************************************************************/
void spdmatrixcholeskyinverse(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_vector tmp;
    ae_bool f;

    ae_frame_make(_state, &_frame_block);
    memset(&tmp, 0, sizeof(tmp));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);

    /*
     * NOTE: a local matinvreport (rep2) was previously declared and
     * initialized here but never used; it has been removed.
     */
    ae_assert(n>0, "SPDMatrixCholeskyInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "SPDMatrixCholeskyInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "SPDMatrixCholeskyInverse: rows(A)<N!", _state);
    *info = 1;

    /*
     * Finiteness check is performed on the diagonal only; off-diagonal
     * entries are validated implicitly by the condition-number estimate.
     */
    f = ae_true;
    for(i=0; i<=n-1; i++)
    {
        f = f&&ae_isfinite(a->ptr.pp_double[i][i], _state);
    }
    ae_assert(f, "SPDMatrixCholeskyInverse: A contains infinite or NaN values!", _state);

    /*
     * Calculate condition numbers; for an SPD matrix R1 and RInf coincide.
     */
    rep->r1 = spdmatrixcholeskyrcond(a, n, isupper, _state);
    rep->rinf = rep->r1;
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {

        /*
         * Matrix is (nearly) singular: zero the stored triangle and
         * report failure with Info=-3.
         */
        if( isupper )
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=i; j<=n-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        else
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        rep->r1 = (double)(0);
        rep->rinf = (double)(0);
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Inverse
     */
    ae_vector_set_length(&tmp, n, _state);
    spdmatrixcholeskyinverserec(a, 0, n, isupper, &tmp, _state);
    ae_frame_leave(_state);
}
59211 
59212 
59213 /*************************************************************************
59214 Inversion of a symmetric positive definite matrix.
59215 
59216 Given an upper or lower triangle of a symmetric positive definite matrix,
59217 the algorithm generates matrix A^-1 and saves the upper or lower triangle
59218 depending on the input.
59219 
59220 Input parameters:
59221     A       -   matrix to be inverted (upper or lower triangle).
59222                 Array with elements [0..N-1,0..N-1].
59223     N       -   size of matrix A (optional) :
59224                 * if given, only principal NxN submatrix is processed  and
59225                   overwritten. other elements are unchanged.
59226                 * if not given,  size  is  automatically  determined  from
59227                   matrix size (A must be square matrix)
59228     IsUpper -   storage type (optional):
59229                 * if True, symmetric  matrix  A  is  given  by  its  upper
59230                   triangle, and the lower triangle isn't  used/changed  by
59231                   function
59232                 * if False,  symmetric matrix  A  is  given  by  its lower
59233                   triangle, and the  upper triangle isn't used/changed  by
59234                   function
59235                 * if not given,  both lower and upper  triangles  must  be
59236                   filled.
59237 
59238 Output parameters:
59239     Info    -   return code, same as in RMatrixLUInverse
59240     Rep     -   solver report, same as in RMatrixLUInverse
59241     A       -   inverse of matrix A, same as in RMatrixLUInverse
59242 
59243   ! FREE EDITION OF ALGLIB:
59244   !
59245   ! Free Edition of ALGLIB supports following important features for  this
59246   ! function:
59247   ! * C++ version: x64 SIMD support using C++ intrinsics
59248   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59249   !
59250   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59251   ! Reference Manual in order  to  find  out  how to activate SIMD support
59252   ! in ALGLIB.
59253 
59254   ! COMMERCIAL EDITION OF ALGLIB:
59255   !
59256   ! Commercial Edition of ALGLIB includes following important improvements
59257   ! of this function:
59258   ! * high-performance native backend with same C# interface (C# version)
59259   ! * multithreading support (C++ and C# versions)
59260   ! * hardware vendor (Intel) implementations of linear algebra primitives
59261   !   (C++ and C# versions, x86/x64 platform)
59262   !
59263   ! We recommend you to read 'Working with commercial version' section  of
59264   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59265   ! related features provided by commercial edition of ALGLIB.
59266 
59267   -- ALGLIB routine --
59268      10.02.2010
59269      Bochkanov Sergey
59270 *************************************************************************/
void spdmatrixinverse(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{

    *info = 0;
    _matinvreport_clear(rep);

    /*
     * Validate inputs, then factor A with Cholesky and invert the factor.
     * A failed factorization means A is not positive definite: Info=-3.
     */
    ae_assert(n>0, "SPDMatrixInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "SPDMatrixInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "SPDMatrixInverse: rows(A)<N!", _state);
    ae_assert(isfinitertrmatrix(a, n, isupper, _state), "SPDMatrixInverse: A contains infinite or NaN values!", _state);
    *info = 1;
    if( !spdmatrixcholesky(a, n, isupper, _state) )
    {
        *info = -3;
        return;
    }
    spdmatrixcholeskyinverse(a, n, isupper, info, rep, _state);
}
59296 
59297 
59298 /*************************************************************************
59299 Inversion of a Hermitian positive definite matrix which is given
59300 by Cholesky decomposition.
59301 
59302 Input parameters:
59303     A       -   Cholesky decomposition of the matrix to be inverted:
59304                 A=U'*U or A = L*L'.
59305                 Output of  HPDMatrixCholesky subroutine.
59306     N       -   size of matrix A (optional) :
59307                 * if given, only principal NxN submatrix is processed  and
59308                   overwritten. other elements are unchanged.
59309                 * if not given,  size  is  automatically  determined  from
59310                   matrix size (A must be square matrix)
59311     IsUpper -   storage type (optional):
59312                 * if True, symmetric  matrix  A  is  given  by  its  upper
59313                   triangle, and the lower triangle isn't  used/changed  by
59314                   function
59315                 * if False,  symmetric matrix  A  is  given  by  its lower
59316                   triangle, and the  upper triangle isn't used/changed  by
59317                   function
59318                 * if not given, lower half is used.
59319 
59320 Output parameters:
59321     Info    -   return code, same as in RMatrixLUInverse
59322     Rep     -   solver report, same as in RMatrixLUInverse
59323     A       -   inverse of matrix A, same as in RMatrixLUInverse
59324 
59325   ! FREE EDITION OF ALGLIB:
59326   !
59327   ! Free Edition of ALGLIB supports following important features for  this
59328   ! function:
59329   ! * C++ version: x64 SIMD support using C++ intrinsics
59330   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59331   !
59332   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59333   ! Reference Manual in order  to  find  out  how to activate SIMD support
59334   ! in ALGLIB.
59335 
59336   ! COMMERCIAL EDITION OF ALGLIB:
59337   !
59338   ! Commercial Edition of ALGLIB includes following important improvements
59339   ! of this function:
59340   ! * high-performance native backend with same C# interface (C# version)
59341   ! * multithreading support (C++ and C# versions)
59342   ! * hardware vendor (Intel) implementations of linear algebra primitives
59343   !   (C++ and C# versions, x86/x64 platform)
59344   !
59345   ! We recommend you to read 'Working with commercial version' section  of
59346   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59347   ! related features provided by commercial edition of ALGLIB.
59348 
59349   -- ALGLIB routine --
59350      10.02.2010
59351      Bochkanov Sergey
59352 *************************************************************************/
void hpdmatrixcholeskyinverse(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_vector tmp;
    ae_bool f;

    ae_frame_make(_state, &_frame_block);
    memset(&tmp, 0, sizeof(tmp));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);

    /*
     * NOTE: a local matinvreport (rep2) was previously declared and
     * initialized here but never used; it has been removed.
     */
    ae_assert(n>0, "HPDMatrixCholeskyInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "HPDMatrixCholeskyInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "HPDMatrixCholeskyInverse: rows(A)<N!", _state);

    /*
     * Finiteness check on the diagonal (real and imaginary parts) only;
     * off-diagonal entries are validated implicitly by the rcond estimate.
     */
    f = ae_true;
    for(i=0; i<=n-1; i++)
    {
        f = (f&&ae_isfinite(a->ptr.pp_complex[i][i].x, _state))&&ae_isfinite(a->ptr.pp_complex[i][i].y, _state);
    }
    ae_assert(f, "HPDMatrixCholeskyInverse: A contains infinite or NaN values!", _state);
    *info = 1;

    /*
     * Calculate condition numbers; for an HPD matrix R1 and RInf coincide.
     */
    rep->r1 = hpdmatrixcholeskyrcond(a, n, isupper, _state);
    rep->rinf = rep->r1;
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {

        /*
         * Matrix is (nearly) singular: zero the stored triangle and
         * report failure with Info=-3.
         */
        if( isupper )
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=i; j<=n-1; j++)
                {
                    a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
                }
            }
        }
        else
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i; j++)
                {
                    a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
                }
            }
        }
        rep->r1 = (double)(0);
        rep->rinf = (double)(0);
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Inverse
     */
    ae_vector_set_length(&tmp, n, _state);
    matinv_hpdmatrixcholeskyinverserec(a, 0, n, isupper, &tmp, _state);
    ae_frame_leave(_state);
}
59427 
59428 
59429 /*************************************************************************
59430 Inversion of a Hermitian positive definite matrix.
59431 
59432 Given an upper or lower triangle of a Hermitian positive definite matrix,
59433 the algorithm generates matrix A^-1 and saves the upper or lower triangle
59434 depending on the input.
59435 
59436 Input parameters:
59437     A       -   matrix to be inverted (upper or lower triangle).
59438                 Array with elements [0..N-1,0..N-1].
59439     N       -   size of matrix A (optional) :
59440                 * if given, only principal NxN submatrix is processed  and
59441                   overwritten. other elements are unchanged.
59442                 * if not given,  size  is  automatically  determined  from
59443                   matrix size (A must be square matrix)
59444     IsUpper -   storage type (optional):
59445                 * if True, symmetric  matrix  A  is  given  by  its  upper
59446                   triangle, and the lower triangle isn't  used/changed  by
59447                   function
59448                 * if False,  symmetric matrix  A  is  given  by  its lower
59449                   triangle, and the  upper triangle isn't used/changed  by
59450                   function
59451                 * if not given,  both lower and upper  triangles  must  be
59452                   filled.
59453 
59454 Output parameters:
59455     Info    -   return code, same as in RMatrixLUInverse
59456     Rep     -   solver report, same as in RMatrixLUInverse
59457     A       -   inverse of matrix A, same as in RMatrixLUInverse
59458 
59459   ! FREE EDITION OF ALGLIB:
59460   !
59461   ! Free Edition of ALGLIB supports following important features for  this
59462   ! function:
59463   ! * C++ version: x64 SIMD support using C++ intrinsics
59464   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59465   !
59466   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59467   ! Reference Manual in order  to  find  out  how to activate SIMD support
59468   ! in ALGLIB.
59469 
59470   ! COMMERCIAL EDITION OF ALGLIB:
59471   !
59472   ! Commercial Edition of ALGLIB includes following important improvements
59473   ! of this function:
59474   ! * high-performance native backend with same C# interface (C# version)
59475   ! * multithreading support (C++ and C# versions)
59476   ! * hardware vendor (Intel) implementations of linear algebra primitives
59477   !   (C++ and C# versions, x86/x64 platform)
59478   !
59479   ! We recommend you to read 'Working with commercial version' section  of
59480   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59481   ! related features provided by commercial edition of ALGLIB.
59482 
59483   -- ALGLIB routine --
59484      10.02.2010
59485      Bochkanov Sergey
59486 *************************************************************************/
void hpdmatrixinverse(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{

    *info = 0;
    _matinvreport_clear(rep);

    /*
     * Validate inputs, then factor A with Cholesky and invert the factor.
     * A failed factorization means A is not positive definite: Info=-3.
     */
    ae_assert(n>0, "HPDMatrixInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "HPDMatrixInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "HPDMatrixInverse: rows(A)<N!", _state);
    ae_assert(apservisfinitectrmatrix(a, n, isupper, _state), "HPDMatrixInverse: A contains infinite or NaN values!", _state);
    *info = 1;
    if( !hpdmatrixcholesky(a, n, isupper, _state) )
    {
        *info = -3;
        return;
    }
    hpdmatrixcholeskyinverse(a, n, isupper, info, rep, _state);
}
59512 
59513 
59514 /*************************************************************************
59515 Triangular matrix inverse (real)
59516 
59517 The subroutine inverts the following types of matrices:
59518     * upper triangular
59519     * upper triangular with unit diagonal
59520     * lower triangular
59521     * lower triangular with unit diagonal
59522 
59523 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
59524 also be upper (lower) triangular, and after the end of the algorithm,  the
59525 inverse matrix replaces the source matrix. The elements  below (above) the
59526 main diagonal are not changed by the algorithm.
59527 
59528 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
59529 diagonal, and the diagonal elements are not passed to the algorithm.
59530 
59531 Input parameters:
59532     A       -   matrix, array[0..N-1, 0..N-1].
59533     N       -   size of matrix A (optional) :
59534                 * if given, only principal NxN submatrix is processed  and
59535                   overwritten. other elements are unchanged.
59536                 * if not given,  size  is  automatically  determined  from
59537                   matrix size (A must be square matrix)
59538     IsUpper -   True, if the matrix is upper triangular.
59539     IsUnit  -   diagonal type (optional):
59540                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
59541                 * if False, matrix diagonal is arbitrary
59542                 * if not given, False is assumed
59543 
59544 Output parameters:
59545     Info    -   same as for RMatrixLUInverse
59546     Rep     -   same as for RMatrixLUInverse
59547     A       -   same as for RMatrixLUInverse.
59548 
59549   ! FREE EDITION OF ALGLIB:
59550   !
59551   ! Free Edition of ALGLIB supports following important features for  this
59552   ! function:
59553   ! * C++ version: x64 SIMD support using C++ intrinsics
59554   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59555   !
59556   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59557   ! Reference Manual in order  to  find  out  how to activate SIMD support
59558   ! in ALGLIB.
59559 
59560   ! COMMERCIAL EDITION OF ALGLIB:
59561   !
59562   ! Commercial Edition of ALGLIB includes following important improvements
59563   ! of this function:
59564   ! * high-performance native backend with same C# interface (C# version)
59565   ! * multithreading support (C++ and C# versions)
59566   ! * hardware vendor (Intel) implementations of linear algebra primitives
59567   !   (C++ and C# versions, x86/x64 platform)
59568   !
59569   ! We recommend you to read 'Working with commercial version' section  of
59570   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59571   ! related features provided by commercial edition of ALGLIB.
59572 
59573   -- ALGLIB --
59574      Copyright 05.02.2010 by Bochkanov Sergey
59575 *************************************************************************/
void rmatrixtrinverse(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    ae_vector work;
    sinteger localinfo;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&localinfo, 0, sizeof(localinfo));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    _sinteger_init(&localinfo, _state, ae_true);

    ae_assert(n>0, "RMatrixTRInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "RMatrixTRInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "RMatrixTRInverse: rows(A)<N!", _state);
    ae_assert(isfinitertrmatrix(a, n, isupper, _state), "RMatrixTRInverse: A contains infinite or NaN values!", _state);

    /*
     * Estimate condition numbers first; a badly conditioned matrix is
     * rejected before any inversion work is done.
     */
    rep->r1 = rmatrixtrrcond1(a, n, isupper, isunit, _state);
    rep->rinf = rmatrixtrrcondinf(a, n, isupper, isunit, _state);
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {

        /*
         * Near-singular input: zero the whole NxN submatrix, zero the
         * condition estimates and report Info=-3.
         */
        for(row=0; row<n; row++)
        {
            for(col=0; col<n; col++)
            {
                a->ptr.pp_double[row][col] = 0.0;
            }
        }
        rep->r1 = 0.0;
        rep->rinf = 0.0;
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Well-conditioned: invert in-place via the recursive kernel and
     * propagate its status code to the caller.
     */
    ae_vector_set_length(&work, n, _state);
    localinfo.val = 1;
    matinv_rmatrixtrinverserec(a, 0, n, isupper, isunit, &work, &localinfo, _state);
    *info = localinfo.val;
    ae_frame_leave(_state);
}
59633 
59634 
59635 /*************************************************************************
59636 Triangular matrix inverse (complex)
59637 
59638 The subroutine inverts the following types of matrices:
59639     * upper triangular
59640     * upper triangular with unit diagonal
59641     * lower triangular
59642     * lower triangular with unit diagonal
59643 
59644 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
59645 also be upper (lower) triangular, and after the end of the algorithm,  the
59646 inverse matrix replaces the source matrix. The elements  below (above) the
59647 main diagonal are not changed by the algorithm.
59648 
59649 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
59650 diagonal, and the diagonal elements are not passed to the algorithm.
59651 
59652 Input parameters:
59653     A       -   matrix, array[0..N-1, 0..N-1].
59654     N       -   size of matrix A (optional) :
59655                 * if given, only principal NxN submatrix is processed  and
59656                   overwritten. other elements are unchanged.
59657                 * if not given,  size  is  automatically  determined  from
59658                   matrix size (A must be square matrix)
59659     IsUpper -   True, if the matrix is upper triangular.
59660     IsUnit  -   diagonal type (optional):
59661                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
59662                 * if False, matrix diagonal is arbitrary
59663                 * if not given, False is assumed
59664 
59665 Output parameters:
59666     Info    -   same as for RMatrixLUInverse
59667     Rep     -   same as for RMatrixLUInverse
59668     A       -   same as for RMatrixLUInverse.
59669 
59670   ! FREE EDITION OF ALGLIB:
59671   !
59672   ! Free Edition of ALGLIB supports following important features for  this
59673   ! function:
59674   ! * C++ version: x64 SIMD support using C++ intrinsics
59675   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59676   !
59677   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59678   ! Reference Manual in order  to  find  out  how to activate SIMD support
59679   ! in ALGLIB.
59680 
59681   ! COMMERCIAL EDITION OF ALGLIB:
59682   !
59683   ! Commercial Edition of ALGLIB includes following important improvements
59684   ! of this function:
59685   ! * high-performance native backend with same C# interface (C# version)
59686   ! * multithreading support (C++ and C# versions)
59687   ! * hardware vendor (Intel) implementations of linear algebra primitives
59688   !   (C++ and C# versions, x86/x64 platform)
59689   !
59690   ! We recommend you to read 'Working with commercial version' section  of
59691   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59692   ! related features provided by commercial edition of ALGLIB.
59693 
59694   -- ALGLIB --
59695      Copyright 05.02.2010 by Bochkanov Sergey
59696 *************************************************************************/
void cmatrixtrinverse(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    ae_vector work;
    sinteger localinfo;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&localinfo, 0, sizeof(localinfo));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    _sinteger_init(&localinfo, _state, ae_true);

    ae_assert(n>0, "CMatrixTRInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "CMatrixTRInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "CMatrixTRInverse: rows(A)<N!", _state);
    ae_assert(apservisfinitectrmatrix(a, n, isupper, _state), "CMatrixTRInverse: A contains infinite or NaN values!", _state);

    /*
     * Estimate condition numbers first; a badly conditioned matrix is
     * rejected before any inversion work is done.
     */
    rep->r1 = cmatrixtrrcond1(a, n, isupper, isunit, _state);
    rep->rinf = cmatrixtrrcondinf(a, n, isupper, isunit, _state);
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {

        /*
         * Near-singular input: zero the whole NxN submatrix, zero the
         * condition estimates and report Info=-3.
         */
        for(row=0; row<n; row++)
        {
            for(col=0; col<n; col++)
            {
                a->ptr.pp_complex[row][col] = ae_complex_from_i(0);
            }
        }
        rep->r1 = 0.0;
        rep->rinf = 0.0;
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Well-conditioned: invert in-place via the recursive kernel and
     * propagate its status code to the caller.
     */
    ae_vector_set_length(&work, n, _state);
    localinfo.val = 1;
    matinv_cmatrixtrinverserec(a, 0, n, isupper, isunit, &work, &localinfo, _state);
    *info = localinfo.val;
    ae_frame_leave(_state);
}
59754 
59755 
59756 /*************************************************************************
59757 Recursive subroutine for SPD inversion.
59758 
59759 NOTE: this function expects that matris is strictly positive-definite.
59760 
59761   -- ALGLIB routine --
59762      10.02.2010
59763      Bochkanov Sergey
59764 *************************************************************************/
spdmatrixcholeskyinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)59765 void spdmatrixcholeskyinverserec(/* Real    */ ae_matrix* a,
59766      ae_int_t offs,
59767      ae_int_t n,
59768      ae_bool isupper,
59769      /* Real    */ ae_vector* tmp,
59770      ae_state *_state)
59771 {
59772     ae_frame _frame_block;
59773     ae_int_t i;
59774     ae_int_t j;
59775     double v;
59776     ae_int_t n1;
59777     ae_int_t n2;
59778     sinteger sinfo2;
59779     ae_int_t tsa;
59780     ae_int_t tsb;
59781     ae_int_t tscur;
59782 
59783     ae_frame_make(_state, &_frame_block);
59784     memset(&sinfo2, 0, sizeof(sinfo2));
59785     _sinteger_init(&sinfo2, _state, ae_true);
59786 
59787     if( n<1 )
59788     {
59789         ae_frame_leave(_state);
59790         return;
59791     }
59792     tsa = matrixtilesizea(_state);
59793     tsb = matrixtilesizeb(_state);
59794     tscur = tsb;
59795     if( n<=tsb )
59796     {
59797         tscur = tsa;
59798     }
59799 
59800     /*
59801      * Base case
59802      */
59803     if( n<=tsa )
59804     {
59805         sinfo2.val = 1;
59806         matinv_rmatrixtrinverserec(a, offs, n, isupper, ae_false, tmp, &sinfo2, _state);
59807         ae_assert(sinfo2.val>0, "SPDMatrixCholeskyInverseRec: integrity check failed", _state);
59808         if( isupper )
59809         {
59810 
59811             /*
59812              * Compute the product U * U'.
59813              * NOTE: we never assume that diagonal of U is real
59814              */
59815             for(i=0; i<=n-1; i++)
59816             {
59817                 if( i==0 )
59818                 {
59819 
59820                     /*
59821                      * 1x1 matrix
59822                      */
59823                     a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
59824                 }
59825                 else
59826                 {
59827 
59828                     /*
59829                      * (I+1)x(I+1) matrix,
59830                      *
59831                      * ( A11  A12 )   ( A11^H        )   ( A11*A11^H+A12*A12^H  A12*A22^H )
59832                      * (          ) * (              ) = (                                )
59833                      * (      A22 )   ( A12^H  A22^H )   ( A22*A12^H            A22*A22^H )
59834                      *
59835                      * A11 is IxI, A22 is 1x1.
59836                      */
59837                     ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+i], a->stride, ae_v_len(0,i-1));
59838                     for(j=0; j<=i-1; j++)
59839                     {
59840                         v = a->ptr.pp_double[offs+j][offs+i];
59841                         ae_v_addd(&a->ptr.pp_double[offs+j][offs+j], 1, &tmp->ptr.p_double[j], 1, ae_v_len(offs+j,offs+i-1), v);
59842                     }
59843                     v = a->ptr.pp_double[offs+i][offs+i];
59844                     ae_v_muld(&a->ptr.pp_double[offs][offs+i], a->stride, ae_v_len(offs,offs+i-1), v);
59845                     a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
59846                 }
59847             }
59848         }
59849         else
59850         {
59851 
59852             /*
59853              * Compute the product L' * L
59854              * NOTE: we never assume that diagonal of L is real
59855              */
59856             for(i=0; i<=n-1; i++)
59857             {
59858                 if( i==0 )
59859                 {
59860 
59861                     /*
59862                      * 1x1 matrix
59863                      */
59864                     a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
59865                 }
59866                 else
59867                 {
59868 
59869                     /*
59870                      * (I+1)x(I+1) matrix,
59871                      *
59872                      * ( A11^H  A21^H )   ( A11      )   ( A11^H*A11+A21^H*A21  A21^H*A22 )
59873                      * (              ) * (          ) = (                                )
59874                      * (        A22^H )   ( A21  A22 )   ( A22^H*A21            A22^H*A22 )
59875                      *
59876                      * A11 is IxI, A22 is 1x1.
59877                      */
59878                     ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs], 1, ae_v_len(0,i-1));
59879                     for(j=0; j<=i-1; j++)
59880                     {
59881                         v = a->ptr.pp_double[offs+i][offs+j];
59882                         ae_v_addd(&a->ptr.pp_double[offs+j][offs], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+j), v);
59883                     }
59884                     v = a->ptr.pp_double[offs+i][offs+i];
59885                     ae_v_muld(&a->ptr.pp_double[offs+i][offs], 1, ae_v_len(offs,offs+i-1), v);
59886                     a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
59887                 }
59888             }
59889         }
59890         ae_frame_leave(_state);
59891         return;
59892     }
59893 
59894     /*
59895      * Recursive code: triangular factor inversion merged with
59896      * UU' or L'L multiplication
59897      */
59898     tiledsplit(n, tscur, &n1, &n2, _state);
59899 
59900     /*
59901      * form off-diagonal block of trangular inverse
59902      */
59903     if( isupper )
59904     {
59905         for(i=0; i<=n1-1; i++)
59906         {
59907             ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
59908         }
59909         rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 0, a, offs, offs+n1, _state);
59910         rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs, offs+n1, _state);
59911     }
59912     else
59913     {
59914         for(i=0; i<=n2-1; i++)
59915         {
59916             ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
59917         }
59918         rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 0, a, offs+n1, offs, _state);
59919         rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs+n1, offs, _state);
59920     }
59921 
59922     /*
59923      * invert first diagonal block
59924      */
59925     spdmatrixcholeskyinverserec(a, offs, n1, isupper, tmp, _state);
59926 
59927     /*
59928      * update first diagonal block with off-diagonal block,
59929      * update off-diagonal block
59930      */
59931     if( isupper )
59932     {
59933         rmatrixsyrk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper, _state);
59934         rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 1, a, offs, offs+n1, _state);
59935     }
59936     else
59937     {
59938         rmatrixsyrk(n1, n2, 1.0, a, offs+n1, offs, 1, 1.0, a, offs, offs, isupper, _state);
59939         rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 1, a, offs+n1, offs, _state);
59940     }
59941 
59942     /*
59943      * invert second diagonal block
59944      */
59945     spdmatrixcholeskyinverserec(a, offs+n1, n2, isupper, tmp, _state);
59946     ae_frame_leave(_state);
59947 }
59948 
59949 
59950 /*************************************************************************
59951 Serial stub for GPL edition.
59952 *************************************************************************/
_trypexec_spdmatrixcholeskyinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)59953 ae_bool _trypexec_spdmatrixcholeskyinverserec(/* Real    */ ae_matrix* a,
59954     ae_int_t offs,
59955     ae_int_t n,
59956     ae_bool isupper,
59957     /* Real    */ ae_vector* tmp,
59958     ae_state *_state)
59959 {
59960     return ae_false;
59961 }
59962 
59963 
/*************************************************************************
Triangular matrix inversion, recursive subroutine

Inverts, in-place, the N*N triangular submatrix of A whose top-left corner
is located at [Offs,Offs].

NOTE: this function sets Info on failure, leaves it unchanged on success.

NOTE: only Tmp[Offs:Offs+N-1] is modified, other entries of the temporary array are not modified

INPUT PARAMETERS:
    A       -   matrix holding the triangular factor; overwritten by its
                inverse on success
    Offs    -   offset of the submatrix inside A
    N       -   submatrix size; N<1 sets Info to -1 and returns immediately
    IsUpper -   ae_true for an upper triangular factor, ae_false for lower
    IsUnit  -   ae_true if the diagonal is implicitly unit; in that case the
                diagonal is neither tested for zero nor overwritten
    Tmp     -   scratch vector; only elements [Offs..Offs+N-1] are touched
    Info    -   failure codes: -1 when N<1, -3 when an exactly-zero diagonal
                element is encountered (singular factor)

  -- ALGLIB --
     05.02.2010, Bochkanov Sergey.
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     February 29, 1992.
*************************************************************************/
static void matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     /* Real    */ ae_vector* tmp,
     sinteger* info,
     ae_state *_state)
{
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t mn;
    ae_int_t i;
    ae_int_t j;
    double v;
    double ajj;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    if( n<1 )
    {
        info->val = -1;
        return;
    }

    /*
     * Tile size selection: use the larger B-tile while the problem is big,
     * switch to the smaller A-tile once N already fits into a B-tile.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( n<=tsb )
    {
        tscur = tsa;
    }

    /*
     * Try to activate parallelism
     * (cost estimate ~N^3/3 flops must exceed the SMP activation level)
     */
    if( n>=2*tsb&&ae_fp_greater_eq(rmul3((double)(n), (double)(n), (double)(n), _state)*((double)1/(double)3),smpactivationlevel(_state)) )
    {
        if( _trypexec_matinv_rmatrixtrinverserec(a,offs,n,isupper,isunit,tmp,info, _state) )
        {
            return;
        }
    }

    /*
     * Base case: column-by-column unblocked inversion (LAPACK xTRTI2 style)
     */
    if( n<=tsa )
    {
        if( isupper )
        {

            /*
             * Compute inverse of upper triangular matrix.
             */
            for(j=0; j<=n-1; j++)
            {
                if( !isunit )
                {
                    /* exactly-zero diagonal means singular factor */
                    if( ae_fp_eq(a->ptr.pp_double[offs+j][offs+j],(double)(0)) )
                    {
                        info->val = -3;
                        return;
                    }
                    a->ptr.pp_double[offs+j][offs+j] = 1/a->ptr.pp_double[offs+j][offs+j];
                    ajj = -a->ptr.pp_double[offs+j][offs+j];
                }
                else
                {
                    ajj = (double)(-1);
                }

                /*
                 * Compute elements 1:j-1 of j-th column.
                 * Column j is first copied into Tmp, then overwritten with
                 * (already inverted leading block) * (old column), and
                 * finally scaled by -1/A[j][j] (or -1 for unit diagonal).
                 */
                if( j>0 )
                {
                    ae_v_move(&tmp->ptr.p_double[offs+0], 1, &a->ptr.pp_double[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1));
                    for(i=0; i<=j-1; i++)
                    {
                        if( i<j-1 )
                        {
                            v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+i+1], 1, &tmp->ptr.p_double[offs+i+1], 1, ae_v_len(offs+i+1,offs+j-1));
                        }
                        else
                        {
                            v = (double)(0);
                        }
                        if( !isunit )
                        {
                            a->ptr.pp_double[offs+i][offs+j] = v+a->ptr.pp_double[offs+i][offs+i]*tmp->ptr.p_double[offs+i];
                        }
                        else
                        {
                            a->ptr.pp_double[offs+i][offs+j] = v+tmp->ptr.p_double[offs+i];
                        }
                    }
                    ae_v_muld(&a->ptr.pp_double[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1), ajj);
                }
            }
        }
        else
        {

            /*
             * Compute inverse of lower triangular matrix.
             * Mirror image of the upper case: columns are processed from
             * right to left and subdiagonal elements are updated.
             */
            for(j=n-1; j>=0; j--)
            {
                if( !isunit )
                {
                    if( ae_fp_eq(a->ptr.pp_double[offs+j][offs+j],(double)(0)) )
                    {
                        info->val = -3;
                        return;
                    }
                    a->ptr.pp_double[offs+j][offs+j] = 1/a->ptr.pp_double[offs+j][offs+j];
                    ajj = -a->ptr.pp_double[offs+j][offs+j];
                }
                else
                {
                    ajj = (double)(-1);
                }
                if( j<n-1 )
                {

                    /*
                     * Compute elements j+1:n of j-th column.
                     */
                    ae_v_move(&tmp->ptr.p_double[offs+j+1], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1));
                    for(i=j+1; i<=n-1; i++)
                    {
                        if( i>j+1 )
                        {
                            v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+j+1], 1, &tmp->ptr.p_double[offs+j+1], 1, ae_v_len(offs+j+1,offs+i-1));
                        }
                        else
                        {
                            v = (double)(0);
                        }
                        if( !isunit )
                        {
                            a->ptr.pp_double[offs+i][offs+j] = v+a->ptr.pp_double[offs+i][offs+i]*tmp->ptr.p_double[offs+i];
                        }
                        else
                        {
                            a->ptr.pp_double[offs+i][offs+j] = v+tmp->ptr.p_double[offs+i];
                        }
                    }
                    ae_v_muld(&a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1), ajj);
                }
            }
        }
        return;
    }

    /*
     * Recursive case: split into N1 x N1 and N2 x N2 diagonal blocks.
     * The off-diagonal block of the inverse is -inv(T1)*T12*inv(T2)
     * (for the upper case), formed with two triangular solves against the
     * not-yet-inverted diagonal blocks, interleaved with the recursion
     * that inverts the second block.
     */
    tiledsplit(n, tscur, &n1, &n2, _state);
    /* MN is not used below; touchint() marks it as intentionally unused */
    mn = imin2(n1, n2, _state);
    touchint(&mn, _state);
    if( n2>0 )
    {
        if( isupper )
        {
            /* negate the off-diagonal block in-place before the solves */
            for(i=0; i<=n1-1; i++)
            {
                ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
            }
            rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1, _state);
            matinv_rmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, _state);
            rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1, _state);
        }
        else
        {
            for(i=0; i<=n2-1; i++)
            {
                ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
            }
            rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs, _state);
            matinv_rmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, _state);
            rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs, _state);
        }
    }
    /* finally invert the first diagonal block */
    matinv_rmatrixtrinverserec(a, offs, n1, isupper, isunit, tmp, info, _state);
}
60165 
60166 
60167 /*************************************************************************
60168 Serial stub for GPL edition.
60169 *************************************************************************/
_trypexec_matinv_rmatrixtrinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_bool isunit,ae_vector * tmp,sinteger * info,ae_state * _state)60170 ae_bool _trypexec_matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
60171     ae_int_t offs,
60172     ae_int_t n,
60173     ae_bool isupper,
60174     ae_bool isunit,
60175     /* Real    */ ae_vector* tmp,
60176     sinteger* info,
60177     ae_state *_state)
60178 {
60179     return ae_false;
60180 }
60181 
60182 
/*************************************************************************
Triangular matrix inversion, recursive subroutine (complex version).

Inverts, in-place, the N*N triangular submatrix of A whose top-left corner
is located at [Offs,Offs].

Info is modified on failure, left unchanged on success.

INPUT PARAMETERS:
    A       -   complex matrix holding the triangular factor; overwritten by
                its inverse on success
    Offs    -   offset of the submatrix inside A
    N       -   submatrix size; N<1 sets Info to -1 and returns immediately
    IsUpper -   ae_true for an upper triangular factor, ae_false for lower
    IsUnit  -   ae_true if the diagonal is implicitly unit; in that case the
                diagonal is neither tested for zero nor overwritten
    Tmp     -   complex scratch vector; elements [Offs..Offs+N-1] are used
    Info    -   failure codes: -1 when N<1, -3 when an exactly-zero diagonal
                element is encountered (singular factor)

  -- ALGLIB --
     05.02.2010, Bochkanov Sergey.
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     February 29, 1992.
*************************************************************************/
static void matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     /* Complex */ ae_vector* tmp,
     sinteger* info,
     ae_state *_state)
{
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t i;
    ae_int_t j;
    ae_complex v;
    ae_complex ajj;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;
    ae_int_t mn;


    if( n<1 )
    {
        info->val = -1;
        return;
    }

    /*
     * Tile size selection; the A-tile is halved relative to the real
     * version (presumably to account for complex element size — see the
     * real-valued routine for comparison).
     */
    tsa = matrixtilesizea(_state)/2;
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( n<=tsb )
    {
        tscur = tsa;
    }

    /*
     * Try to activate parallelism
     * (cost estimate ~4*N^3/3 flops must exceed the SMP activation level)
     */
    if( n>=2*tsb&&ae_fp_greater_eq(rmul3((double)(n), (double)(n), (double)(n), _state)*((double)4/(double)3),smpactivationlevel(_state)) )
    {
        if( _trypexec_matinv_cmatrixtrinverserec(a,offs,n,isupper,isunit,tmp,info, _state) )
        {
            return;
        }
    }

    /*
     * Base case: column-by-column unblocked inversion (LAPACK xTRTI2 style)
     */
    if( n<=tsa )
    {
        if( isupper )
        {

            /*
             * Compute inverse of upper triangular matrix.
             */
            for(j=0; j<=n-1; j++)
            {
                if( !isunit )
                {
                    /* exactly-zero diagonal means singular factor */
                    if( ae_c_eq_d(a->ptr.pp_complex[offs+j][offs+j],(double)(0)) )
                    {
                        info->val = -3;
                        return;
                    }
                    a->ptr.pp_complex[offs+j][offs+j] = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
                    ajj = ae_c_neg(a->ptr.pp_complex[offs+j][offs+j]);
                }
                else
                {
                    ajj = ae_complex_from_i(-1);
                }

                /*
                 * Compute elements 1:j-1 of j-th column.
                 * Column j is copied into Tmp, multiplied by the already
                 * inverted leading block, then scaled by -1/A[j][j]
                 * (or -1 for unit diagonal).
                 */
                if( j>0 )
                {
                    ae_v_cmove(&tmp->ptr.p_complex[offs+0], 1, &a->ptr.pp_complex[offs+0][offs+j], a->stride, "N", ae_v_len(offs+0,offs+j-1));
                    for(i=0; i<=j-1; i++)
                    {
                        if( i<j-1 )
                        {
                            v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+i+1], 1, "N", &tmp->ptr.p_complex[offs+i+1], 1, "N", ae_v_len(offs+i+1,offs+j-1));
                        }
                        else
                        {
                            v = ae_complex_from_i(0);
                        }
                        if( !isunit )
                        {
                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,ae_c_mul(a->ptr.pp_complex[offs+i][offs+i],tmp->ptr.p_complex[offs+i]));
                        }
                        else
                        {
                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,tmp->ptr.p_complex[offs+i]);
                        }
                    }
                    ae_v_cmulc(&a->ptr.pp_complex[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1), ajj);
                }
            }
        }
        else
        {

            /*
             * Compute inverse of lower triangular matrix.
             * Mirror image of the upper case: columns processed from
             * right to left, subdiagonal elements updated.
             */
            for(j=n-1; j>=0; j--)
            {
                if( !isunit )
                {
                    if( ae_c_eq_d(a->ptr.pp_complex[offs+j][offs+j],(double)(0)) )
                    {
                        info->val = -3;
                        return;
                    }
                    a->ptr.pp_complex[offs+j][offs+j] = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
                    ajj = ae_c_neg(a->ptr.pp_complex[offs+j][offs+j]);
                }
                else
                {
                    ajj = ae_complex_from_i(-1);
                }
                if( j<n-1 )
                {

                    /*
                     * Compute elements j+1:n of j-th column.
                     */
                    ae_v_cmove(&tmp->ptr.p_complex[offs+j+1], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(offs+j+1,offs+n-1));
                    for(i=j+1; i<=n-1; i++)
                    {
                        if( i>j+1 )
                        {
                            v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+j+1], 1, "N", &tmp->ptr.p_complex[offs+j+1], 1, "N", ae_v_len(offs+j+1,offs+i-1));
                        }
                        else
                        {
                            v = ae_complex_from_i(0);
                        }
                        if( !isunit )
                        {
                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,ae_c_mul(a->ptr.pp_complex[offs+i][offs+i],tmp->ptr.p_complex[offs+i]));
                        }
                        else
                        {
                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,tmp->ptr.p_complex[offs+i]);
                        }
                    }
                    ae_v_cmulc(&a->ptr.pp_complex[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1), ajj);
                }
            }
        }
        return;
    }

    /*
     * Recursive case: split into N1 x N1 and N2 x N2 diagonal blocks,
     * negate the off-diagonal block, run two triangular solves around the
     * recursion on the second block, then invert the first block.
     */
    tiledsplit(n, tscur, &n1, &n2, _state);
    /* MN is not used below; touchint() marks it as intentionally unused */
    mn = imin2(n1, n2, _state);
    touchint(&mn, _state);
    if( n2>0 )
    {
        if( isupper )
        {
            for(i=0; i<=n1-1; i++)
            {
                ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
            }
            cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1, _state);
            matinv_cmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, _state);
            cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1, _state);
        }
        else
        {
            for(i=0; i<=n2-1; i++)
            {
                ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
            }
            cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs, _state);
            matinv_cmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, _state);
            cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs, _state);
        }
    }
    matinv_cmatrixtrinverserec(a, offs, n1, isupper, isunit, tmp, info, _state);
}
60382 
60383 
60384 /*************************************************************************
60385 Serial stub for GPL edition.
60386 *************************************************************************/
_trypexec_matinv_cmatrixtrinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_bool isunit,ae_vector * tmp,sinteger * info,ae_state * _state)60387 ae_bool _trypexec_matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
60388     ae_int_t offs,
60389     ae_int_t n,
60390     ae_bool isupper,
60391     ae_bool isunit,
60392     /* Complex */ ae_vector* tmp,
60393     sinteger* info,
60394     ae_state *_state)
60395 {
60396     return ae_false;
60397 }
60398 
60399 
/*************************************************************************
Recursive kernel for matrix inversion from an LU decomposition (real
version).

The N*N submatrix of A at [Offs,Offs] is assumed to contain the packed LU
factors (unit lower triangle L, upper triangle U) and is overwritten by the
inverse of L*U. NOTE(review): row pivoting is not visible from this
routine; the permutation is presumably applied by the caller — confirm
against the public rmatrixluinverse driver.

INPUT PARAMETERS:
    A       -   matrix with packed LU factors; overwritten in-place
    Offs    -   offset of the submatrix inside A
    N       -   submatrix size; N<1 sets Info to -1 and returns immediately
    Work    -   real scratch vector
    Info    -   set to -1 on bad N; sub-calls may set -3 on an exactly-zero
                diagonal of U (see matinv_rmatrixtrinverserec); the routine
                aborts early whenever Info->val<=0 after a sub-call
    Rep     -   report structure, passed through to recursive calls
*************************************************************************/
static void matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t n,
     /* Real    */ ae_vector* work,
     sinteger* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double v;
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;
    ae_int_t mn;


    if( n<1 )
    {
        info->val = -1;
        return;
    }

    /*
     * Tile size selection: large B-tile for big problems, small A-tile
     * once N fits into a B-tile.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( n<=tsb )
    {
        tscur = tsa;
    }

    /*
     * Try parallelism
     * (cost estimate ~8*N^3/6 flops must exceed the SMP activation level)
     */
    if( n>=2*tsb&&ae_fp_greater_eq((double)8/(double)6*rmul3((double)(n), (double)(n), (double)(n), _state),smpactivationlevel(_state)) )
    {
        if( _trypexec_matinv_rmatrixluinverserec(a,offs,n,work,info,rep, _state) )
        {
            return;
        }
    }

    /*
     * Base case
     */
    if( n<=tsa )
    {

        /*
         * Form inv(U)
         */
        matinv_rmatrixtrinverserec(a, offs, n, ae_true, ae_false, work, info, _state);
        if( info->val<=0 )
        {
            return;
        }

        /*
         * Solve the equation inv(A)*L = inv(U) for inv(A).
         * Columns are processed right-to-left (LAPACK xGETRI style).
         */
        for(j=n-1; j>=0; j--)
        {

            /*
             * Copy current column of L to WORK and replace with zeros.
             */
            for(i=j+1; i<=n-1; i++)
            {
                work->ptr.p_double[i] = a->ptr.pp_double[offs+i][offs+j];
                a->ptr.pp_double[offs+i][offs+j] = (double)(0);
            }

            /*
             * Compute current column of inv(A).
             */
            if( j<n-1 )
            {
                for(i=0; i<=n-1; i++)
                {
                    v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+j+1], 1, &work->ptr.p_double[j+1], 1, ae_v_len(offs+j+1,offs+n-1));
                    a->ptr.pp_double[offs+i][offs+j] = a->ptr.pp_double[offs+i][offs+j]-v;
                }
            }
        }
        return;
    }

    /*
     * Recursive code:
     *
     *         ( L1      )   ( U1  U12 )
     * A    =  (         ) * (         )
     *         ( L12  L2 )   (     U2  )
     *
     *         ( W   X )
     * A^-1 =  (       )
     *         ( Y   Z )
     *
     * In-place calculation can be done as follows:
     * * X := inv(U1)*U12*inv(U2)
     * * Y := inv(L2)*L12*inv(L1)
     * * W := inv(L1*U1)+X*Y
     * * X := -X*inv(L2)
     * * Y := -inv(U2)*Y
     * * Z := inv(L2*U2)
     *
     * Reordering w.r.t. interdependencies gives us:
     *
     * * X := inv(U1)*U12      \ suitable for parallel execution
     * * Y := L12*inv(L1)      /
     *
     * * X := X*inv(U2)        \
     * * Y := inv(L2)*Y        | suitable for parallel execution
     * * W := inv(L1*U1)       /
     *
     * * W := W+X*Y
     *
     * * X := -X*inv(L2)       \ suitable for parallel execution
     * * Y := -inv(U2)*Y       /
     *
     * * Z := inv(L2*U2)
     */
    tiledsplit(n, tscur, &n1, &n2, _state);
    /* MN is not used below; touchint() marks it as intentionally unused */
    mn = imin2(n1, n2, _state);
    touchint(&mn, _state);
    ae_assert(n2>0, "LUInverseRec: internal error!", _state);

    /*
     * X := inv(U1)*U12
     * Y := L12*inv(L1)
     */
    rmatrixlefttrsm(n1, n2, a, offs, offs, ae_true, ae_false, 0, a, offs, offs+n1, _state);
    rmatrixrighttrsm(n2, n1, a, offs, offs, ae_false, ae_true, 0, a, offs+n1, offs, _state);

    /*
     * X := X*inv(U2)
     * Y := inv(L2)*Y
     * W := inv(L1*U1)
     */
    rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs, offs+n1, _state);
    rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs+n1, offs, _state);
    matinv_rmatrixluinverserec(a, offs, n1, work, info, rep, _state);
    if( info->val<=0 )
    {
        /* first diagonal block is singular — propagate failure */
        return;
    }

    /*
     * W := W+X*Y
     */
    rmatrixgemm(n1, n1, n2, 1.0, a, offs, offs+n1, 0, a, offs+n1, offs, 0, 1.0, a, offs, offs, _state);

    /*
     * X := -X*inv(L2)
     * Y := -inv(U2)*Y
     */
    rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs, offs+n1, _state);
    rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs+n1, offs, _state);
    for(i=0; i<=n1-1; i++)
    {
        ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
    }
    for(i=0; i<=n2-1; i++)
    {
        ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
    }

    /*
     * Z := inv(L2*U2)
     */
    matinv_rmatrixluinverserec(a, offs+n1, n2, work, info, rep, _state);
}
60573 
60574 
60575 /*************************************************************************
60576 Serial stub for GPL edition.
60577 *************************************************************************/
_trypexec_matinv_rmatrixluinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_vector * work,sinteger * info,matinvreport * rep,ae_state * _state)60578 ae_bool _trypexec_matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
60579     ae_int_t offs,
60580     ae_int_t n,
60581     /* Real    */ ae_vector* work,
60582     sinteger* info,
60583     matinvreport* rep,
60584     ae_state *_state)
60585 {
60586     return ae_false;
60587 }
60588 
60589 
matinv_cmatrixluinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_vector * work,sinteger * ssinfo,matinvreport * rep,ae_state * _state)60590 static void matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
60591      ae_int_t offs,
60592      ae_int_t n,
60593      /* Complex */ ae_vector* work,
60594      sinteger* ssinfo,
60595      matinvreport* rep,
60596      ae_state *_state)
60597 {
60598     ae_int_t i;
60599     ae_int_t j;
60600     ae_complex v;
60601     ae_int_t n1;
60602     ae_int_t n2;
60603     ae_int_t mn;
60604     ae_int_t tsa;
60605     ae_int_t tsb;
60606     ae_int_t tscur;
60607 
60608 
60609     if( n<1 )
60610     {
60611         ssinfo->val = -1;
60612         return;
60613     }
60614     tsa = matrixtilesizea(_state)/2;
60615     tsb = matrixtilesizeb(_state);
60616     tscur = tsb;
60617     if( n<=tsb )
60618     {
60619         tscur = tsa;
60620     }
60621 
60622     /*
60623      * Try parallelism
60624      */
60625     if( n>=2*tsb&&ae_fp_greater_eq((double)32/(double)6*rmul3((double)(n), (double)(n), (double)(n), _state),smpactivationlevel(_state)) )
60626     {
60627         if( _trypexec_matinv_cmatrixluinverserec(a,offs,n,work,ssinfo,rep, _state) )
60628         {
60629             return;
60630         }
60631     }
60632 
60633     /*
60634      * Base case
60635      */
60636     if( n<=tsa )
60637     {
60638 
60639         /*
60640          * Form inv(U)
60641          */
60642         matinv_cmatrixtrinverserec(a, offs, n, ae_true, ae_false, work, ssinfo, _state);
60643         if( ssinfo->val<=0 )
60644         {
60645             return;
60646         }
60647 
60648         /*
60649          * Solve the equation inv(A)*L = inv(U) for inv(A).
60650          */
60651         for(j=n-1; j>=0; j--)
60652         {
60653 
60654             /*
60655              * Copy current column of L to WORK and replace with zeros.
60656              */
60657             for(i=j+1; i<=n-1; i++)
60658             {
60659                 work->ptr.p_complex[i] = a->ptr.pp_complex[offs+i][offs+j];
60660                 a->ptr.pp_complex[offs+i][offs+j] = ae_complex_from_i(0);
60661             }
60662 
60663             /*
60664              * Compute current column of inv(A).
60665              */
60666             if( j<n-1 )
60667             {
60668                 for(i=0; i<=n-1; i++)
60669                 {
60670                     v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+j+1], 1, "N", &work->ptr.p_complex[j+1], 1, "N", ae_v_len(offs+j+1,offs+n-1));
60671                     a->ptr.pp_complex[offs+i][offs+j] = ae_c_sub(a->ptr.pp_complex[offs+i][offs+j],v);
60672                 }
60673             }
60674         }
60675         return;
60676     }
60677 
60678     /*
60679      * Recursive code:
60680      *
60681      *         ( L1      )   ( U1  U12 )
60682      * A    =  (         ) * (         )
60683      *         ( L12  L2 )   (     U2  )
60684      *
60685      *         ( W   X )
60686      * A^-1 =  (       )
60687      *         ( Y   Z )
60688      *
60689      * In-place calculation can be done as follows:
60690      * * X := inv(U1)*U12*inv(U2)
60691      * * Y := inv(L2)*L12*inv(L1)
60692      * * W := inv(L1*U1)+X*Y
60693      * * X := -X*inv(L2)
60694      * * Y := -inv(U2)*Y
60695      * * Z := inv(L2*U2)
60696      *
60697      * Reordering w.r.t. interdependencies gives us:
60698      *
60699      * * X := inv(U1)*U12      \ suitable for parallel execution
60700      * * Y := L12*inv(L1)      /
60701      *
60702      * * X := X*inv(U2)        \
60703      * * Y := inv(L2)*Y        | suitable for parallel execution
60704      * * W := inv(L1*U1)       /
60705      *
60706      * * W := W+X*Y
60707      *
60708      * * X := -X*inv(L2)       \ suitable for parallel execution
60709      * * Y := -inv(U2)*Y       /
60710      *
60711      * * Z := inv(L2*U2)
60712      */
60713     tiledsplit(n, tscur, &n1, &n2, _state);
60714     mn = imin2(n1, n2, _state);
60715     touchint(&mn, _state);
60716     ae_assert(n2>0, "LUInverseRec: internal error!", _state);
60717 
60718     /*
60719      * X := inv(U1)*U12
60720      * Y := L12*inv(L1)
60721      */
60722     cmatrixlefttrsm(n1, n2, a, offs, offs, ae_true, ae_false, 0, a, offs, offs+n1, _state);
60723     cmatrixrighttrsm(n2, n1, a, offs, offs, ae_false, ae_true, 0, a, offs+n1, offs, _state);
60724 
60725     /*
60726      * X := X*inv(U2)
60727      * Y := inv(L2)*Y
60728      * W := inv(L1*U1)
60729      */
60730     cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs, offs+n1, _state);
60731     cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs+n1, offs, _state);
60732     matinv_cmatrixluinverserec(a, offs, n1, work, ssinfo, rep, _state);
60733     if( ssinfo->val<=0 )
60734     {
60735         return;
60736     }
60737 
60738     /*
60739      * W := W+X*Y
60740      */
60741     cmatrixgemm(n1, n1, n2, ae_complex_from_d(1.0), a, offs, offs+n1, 0, a, offs+n1, offs, 0, ae_complex_from_d(1.0), a, offs, offs, _state);
60742 
60743     /*
60744      * X := -X*inv(L2)
60745      * Y := -inv(U2)*Y
60746      */
60747     cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs, offs+n1, _state);
60748     cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs+n1, offs, _state);
60749     for(i=0; i<=n1-1; i++)
60750     {
60751         ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
60752     }
60753     for(i=0; i<=n2-1; i++)
60754     {
60755         ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
60756     }
60757 
60758     /*
60759      * Z := inv(L2*U2)
60760      */
60761     matinv_cmatrixluinverserec(a, offs+n1, n2, work, ssinfo, rep, _state);
60762 }
60763 
60764 
60765 /*************************************************************************
60766 Serial stub for GPL edition.
60767 *************************************************************************/
_trypexec_matinv_cmatrixluinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_vector * work,sinteger * ssinfo,matinvreport * rep,ae_state * _state)60768 ae_bool _trypexec_matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
60769     ae_int_t offs,
60770     ae_int_t n,
60771     /* Complex */ ae_vector* work,
60772     sinteger* ssinfo,
60773     matinvreport* rep,
60774     ae_state *_state)
60775 {
60776     return ae_false;
60777 }
60778 
60779 
60780 /*************************************************************************
60781 Recursive subroutine for HPD inversion.
60782 
60783   -- ALGLIB routine --
60784      10.02.2010
60785      Bochkanov Sergey
60786 *************************************************************************/
matinv_hpdmatrixcholeskyinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)60787 static void matinv_hpdmatrixcholeskyinverserec(/* Complex */ ae_matrix* a,
60788      ae_int_t offs,
60789      ae_int_t n,
60790      ae_bool isupper,
60791      /* Complex */ ae_vector* tmp,
60792      ae_state *_state)
60793 {
60794     ae_frame _frame_block;
60795     ae_int_t i;
60796     ae_int_t j;
60797     ae_complex v;
60798     ae_int_t n1;
60799     ae_int_t n2;
60800     sinteger sinfo;
60801     ae_int_t tsa;
60802     ae_int_t tsb;
60803     ae_int_t tscur;
60804 
60805     ae_frame_make(_state, &_frame_block);
60806     memset(&sinfo, 0, sizeof(sinfo));
60807     _sinteger_init(&sinfo, _state, ae_true);
60808 
60809     if( n<1 )
60810     {
60811         ae_frame_leave(_state);
60812         return;
60813     }
60814     tsa = matrixtilesizea(_state)/2;
60815     tsb = matrixtilesizeb(_state);
60816     tscur = tsb;
60817     if( n<=tsb )
60818     {
60819         tscur = tsa;
60820     }
60821 
60822     /*
60823      * Base case
60824      */
60825     if( n<=tsa )
60826     {
60827         sinfo.val = 1;
60828         matinv_cmatrixtrinverserec(a, offs, n, isupper, ae_false, tmp, &sinfo, _state);
60829         ae_assert(sinfo.val>0, "HPDMatrixCholeskyInverseRec: integrity check failed", _state);
60830         if( isupper )
60831         {
60832 
60833             /*
60834              * Compute the product U * U'.
60835              * NOTE: we never assume that diagonal of U is real
60836              */
60837             for(i=0; i<=n-1; i++)
60838             {
60839                 if( i==0 )
60840                 {
60841 
60842                     /*
60843                      * 1x1 matrix
60844                      */
60845                     a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
60846                 }
60847                 else
60848                 {
60849 
60850                     /*
60851                      * (I+1)x(I+1) matrix,
60852                      *
60853                      * ( A11  A12 )   ( A11^H        )   ( A11*A11^H+A12*A12^H  A12*A22^H )
60854                      * (          ) * (              ) = (                                )
60855                      * (      A22 )   ( A12^H  A22^H )   ( A22*A12^H            A22*A22^H )
60856                      *
60857                      * A11 is IxI, A22 is 1x1.
60858                      */
60859                     ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+i], a->stride, "Conj", ae_v_len(0,i-1));
60860                     for(j=0; j<=i-1; j++)
60861                     {
60862                         v = a->ptr.pp_complex[offs+j][offs+i];
60863                         ae_v_caddc(&a->ptr.pp_complex[offs+j][offs+j], 1, &tmp->ptr.p_complex[j], 1, "N", ae_v_len(offs+j,offs+i-1), v);
60864                     }
60865                     v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+i], _state);
60866                     ae_v_cmulc(&a->ptr.pp_complex[offs][offs+i], a->stride, ae_v_len(offs,offs+i-1), v);
60867                     a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
60868                 }
60869             }
60870         }
60871         else
60872         {
60873 
60874             /*
60875              * Compute the product L' * L
60876              * NOTE: we never assume that diagonal of L is real
60877              */
60878             for(i=0; i<=n-1; i++)
60879             {
60880                 if( i==0 )
60881                 {
60882 
60883                     /*
60884                      * 1x1 matrix
60885                      */
60886                     a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
60887                 }
60888                 else
60889                 {
60890 
60891                     /*
60892                      * (I+1)x(I+1) matrix,
60893                      *
60894                      * ( A11^H  A21^H )   ( A11      )   ( A11^H*A11+A21^H*A21  A21^H*A22 )
60895                      * (              ) * (          ) = (                                )
60896                      * (        A22^H )   ( A21  A22 )   ( A22^H*A21            A22^H*A22 )
60897                      *
60898                      * A11 is IxI, A22 is 1x1.
60899                      */
60900                     ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs], 1, "N", ae_v_len(0,i-1));
60901                     for(j=0; j<=i-1; j++)
60902                     {
60903                         v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+j], _state);
60904                         ae_v_caddc(&a->ptr.pp_complex[offs+j][offs], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+j), v);
60905                     }
60906                     v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+i], _state);
60907                     ae_v_cmulc(&a->ptr.pp_complex[offs+i][offs], 1, ae_v_len(offs,offs+i-1), v);
60908                     a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
60909                 }
60910             }
60911         }
60912         ae_frame_leave(_state);
60913         return;
60914     }
60915 
60916     /*
60917      * Recursive code: triangular factor inversion merged with
60918      * UU' or L'L multiplication
60919      */
60920     tiledsplit(n, tscur, &n1, &n2, _state);
60921 
60922     /*
60923      * form off-diagonal block of trangular inverse
60924      */
60925     if( isupper )
60926     {
60927         for(i=0; i<=n1-1; i++)
60928         {
60929             ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
60930         }
60931         cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 0, a, offs, offs+n1, _state);
60932         cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs, offs+n1, _state);
60933     }
60934     else
60935     {
60936         for(i=0; i<=n2-1; i++)
60937         {
60938             ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
60939         }
60940         cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 0, a, offs+n1, offs, _state);
60941         cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs+n1, offs, _state);
60942     }
60943 
60944     /*
60945      * invert first diagonal block
60946      */
60947     matinv_hpdmatrixcholeskyinverserec(a, offs, n1, isupper, tmp, _state);
60948 
60949     /*
60950      * update first diagonal block with off-diagonal block,
60951      * update off-diagonal block
60952      */
60953     if( isupper )
60954     {
60955         cmatrixherk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper, _state);
60956         cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 2, a, offs, offs+n1, _state);
60957     }
60958     else
60959     {
60960         cmatrixherk(n1, n2, 1.0, a, offs+n1, offs, 2, 1.0, a, offs, offs, isupper, _state);
60961         cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 2, a, offs+n1, offs, _state);
60962     }
60963 
60964     /*
60965      * invert second diagonal block
60966      */
60967     matinv_hpdmatrixcholeskyinverserec(a, offs+n1, n2, isupper, tmp, _state);
60968     ae_frame_leave(_state);
60969 }
60970 
60971 
_matinvreport_init(void * _p,ae_state * _state,ae_bool make_automatic)60972 void _matinvreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
60973 {
60974     matinvreport *p = (matinvreport*)_p;
60975     ae_touch_ptr((void*)p);
60976 }
60977 
60978 
_matinvreport_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)60979 void _matinvreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
60980 {
60981     matinvreport *dst = (matinvreport*)_dst;
60982     matinvreport *src = (matinvreport*)_src;
60983     dst->r1 = src->r1;
60984     dst->rinf = src->rinf;
60985 }
60986 
60987 
_matinvreport_clear(void * _p)60988 void _matinvreport_clear(void* _p)
60989 {
60990     matinvreport *p = (matinvreport*)_p;
60991     ae_touch_ptr((void*)p);
60992 }
60993 
60994 
_matinvreport_destroy(void * _p)60995 void _matinvreport_destroy(void* _p)
60996 {
60997     matinvreport *p = (matinvreport*)_p;
60998     ae_touch_ptr((void*)p);
60999 }
61000 
61001 
61002 #endif
61003 #if defined(AE_COMPILE_INVERSEUPDATE) || !defined(AE_PARTIAL_BUILD)
61004 
61005 
61006 /*************************************************************************
61007 Inverse matrix update by the Sherman-Morrison formula
61008 
61009 The algorithm updates matrix A^-1 when adding a number to an element
61010 of matrix A.
61011 
61012 Input parameters:
61013     InvA    -   inverse of matrix A.
61014                 Array whose indexes range within [0..N-1, 0..N-1].
61015     N       -   size of matrix A.
61016     UpdRow  -   row where the element to be updated is stored.
61017     UpdColumn - column where the element to be updated is stored.
61018     UpdVal  -   a number to be added to the element.
61019 
61020 
61021 Output parameters:
61022     InvA    -   inverse of modified matrix A.
61023 
61024   -- ALGLIB --
61025      Copyright 2005 by Bochkanov Sergey
61026 *************************************************************************/
rmatrixinvupdatesimple(ae_matrix * inva,ae_int_t n,ae_int_t updrow,ae_int_t updcolumn,double updval,ae_state * _state)61027 void rmatrixinvupdatesimple(/* Real    */ ae_matrix* inva,
61028      ae_int_t n,
61029      ae_int_t updrow,
61030      ae_int_t updcolumn,
61031      double updval,
61032      ae_state *_state)
61033 {
61034     ae_frame _frame_block;
61035     ae_vector t1;
61036     ae_vector t2;
61037     ae_int_t i;
61038     double lambdav;
61039     double vt;
61040 
61041     ae_frame_make(_state, &_frame_block);
61042     memset(&t1, 0, sizeof(t1));
61043     memset(&t2, 0, sizeof(t2));
61044     ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
61045     ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
61046 
61047     ae_assert(updrow>=0&&updrow<n, "RMatrixInvUpdateSimple: incorrect UpdRow!", _state);
61048     ae_assert(updcolumn>=0&&updcolumn<n, "RMatrixInvUpdateSimple: incorrect UpdColumn!", _state);
61049     ae_vector_set_length(&t1, n-1+1, _state);
61050     ae_vector_set_length(&t2, n-1+1, _state);
61051 
61052     /*
61053      * T1 = InvA * U
61054      */
61055     ae_v_move(&t1.ptr.p_double[0], 1, &inva->ptr.pp_double[0][updrow], inva->stride, ae_v_len(0,n-1));
61056 
61057     /*
61058      * T2 = v*InvA
61059      */
61060     ae_v_move(&t2.ptr.p_double[0], 1, &inva->ptr.pp_double[updcolumn][0], 1, ae_v_len(0,n-1));
61061 
61062     /*
61063      * Lambda = v * InvA * U
61064      */
61065     lambdav = updval*inva->ptr.pp_double[updcolumn][updrow];
61066 
61067     /*
61068      * InvA = InvA - correction
61069      */
61070     for(i=0; i<=n-1; i++)
61071     {
61072         vt = updval*t1.ptr.p_double[i];
61073         vt = vt/(1+lambdav);
61074         ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
61075     }
61076     ae_frame_leave(_state);
61077 }
61078 
61079 
61080 /*************************************************************************
61081 Inverse matrix update by the Sherman-Morrison formula
61082 
61083 The algorithm updates matrix A^-1 when adding a vector to a row
61084 of matrix A.
61085 
61086 Input parameters:
61087     InvA    -   inverse of matrix A.
61088                 Array whose indexes range within [0..N-1, 0..N-1].
61089     N       -   size of matrix A.
61090     UpdRow  -   the row of A whose vector V was added.
61091                 0 <= Row <= N-1
61092     V       -   the vector to be added to a row.
61093                 Array whose index ranges within [0..N-1].
61094 
61095 Output parameters:
61096     InvA    -   inverse of modified matrix A.
61097 
61098   -- ALGLIB --
61099      Copyright 2005 by Bochkanov Sergey
61100 *************************************************************************/
rmatrixinvupdaterow(ae_matrix * inva,ae_int_t n,ae_int_t updrow,ae_vector * v,ae_state * _state)61101 void rmatrixinvupdaterow(/* Real    */ ae_matrix* inva,
61102      ae_int_t n,
61103      ae_int_t updrow,
61104      /* Real    */ ae_vector* v,
61105      ae_state *_state)
61106 {
61107     ae_frame _frame_block;
61108     ae_vector t1;
61109     ae_vector t2;
61110     ae_int_t i;
61111     ae_int_t j;
61112     double lambdav;
61113     double vt;
61114 
61115     ae_frame_make(_state, &_frame_block);
61116     memset(&t1, 0, sizeof(t1));
61117     memset(&t2, 0, sizeof(t2));
61118     ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
61119     ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
61120 
61121     ae_vector_set_length(&t1, n-1+1, _state);
61122     ae_vector_set_length(&t2, n-1+1, _state);
61123 
61124     /*
61125      * T1 = InvA * U
61126      */
61127     ae_v_move(&t1.ptr.p_double[0], 1, &inva->ptr.pp_double[0][updrow], inva->stride, ae_v_len(0,n-1));
61128 
61129     /*
61130      * T2 = v*InvA
61131      * Lambda = v * InvA * U
61132      */
61133     for(j=0; j<=n-1; j++)
61134     {
61135         vt = ae_v_dotproduct(&v->ptr.p_double[0], 1, &inva->ptr.pp_double[0][j], inva->stride, ae_v_len(0,n-1));
61136         t2.ptr.p_double[j] = vt;
61137     }
61138     lambdav = t2.ptr.p_double[updrow];
61139 
61140     /*
61141      * InvA = InvA - correction
61142      */
61143     for(i=0; i<=n-1; i++)
61144     {
61145         vt = t1.ptr.p_double[i]/(1+lambdav);
61146         ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
61147     }
61148     ae_frame_leave(_state);
61149 }
61150 
61151 
61152 /*************************************************************************
61153 Inverse matrix update by the Sherman-Morrison formula
61154 
61155 The algorithm updates matrix A^-1 when adding a vector to a column
61156 of matrix A.
61157 
61158 Input parameters:
61159     InvA        -   inverse of matrix A.
61160                     Array whose indexes range within [0..N-1, 0..N-1].
61161     N           -   size of matrix A.
61162     UpdColumn   -   the column of A whose vector U was added.
61163                     0 <= UpdColumn <= N-1
61164     U           -   the vector to be added to a column.
61165                     Array whose index ranges within [0..N-1].
61166 
61167 Output parameters:
61168     InvA        -   inverse of modified matrix A.
61169 
61170   -- ALGLIB --
61171      Copyright 2005 by Bochkanov Sergey
61172 *************************************************************************/
rmatrixinvupdatecolumn(ae_matrix * inva,ae_int_t n,ae_int_t updcolumn,ae_vector * u,ae_state * _state)61173 void rmatrixinvupdatecolumn(/* Real    */ ae_matrix* inva,
61174      ae_int_t n,
61175      ae_int_t updcolumn,
61176      /* Real    */ ae_vector* u,
61177      ae_state *_state)
61178 {
61179     ae_frame _frame_block;
61180     ae_vector t1;
61181     ae_vector t2;
61182     ae_int_t i;
61183     double lambdav;
61184     double vt;
61185 
61186     ae_frame_make(_state, &_frame_block);
61187     memset(&t1, 0, sizeof(t1));
61188     memset(&t2, 0, sizeof(t2));
61189     ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
61190     ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
61191 
61192     ae_vector_set_length(&t1, n-1+1, _state);
61193     ae_vector_set_length(&t2, n-1+1, _state);
61194 
61195     /*
61196      * T1 = InvA * U
61197      * Lambda = v * InvA * U
61198      */
61199     for(i=0; i<=n-1; i++)
61200     {
61201         vt = ae_v_dotproduct(&inva->ptr.pp_double[i][0], 1, &u->ptr.p_double[0], 1, ae_v_len(0,n-1));
61202         t1.ptr.p_double[i] = vt;
61203     }
61204     lambdav = t1.ptr.p_double[updcolumn];
61205 
61206     /*
61207      * T2 = v*InvA
61208      */
61209     ae_v_move(&t2.ptr.p_double[0], 1, &inva->ptr.pp_double[updcolumn][0], 1, ae_v_len(0,n-1));
61210 
61211     /*
61212      * InvA = InvA - correction
61213      */
61214     for(i=0; i<=n-1; i++)
61215     {
61216         vt = t1.ptr.p_double[i]/(1+lambdav);
61217         ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
61218     }
61219     ae_frame_leave(_state);
61220 }
61221 
61222 
61223 /*************************************************************************
61224 Inverse matrix update by the Sherman-Morrison formula
61225 
61226 The algorithm computes the inverse of matrix A+u*v' by using the given matrix
61227 A^-1 and the vectors u and v.
61228 
61229 Input parameters:
61230     InvA    -   inverse of matrix A.
61231                 Array whose indexes range within [0..N-1, 0..N-1].
61232     N       -   size of matrix A.
61233     U       -   the vector modifying the matrix.
61234                 Array whose index ranges within [0..N-1].
61235     V       -   the vector modifying the matrix.
61236                 Array whose index ranges within [0..N-1].
61237 
61238 Output parameters:
61239     InvA - inverse of matrix A + u*v'.
61240 
61241   -- ALGLIB --
61242      Copyright 2005 by Bochkanov Sergey
61243 *************************************************************************/
rmatrixinvupdateuv(ae_matrix * inva,ae_int_t n,ae_vector * u,ae_vector * v,ae_state * _state)61244 void rmatrixinvupdateuv(/* Real    */ ae_matrix* inva,
61245      ae_int_t n,
61246      /* Real    */ ae_vector* u,
61247      /* Real    */ ae_vector* v,
61248      ae_state *_state)
61249 {
61250     ae_frame _frame_block;
61251     ae_vector t1;
61252     ae_vector t2;
61253     ae_int_t i;
61254     ae_int_t j;
61255     double lambdav;
61256     double vt;
61257 
61258     ae_frame_make(_state, &_frame_block);
61259     memset(&t1, 0, sizeof(t1));
61260     memset(&t2, 0, sizeof(t2));
61261     ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
61262     ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
61263 
61264     ae_vector_set_length(&t1, n-1+1, _state);
61265     ae_vector_set_length(&t2, n-1+1, _state);
61266 
61267     /*
61268      * T1 = InvA * U
61269      * Lambda = v * T1
61270      */
61271     for(i=0; i<=n-1; i++)
61272     {
61273         vt = ae_v_dotproduct(&inva->ptr.pp_double[i][0], 1, &u->ptr.p_double[0], 1, ae_v_len(0,n-1));
61274         t1.ptr.p_double[i] = vt;
61275     }
61276     lambdav = ae_v_dotproduct(&v->ptr.p_double[0], 1, &t1.ptr.p_double[0], 1, ae_v_len(0,n-1));
61277 
61278     /*
61279      * T2 = v*InvA
61280      */
61281     for(j=0; j<=n-1; j++)
61282     {
61283         vt = ae_v_dotproduct(&v->ptr.p_double[0], 1, &inva->ptr.pp_double[0][j], inva->stride, ae_v_len(0,n-1));
61284         t2.ptr.p_double[j] = vt;
61285     }
61286 
61287     /*
61288      * InvA = InvA - correction
61289      */
61290     for(i=0; i<=n-1; i++)
61291     {
61292         vt = t1.ptr.p_double[i]/(1+lambdav);
61293         ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
61294     }
61295     ae_frame_leave(_state);
61296 }
61297 
61298 
61299 #endif
61300 #if defined(AE_COMPILE_SCHUR) || !defined(AE_PARTIAL_BUILD)
61301 
61302 
61303 /*************************************************************************
61304 Subroutine performing the Schur decomposition of a general matrix by using
61305 the QR algorithm with multiple shifts.
61306 
61307 COMMERCIAL EDITION OF ALGLIB:
61308 
61309   ! Commercial version of ALGLIB includes one  important  improvement   of
61310   ! this function, which can be used from C++ and C#:
61311   ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
61312   !
61313   ! Intel MKL gives approximately constant  (with  respect  to  number  of
61314   ! worker threads) acceleration factor which depends on CPU  being  used,
61315   ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
61316   ! comparison.
61317   !
61318   ! Multithreaded acceleration is NOT supported for this function.
61319   !
61320   ! We recommend you to read 'Working with commercial version' section  of
61321   ! ALGLIB Reference Manual in order to find out how to  use  performance-
61322   ! related features provided by commercial edition of ALGLIB.
61323 
61324 The source matrix A is represented as S'*A*S = T, where S is an orthogonal
61325 matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of
61326 sizes 1x1 and 2x2 on the main diagonal).
61327 
61328 Input parameters:
61329     A   -   matrix to be decomposed.
61330             Array whose indexes range within [0..N-1, 0..N-1].
61331     N   -   size of A, N>=0.
61332 
61333 
61334 Output parameters:
61335     A   -   contains matrix T.
61336             Array whose indexes range within [0..N-1, 0..N-1].
61337     S   -   contains Schur vectors.
61338             Array whose indexes range within [0..N-1, 0..N-1].
61339 
61340 Note 1:
61341     The block structure of matrix T can be easily recognized: since all
61342     the elements below the blocks are zeros, the elements a[i+1,i] which
61343     are equal to 0 show the block border.
61344 
61345 Note 2:
61346     The algorithm performance depends on the value of the internal parameter
61347     NS of the InternalSchurDecomposition subroutine which defines the number
61348     of shifts in the QR algorithm (similarly to the block width in block-matrix
61349     algorithms in linear algebra). If you require maximum performance on
61350     your machine, it is recommended to adjust this parameter manually.
61351 
61352 Result:
61353     True,
61354         if the algorithm has converged and parameters A and S contain the result.
61355     False,
61356         if the algorithm has not converged.
61357 
61358 Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library).
61359 *************************************************************************/
rmatrixschur(ae_matrix * a,ae_int_t n,ae_matrix * s,ae_state * _state)61360 ae_bool rmatrixschur(/* Real    */ ae_matrix* a,
61361      ae_int_t n,
61362      /* Real    */ ae_matrix* s,
61363      ae_state *_state)
61364 {
61365     ae_frame _frame_block;
61366     ae_vector tau;
61367     ae_vector wi;
61368     ae_vector wr;
61369     ae_int_t info;
61370     ae_bool result;
61371 
61372     ae_frame_make(_state, &_frame_block);
61373     memset(&tau, 0, sizeof(tau));
61374     memset(&wi, 0, sizeof(wi));
61375     memset(&wr, 0, sizeof(wr));
61376     ae_matrix_clear(s);
61377     ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
61378     ae_vector_init(&wi, 0, DT_REAL, _state, ae_true);
61379     ae_vector_init(&wr, 0, DT_REAL, _state, ae_true);
61380 
61381 
61382     /*
61383      * Upper Hessenberg form of the 0-based matrix
61384      */
61385     rmatrixhessenberg(a, n, &tau, _state);
61386     rmatrixhessenbergunpackq(a, n, &tau, s, _state);
61387 
61388     /*
61389      * Schur decomposition
61390      */
61391     rmatrixinternalschurdecomposition(a, n, 1, 1, &wr, &wi, s, &info, _state);
61392     result = info==0;
61393     ae_frame_leave(_state);
61394     return result;
61395 }
61396 
61397 
61398 #endif
61399 #if defined(AE_COMPILE_SPDGEVD) || !defined(AE_PARTIAL_BUILD)
61400 
61401 
61402 /*************************************************************************
61403 Algorithm for solving the following generalized symmetric positive-definite
61404 eigenproblem:
61405     A*x = lambda*B*x (1) or
61406     A*B*x = lambda*x (2) or
61407     B*A*x = lambda*x (3).
61408 where A is a symmetric matrix, B - symmetric positive-definite matrix.
61409 The problem is solved by reducing it to an ordinary  symmetric  eigenvalue
61410 problem.
61411 
61412 Input parameters:
61413     A           -   symmetric matrix which is given by its upper or lower
61414                     triangular part.
61415                     Array whose indexes range within [0..N-1, 0..N-1].
61416     N           -   size of matrices A and B.
61417     IsUpperA    -   storage format of matrix A.
61418     B           -   symmetric positive-definite matrix which is given by
61419                     its upper or lower triangular part.
61420                     Array whose indexes range within [0..N-1, 0..N-1].
61421     IsUpperB    -   storage format of matrix B.
61422     ZNeeded     -   if ZNeeded is equal to:
61423                      * 0, the eigenvectors are not returned;
61424                      * 1, the eigenvectors are returned.
61425     ProblemType -   if ProblemType is equal to:
61426                      * 1, the following problem is solved: A*x = lambda*B*x;
61427                      * 2, the following problem is solved: A*B*x = lambda*x;
61428                      * 3, the following problem is solved: B*A*x = lambda*x.
61429 
61430 Output parameters:
61431     D           -   eigenvalues in ascending order.
61432                     Array whose index ranges within [0..N-1].
61433     Z           -   if ZNeeded is equal to:
61434                      * 0, Z hasn't changed;
61435                      * 1, Z contains eigenvectors.
61436                     Array whose indexes range within [0..N-1, 0..N-1].
61437                     The eigenvectors are stored in matrix columns. It should
61438                     be noted that the eigenvectors in such problems do not
61439                     form an orthogonal system.
61440 
61441 Result:
61442     True, if the problem was solved successfully.
61443     False, if the error occurred during the Cholesky decomposition of matrix
61444     B (the matrix isn't positive-definite) or during the work of the iterative
61445     algorithm for solving the symmetric eigenproblem.
61446 
61447 See also the GeneralizedSymmetricDefiniteEVDReduce subroutine.
61448 
61449   -- ALGLIB --
61450      Copyright 1.28.2006 by Bochkanov Sergey
61451 *************************************************************************/
smatrixgevd(ae_matrix * a,ae_int_t n,ae_bool isuppera,ae_matrix * b,ae_bool isupperb,ae_int_t zneeded,ae_int_t problemtype,ae_vector * d,ae_matrix * z,ae_state * _state)61452 ae_bool smatrixgevd(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isuppera,
     /* Real    */ ae_matrix* b,
     ae_bool isupperb,
     ae_int_t zneeded,
     ae_int_t problemtype,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_matrix r;
    ae_matrix t;
    ae_bool isupperr;
    ae_int_t j1;
    ae_int_t j2;
    ae_int_t j1inc;
    ae_int_t j2inc;
    ae_int_t i;
    ae_int_t j;
    double v;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&r, 0, sizeof(r));
    memset(&t, 0, sizeof(t));
    /* A is modified by the reduction below: work on a private copy so
       the caller's matrix stays intact */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(d);
    ae_matrix_clear(z);
    ae_matrix_init(&r, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);


    /*
     * Reduce to the standard symmetric problem C*y=lambda*y and solve it;
     * R is the triangular back-transformation matrix (x = R*y) and
     * IsUpperR reports which triangle of R is stored.
     */
    result = smatrixgevdreduce(a, n, isuppera, b, isupperb, problemtype, &r, &isupperr, _state);
    if( !result )
    {
        ae_frame_leave(_state);
        return result;
    }
    result = smatrixevd(a, n, zneeded, isuppera, d, &t, _state);
    if( !result )
    {
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Transform eigenvectors if needed
     */
    if( zneeded!=0 )
    {

        /*
         * fill Z with zeros: zero out row 0, then replicate it into
         * rows 1..N-1
         */
        ae_matrix_set_length(z, n-1+1, n-1+1, _state);
        for(j=0; j<=n-1; j++)
        {
            z->ptr.pp_double[0][j] = 0.0;
        }
        for(i=1; i<=n-1; i++)
        {
            ae_v_move(&z->ptr.pp_double[i][0], 1, &z->ptr.pp_double[0][0], 1, ae_v_len(0,n-1));
        }

        /*
         * Setup R properties: [J1..J2] tracks the nonzero column span of
         * row I of triangular R (upper: [I..N-1], lower: [0..I])
         */
        if( isupperr )
        {
            j1 = 0;
            j2 = n-1;
            j1inc = 1;
            j2inc = 0;
        }
        else
        {
            j1 = 0;
            j2 = 0;
            j1inc = 0;
            j2inc = 1;
        }

        /*
         * Calculate Z = R*T, touching only the nonzero span of each row of R
         */
        for(i=0; i<=n-1; i++)
        {
            for(j=j1; j<=j2; j++)
            {
                v = r.ptr.pp_double[i][j];
                ae_v_addd(&z->ptr.pp_double[i][0], 1, &t.ptr.pp_double[j][0], 1, ae_v_len(0,n-1), v);
            }
            j1 = j1+j1inc;
            j2 = j2+j2inc;
        }
    }
    ae_frame_leave(_state);
    return result;
}
61559 
61560 
61561 /*************************************************************************
61562 Algorithm for reduction of the following generalized symmetric positive-
61563 definite eigenvalue problem:
61564     A*x = lambda*B*x (1) or
61565     A*B*x = lambda*x (2) or
61566     B*A*x = lambda*x (3)
61567 to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and
61568 the given problems are the same, and the eigenvectors of the given problem
61569 could be obtained by multiplying the obtained eigenvectors by the
61570 transformation matrix x = R*y).
61571 
61572 Here A is a symmetric matrix, B - symmetric positive-definite matrix.
61573 
61574 Input parameters:
61575     A           -   symmetric matrix which is given by its upper or lower
61576                     triangular part.
61577                     Array whose indexes range within [0..N-1, 0..N-1].
61578     N           -   size of matrices A and B.
61579     IsUpperA    -   storage format of matrix A.
61580     B           -   symmetric positive-definite matrix which is given by
61581                     its upper or lower triangular part.
61582                     Array whose indexes range within [0..N-1, 0..N-1].
61583     IsUpperB    -   storage format of matrix B.
61584     ProblemType -   if ProblemType is equal to:
61585                      * 1, the following problem is solved: A*x = lambda*B*x;
61586                      * 2, the following problem is solved: A*B*x = lambda*x;
61587                      * 3, the following problem is solved: B*A*x = lambda*x.
61588 
61589 Output parameters:
61590     A           -   symmetric matrix which is given by its upper or lower
61591                     triangle depending on IsUpperA. Contains matrix C.
61592                     Array whose indexes range within [0..N-1, 0..N-1].
    R           -   upper triangular or lower triangular transformation matrix
61594                     which is used to obtain the eigenvectors of a given problem
61595                     as the product of eigenvectors of C (from the right) and
61596                     matrix R (from the left). If the matrix is upper
61597                     triangular, the elements below the main diagonal
61598                     are equal to 0 (and vice versa). Thus, we can perform
61599                     the multiplication without taking into account the
61600                     internal structure (which is an easier though less
61601                     effective way).
61602                     Array whose indexes range within [0..N-1, 0..N-1].
61603     IsUpperR    -   type of matrix R (upper or lower triangular).
61604 
61605 Result:
61606     True, if the problem was reduced successfully.
61607     False, if the error occurred during the Cholesky decomposition of
61608         matrix B (the matrix is not positive-definite).
61609 
61610   -- ALGLIB --
61611      Copyright 1.28.2006 by Bochkanov Sergey
61612 *************************************************************************/
ae_bool smatrixgevdreduce(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isuppera,
     /* Real    */ ae_matrix* b,
     ae_bool isupperb,
     ae_int_t problemtype,
     /* Real    */ ae_matrix* r,
     ae_bool* isupperr,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix t;                 /* Cholesky factor of B; later overwritten by its inverse */
    ae_vector w1;                /* workspace vectors; note: used with 1-based indexing    */
    ae_vector w2;
    ae_vector w3;
    ae_int_t i;
    ae_int_t j;
    double v;
    matinvreport rep;
    ae_int_t info;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&t, 0, sizeof(t));
    memset(&w1, 0, sizeof(w1));
    memset(&w2, 0, sizeof(w2));
    memset(&w3, 0, sizeof(w3));
    memset(&rep, 0, sizeof(rep));
    ae_matrix_clear(r);
    *isupperr = ae_false;
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&w1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&w2, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&w3, 0, DT_REAL, _state, ae_true);
    _matinvreport_init(&rep, _state, ae_true);

    ae_assert(n>0, "SMatrixGEVDReduce: N<=0!", _state);
    ae_assert((problemtype==1||problemtype==2)||problemtype==3, "SMatrixGEVDReduce: incorrect ProblemType!", _state);
    result = ae_true;

    /*
     * Problem 1:  A*x = lambda*B*x
     *
     * Reducing to:
     *     C*y = lambda*y
     *     C = L^(-1) * A * L^(-T)
     *     x = L^(-T) * y
     */
    if( problemtype==1 )
    {

        /*
         * Factorize B in T: B = LL'
         */
        ae_matrix_set_length(&t, n-1+1, n-1+1, _state);
        if( isupperb )
        {
            /* B is given by its upper triangle: transpose it into T's lower
             * triangle (source advances along row i, destination advances
             * down column i via t.stride). */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&t.ptr.pp_double[i][i], t.stride, &b->ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
            }
        }
        else
        {
            /* B is already stored as a lower triangle: copy rows directly. */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&t.ptr.pp_double[i][0], 1, &b->ptr.pp_double[i][0], 1, ae_v_len(0,i));
            }
        }
        /* Lower-triangular Cholesky; failure means B is not positive-definite. */
        if( !spdmatrixcholesky(&t, n, ae_false, _state) )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Invert L in T
         */
        rmatrixtrinverse(&t, n, ae_false, ae_false, &info, &rep, _state);
        if( info<=0 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Build L^(-1) * A * L^(-T) in R
         *
         * Column-by-column: for each j, compute A times the j-th column of
         * L^(-T) (split into a symmetric part and a rectangular part), then
         * dot it against the rows of L^(-1). Workspaces are 1-based.
         */
        ae_vector_set_length(&w1, n+1, _state);
        ae_vector_set_length(&w2, n+1, _state);
        ae_matrix_set_length(r, n-1+1, n-1+1, _state);
        for(j=1; j<=n; j++)
        {

            /*
             * Form w2 = A * l'(j) (here l'(j) is j-th column of L^(-T))
             */
            ae_v_move(&w1.ptr.p_double[1], 1, &t.ptr.pp_double[j-1][0], 1, ae_v_len(1,j));
            symmetricmatrixvectormultiply(a, isuppera, 0, j-1, &w1, 1.0, &w2, _state);
            if( isuppera )
            {
                matrixvectormultiply(a, 0, j-1, j, n-1, ae_true, &w1, 1, j, 1.0, &w2, j+1, n, 0.0, _state);
            }
            else
            {
                matrixvectormultiply(a, j, n-1, 0, j-1, ae_false, &w1, 1, j, 1.0, &w2, j+1, n, 0.0, _state);
            }

            /*
             * Form l(i)*w2 (here l(i) is i-th row of L^(-1))
             */
            for(i=1; i<=n; i++)
            {
                v = ae_v_dotproduct(&t.ptr.pp_double[i-1][0], 1, &w2.ptr.p_double[1], 1, ae_v_len(0,i-1));
                r->ptr.pp_double[i-1][j-1] = v;
            }
        }

        /*
         * Copy R to A
         */
        for(i=0; i<=n-1; i++)
        {
            ae_v_move(&a->ptr.pp_double[i][0], 1, &r->ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
        }

        /*
         * Copy L^(-1) from T to R and transpose
         */
        *isupperr = ae_true;
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=i-1; j++)
            {
                r->ptr.pp_double[i][j] = (double)(0);
            }
        }
        /* Transposing copy: row i of R receives column i of T (t.stride). */
        for(i=0; i<=n-1; i++)
        {
            ae_v_move(&r->ptr.pp_double[i][i], 1, &t.ptr.pp_double[i][i], t.stride, ae_v_len(i,n-1));
        }
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Problem 2:  A*B*x = lambda*x
     * or
     * problem 3:  B*A*x = lambda*x
     *
     * Reducing to:
     *     C*y = lambda*y
     *     C = U * A * U'
     *     B = U'* U
     */
    if( problemtype==2||problemtype==3 )
    {

        /*
         * Factorize B in T: B = U'*U
         */
        ae_matrix_set_length(&t, n-1+1, n-1+1, _state);
        if( isupperb )
        {
            /* B given by its upper triangle: copy rows as-is. */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&t.ptr.pp_double[i][i], 1, &b->ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
            }
        }
        else
        {
            /* B given by its lower triangle: transpose (walk down column i of
             * B via b->stride) into T's upper triangle. */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&t.ptr.pp_double[i][i], 1, &b->ptr.pp_double[i][i], b->stride, ae_v_len(i,n-1));
            }
        }
        /* Upper-triangular Cholesky; failure means B is not positive-definite. */
        if( !spdmatrixcholesky(&t, n, ae_true, _state) )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Build U * A * U' in R
         *
         * Same column-at-a-time scheme as above, with U in place of L^(-1);
         * w3 holds the symmetric part before it is repositioned in w2.
         */
        ae_vector_set_length(&w1, n+1, _state);
        ae_vector_set_length(&w2, n+1, _state);
        ae_vector_set_length(&w3, n+1, _state);
        ae_matrix_set_length(r, n-1+1, n-1+1, _state);
        for(j=1; j<=n; j++)
        {

            /*
             * Form w2 = A * u'(j) (here u'(j) is j-th column of U')
             */
            ae_v_move(&w1.ptr.p_double[1], 1, &t.ptr.pp_double[j-1][j-1], 1, ae_v_len(1,n-j+1));
            symmetricmatrixvectormultiply(a, isuppera, j-1, n-1, &w1, 1.0, &w3, _state);
            ae_v_move(&w2.ptr.p_double[j], 1, &w3.ptr.p_double[1], 1, ae_v_len(j,n));
            ae_v_move(&w1.ptr.p_double[j], 1, &t.ptr.pp_double[j-1][j-1], 1, ae_v_len(j,n));
            if( isuppera )
            {
                matrixvectormultiply(a, 0, j-2, j-1, n-1, ae_false, &w1, j, n, 1.0, &w2, 1, j-1, 0.0, _state);
            }
            else
            {
                matrixvectormultiply(a, j-1, n-1, 0, j-2, ae_true, &w1, j, n, 1.0, &w2, 1, j-1, 0.0, _state);
            }

            /*
             * Form u(i)*w2 (here u(i) is i-th row of U)
             */
            for(i=1; i<=n; i++)
            {
                v = ae_v_dotproduct(&t.ptr.pp_double[i-1][i-1], 1, &w2.ptr.p_double[i], 1, ae_v_len(i-1,n-1));
                r->ptr.pp_double[i-1][j-1] = v;
            }
        }

        /*
         * Copy R to A
         */
        for(i=0; i<=n-1; i++)
        {
            ae_v_move(&a->ptr.pp_double[i][0], 1, &r->ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
        }
        if( problemtype==2 )
        {

            /*
             * Invert U in T
             */
            rmatrixtrinverse(&t, n, ae_true, ae_false, &info, &rep, _state);
            if( info<=0 )
            {
                result = ae_false;
                ae_frame_leave(_state);
                return result;
            }

            /*
             * Copy U^-1 from T to R
             */
            *isupperr = ae_true;
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    r->ptr.pp_double[i][j] = (double)(0);
                }
            }
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&r->ptr.pp_double[i][i], 1, &t.ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
            }
        }
        else
        {

            /*
             * Copy U from T to R and transpose
             */
            *isupperr = ae_false;
            for(i=0; i<=n-1; i++)
            {
                for(j=i+1; j<=n-1; j++)
                {
                    r->ptr.pp_double[i][j] = (double)(0);
                }
            }
            /* Transposing copy: column i of R (r->stride) receives row i of T. */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&r->ptr.pp_double[i][i], r->stride, &t.ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
            }
        }
    }
    ae_frame_leave(_state);
    return result;
}
61894 
61895 
61896 #endif
61897 #if defined(AE_COMPILE_MATDET) || !defined(AE_PARTIAL_BUILD)
61898 
61899 
61900 /*************************************************************************
61901 Determinant calculation of the matrix given by its LU decomposition.
61902 
61903 Input parameters:
61904     A       -   LU decomposition of the matrix (output of
61905                 RMatrixLU subroutine).
61906     Pivots  -   table of permutations which were made during
61907                 the LU decomposition.
61908                 Output of RMatrixLU subroutine.
61909     N       -   (optional) size of matrix A:
61910                 * if given, only principal NxN submatrix is processed and
61911                   overwritten. other elements are unchanged.
61912                 * if not given, automatically determined from matrix size
61913                   (A must be square matrix)
61914 
61915 Result: matrix determinant.
61916 
61917   -- ALGLIB --
61918      Copyright 2005 by Bochkanov Sergey
61919 *************************************************************************/
rmatrixludet(ae_matrix * a,ae_vector * pivots,ae_int_t n,ae_state * _state)61920 double rmatrixludet(/* Real    */ ae_matrix* a,
61921      /* Integer */ ae_vector* pivots,
61922      ae_int_t n,
61923      ae_state *_state)
61924 {
61925     ae_int_t i;
61926     ae_int_t s;
61927     double result;
61928 
61929 
61930     ae_assert(n>=1, "RMatrixLUDet: N<1!", _state);
61931     ae_assert(pivots->cnt>=n, "RMatrixLUDet: Pivots array is too short!", _state);
61932     ae_assert(a->rows>=n, "RMatrixLUDet: rows(A)<N!", _state);
61933     ae_assert(a->cols>=n, "RMatrixLUDet: cols(A)<N!", _state);
61934     ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixLUDet: A contains infinite or NaN values!", _state);
61935     result = (double)(1);
61936     s = 1;
61937     for(i=0; i<=n-1; i++)
61938     {
61939         result = result*a->ptr.pp_double[i][i];
61940         if( pivots->ptr.p_int[i]!=i )
61941         {
61942             s = -s;
61943         }
61944     }
61945     result = result*s;
61946     return result;
61947 }
61948 
61949 
61950 /*************************************************************************
61951 Calculation of the determinant of a general matrix
61952 
61953 Input parameters:
61954     A       -   matrix, array[0..N-1, 0..N-1]
61955     N       -   (optional) size of matrix A:
61956                 * if given, only principal NxN submatrix is processed and
61957                   overwritten. other elements are unchanged.
61958                 * if not given, automatically determined from matrix size
61959                   (A must be square matrix)
61960 
61961 Result: determinant of matrix A.
61962 
61963   -- ALGLIB --
61964      Copyright 2005 by Bochkanov Sergey
61965 *************************************************************************/
rmatrixdet(ae_matrix * a,ae_int_t n,ae_state * _state)61966 double rmatrixdet(/* Real    */ ae_matrix* a,
61967      ae_int_t n,
61968      ae_state *_state)
61969 {
61970     ae_frame _frame_block;
61971     ae_matrix _a;
61972     ae_vector pivots;
61973     double result;
61974 
61975     ae_frame_make(_state, &_frame_block);
61976     memset(&_a, 0, sizeof(_a));
61977     memset(&pivots, 0, sizeof(pivots));
61978     ae_matrix_init_copy(&_a, a, _state, ae_true);
61979     a = &_a;
61980     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
61981 
61982     ae_assert(n>=1, "RMatrixDet: N<1!", _state);
61983     ae_assert(a->rows>=n, "RMatrixDet: rows(A)<N!", _state);
61984     ae_assert(a->cols>=n, "RMatrixDet: cols(A)<N!", _state);
61985     ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixDet: A contains infinite or NaN values!", _state);
61986     rmatrixlu(a, n, n, &pivots, _state);
61987     result = rmatrixludet(a, &pivots, n, _state);
61988     ae_frame_leave(_state);
61989     return result;
61990 }
61991 
61992 
61993 /*************************************************************************
61994 Determinant calculation of the matrix given by its LU decomposition.
61995 
61996 Input parameters:
61997     A       -   LU decomposition of the matrix (output of
61998                 RMatrixLU subroutine).
61999     Pivots  -   table of permutations which were made during
62000                 the LU decomposition.
62001                 Output of RMatrixLU subroutine.
62002     N       -   (optional) size of matrix A:
62003                 * if given, only principal NxN submatrix is processed and
62004                   overwritten. other elements are unchanged.
62005                 * if not given, automatically determined from matrix size
62006                   (A must be square matrix)
62007 
62008 Result: matrix determinant.
62009 
62010   -- ALGLIB --
62011      Copyright 2005 by Bochkanov Sergey
62012 *************************************************************************/
cmatrixludet(ae_matrix * a,ae_vector * pivots,ae_int_t n,ae_state * _state)62013 ae_complex cmatrixludet(/* Complex */ ae_matrix* a,
62014      /* Integer */ ae_vector* pivots,
62015      ae_int_t n,
62016      ae_state *_state)
62017 {
62018     ae_int_t i;
62019     ae_int_t s;
62020     ae_complex result;
62021 
62022 
62023     ae_assert(n>=1, "CMatrixLUDet: N<1!", _state);
62024     ae_assert(pivots->cnt>=n, "CMatrixLUDet: Pivots array is too short!", _state);
62025     ae_assert(a->rows>=n, "CMatrixLUDet: rows(A)<N!", _state);
62026     ae_assert(a->cols>=n, "CMatrixLUDet: cols(A)<N!", _state);
62027     ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixLUDet: A contains infinite or NaN values!", _state);
62028     result = ae_complex_from_i(1);
62029     s = 1;
62030     for(i=0; i<=n-1; i++)
62031     {
62032         result = ae_c_mul(result,a->ptr.pp_complex[i][i]);
62033         if( pivots->ptr.p_int[i]!=i )
62034         {
62035             s = -s;
62036         }
62037     }
62038     result = ae_c_mul_d(result,(double)(s));
62039     return result;
62040 }
62041 
62042 
62043 /*************************************************************************
62044 Calculation of the determinant of a general matrix
62045 
62046 Input parameters:
62047     A       -   matrix, array[0..N-1, 0..N-1]
62048     N       -   (optional) size of matrix A:
62049                 * if given, only principal NxN submatrix is processed and
62050                   overwritten. other elements are unchanged.
62051                 * if not given, automatically determined from matrix size
62052                   (A must be square matrix)
62053 
62054 Result: determinant of matrix A.
62055 
62056   -- ALGLIB --
62057      Copyright 2005 by Bochkanov Sergey
62058 *************************************************************************/
cmatrixdet(ae_matrix * a,ae_int_t n,ae_state * _state)62059 ae_complex cmatrixdet(/* Complex */ ae_matrix* a,
62060      ae_int_t n,
62061      ae_state *_state)
62062 {
62063     ae_frame _frame_block;
62064     ae_matrix _a;
62065     ae_vector pivots;
62066     ae_complex result;
62067 
62068     ae_frame_make(_state, &_frame_block);
62069     memset(&_a, 0, sizeof(_a));
62070     memset(&pivots, 0, sizeof(pivots));
62071     ae_matrix_init_copy(&_a, a, _state, ae_true);
62072     a = &_a;
62073     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
62074 
62075     ae_assert(n>=1, "CMatrixDet: N<1!", _state);
62076     ae_assert(a->rows>=n, "CMatrixDet: rows(A)<N!", _state);
62077     ae_assert(a->cols>=n, "CMatrixDet: cols(A)<N!", _state);
62078     ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixDet: A contains infinite or NaN values!", _state);
62079     cmatrixlu(a, n, n, &pivots, _state);
62080     result = cmatrixludet(a, &pivots, n, _state);
62081     ae_frame_leave(_state);
62082     return result;
62083 }
62084 
62085 
62086 /*************************************************************************
62087 Determinant calculation of the matrix given by the Cholesky decomposition.
62088 
62089 Input parameters:
62090     A       -   Cholesky decomposition,
62091                 output of SMatrixCholesky subroutine.
62092     N       -   (optional) size of matrix A:
62093                 * if given, only principal NxN submatrix is processed and
62094                   overwritten. other elements are unchanged.
62095                 * if not given, automatically determined from matrix size
62096                   (A must be square matrix)
62097 
62098 As the determinant is equal to the product of squares of diagonal elements,
62099 it's not necessary to specify which triangle - lower or upper - the matrix
62100 is stored in.
62101 
62102 Result:
62103     matrix determinant.
62104 
62105   -- ALGLIB --
62106      Copyright 2005-2008 by Bochkanov Sergey
62107 *************************************************************************/
spdmatrixcholeskydet(ae_matrix * a,ae_int_t n,ae_state * _state)62108 double spdmatrixcholeskydet(/* Real    */ ae_matrix* a,
62109      ae_int_t n,
62110      ae_state *_state)
62111 {
62112     ae_int_t i;
62113     ae_bool f;
62114     double result;
62115 
62116 
62117     ae_assert(n>=1, "SPDMatrixCholeskyDet: N<1!", _state);
62118     ae_assert(a->rows>=n, "SPDMatrixCholeskyDet: rows(A)<N!", _state);
62119     ae_assert(a->cols>=n, "SPDMatrixCholeskyDet: cols(A)<N!", _state);
62120     f = ae_true;
62121     for(i=0; i<=n-1; i++)
62122     {
62123         f = f&&ae_isfinite(a->ptr.pp_double[i][i], _state);
62124     }
62125     ae_assert(f, "SPDMatrixCholeskyDet: A contains infinite or NaN values!", _state);
62126     result = (double)(1);
62127     for(i=0; i<=n-1; i++)
62128     {
62129         result = result*ae_sqr(a->ptr.pp_double[i][i], _state);
62130     }
62131     return result;
62132 }
62133 
62134 
62135 /*************************************************************************
62136 Determinant calculation of the symmetric positive definite matrix.
62137 
62138 Input parameters:
62139     A       -   matrix. Array with elements [0..N-1, 0..N-1].
62140     N       -   (optional) size of matrix A:
62141                 * if given, only principal NxN submatrix is processed and
62142                   overwritten. other elements are unchanged.
62143                 * if not given, automatically determined from matrix size
62144                   (A must be square matrix)
62145     IsUpper -   (optional) storage type:
62146                 * if True, symmetric matrix  A  is  given  by  its  upper
62147                   triangle, and the lower triangle isn't used/changed  by
62148                   function
62149                 * if False, symmetric matrix  A  is  given  by  its lower
62150                   triangle, and the upper triangle isn't used/changed  by
62151                   function
62152                 * if not given, both lower and upper  triangles  must  be
62153                   filled.
62154 
62155 Result:
62156     determinant of matrix A.
62157     If matrix A is not positive definite, exception is thrown.
62158 
62159   -- ALGLIB --
62160      Copyright 2005-2008 by Bochkanov Sergey
62161 *************************************************************************/
spdmatrixdet(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)62162 double spdmatrixdet(/* Real    */ ae_matrix* a,
62163      ae_int_t n,
62164      ae_bool isupper,
62165      ae_state *_state)
62166 {
62167     ae_frame _frame_block;
62168     ae_matrix _a;
62169     ae_bool b;
62170     double result;
62171 
62172     ae_frame_make(_state, &_frame_block);
62173     memset(&_a, 0, sizeof(_a));
62174     ae_matrix_init_copy(&_a, a, _state, ae_true);
62175     a = &_a;
62176 
62177     ae_assert(n>=1, "SPDMatrixDet: N<1!", _state);
62178     ae_assert(a->rows>=n, "SPDMatrixDet: rows(A)<N!", _state);
62179     ae_assert(a->cols>=n, "SPDMatrixDet: cols(A)<N!", _state);
62180     ae_assert(isfinitertrmatrix(a, n, isupper, _state), "SPDMatrixDet: A contains infinite or NaN values!", _state);
62181     b = spdmatrixcholesky(a, n, isupper, _state);
62182     ae_assert(b, "SPDMatrixDet: A is not SPD!", _state);
62183     result = spdmatrixcholeskydet(a, n, _state);
62184     ae_frame_leave(_state);
62185     return result;
62186 }
62187 
62188 
62189 #endif
62190 
62191 }
62192 
62193