1 /*
2     This file is a port of the 'dgelsy' and 'zgelsy' functions (and the
3     functions that they call) from liblapack to C++.
4 
5     liblapack has the following license/copyright notice,
6     see http://www.netlib.org/lapack/LICENSE:
7 
8     Copyright (c) 1992-2011 The University of Tennessee and The University
9                             of Tennessee Research Foundation.  All rights
10                             reserved.
11     Copyright (c) 2000-2011 The University of California Berkeley. All
12                             rights reserved.
13     Copyright (c) 2006-2011 The University of Colorado Denver.  All rights
14                             reserved.
15 
16 
17     Redistribution and use in source and binary forms, with or without
18     modification, are permitted provided that the following conditions are
19     met:
20 
21     - Redistributions of source code must retain the above copyright
22       notice, this list of conditions and the following disclaimer.
23 
24     - Redistributions in binary form must reproduce the above copyright
25       notice, this list of conditions and the following disclaimer listed
26       in this license in the documentation and/or other materials
27       provided with the distribution.
28 
29     - Neither the name of the copyright holders nor the names of its
30       contributors may be used to endorse or promote products derived from
31       this software without specific prior written permission.
32 
33     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
34     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
35     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
36     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
37     OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
38     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
39     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
40     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
41     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
42     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
43     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45 
46 /*
47     This file is part of GNU APL, a free implementation of the
48     ISO/IEC Standard 13751, "Programming Language APL, Extended"
49 
50     Copyright (C) 2008-2015  Dr. Jürgen Sauermann
51 
52     This program is free software: you can redistribute it and/or modify
53     it under the terms of the GNU General Public License as published by
54     the Free Software Foundation, either version 3 of the License, or
55     (at your option) any later version.
56 
57     This program is distributed in the hope that it will be useful,
58     but WITHOUT ANY WARRANTY; without even the implied warranty of
59     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
60     GNU General Public License for more details.
61 
62     You should have received a copy of the GNU General Public License
63     along with this program.  If not, see <http://www.gnu.org/licenses/>.
64  */
65 
66 #ifndef __GELSY_HH_DEFINED__
67 #define __GELSY_HH_DEFINED__
68 
69 #ifndef assert
70 # define assert(x)
71 #endif
72 
73 #include <stdio.h>
74 #include <stdlib.h>
75 
76 using namespace std;
77 /// a real number
78 typedef APL_Float DD;
79 
80 /// a complex number
81 class ZZ
82 {
83 public:
84    /// 0J0
ZZ()85    ZZ()
86    : _r(0),
87      _i(0)
88    {}
89 
90    /// rJ0
ZZ(APL_Float r)91    ZZ(APL_Float r)
92    : _r(r),
93      _i(0)
94    {}
95 
96    /// rJi
ZZ(APL_Float r,APL_Float i)97    ZZ(APL_Float r, APL_Float i)
98    : _r(r),
99      _i(i)
100    {}
101 
102    /// return the real part of \b this
real() const103    APL_Float real() const       { return _r; }
104 
105    /// set the real part of \b this
set_real(APL_Float x)106    void set_real(APL_Float x)   { _r = x; }
107 
108    /// return the imag part of \b this
imag() const109    APL_Float imag() const    { return _i; }
110 
111    /// set the imag part of \b this
set_imag(APL_Float x)112    void set_imag(APL_Float x)   { _i = x; }
113 
114    /// conjugate \b this
conjugate()115    void conjugate()   { _i = - _i; }
116 
117    /// return true if \b this is real and not equal to \b dd
operator !=(APL_Float dd) const118    bool operator != (APL_Float dd) const   { return _r != dd || _i != 0.0; }
119 
120    /// return true if \b this is real and equal to \b dd
operator ==(APL_Float dd) const121    bool operator == (APL_Float dd) const   { return _r == dd && _i == 0.0; }
122 
123    /// add \b r zz to \b this
operator +(const ZZ & zz) const124    ZZ operator +(const ZZ & zz) const
125       { return ZZ(_r + zz._r, _i + zz._i); }
126 
127    /// subtract \b zz from \b this
operator -(const ZZ & zz) const128    ZZ operator -(const ZZ & zz) const
129       { return ZZ(_r - zz._r, _i - zz._i); }
130 
131    /// negate \b this
operator -() const132    ZZ operator -() const
133       { return ZZ(-_r, -_i); }
134 
135    /// multiply \b zz and \b this
operator *(const ZZ & zz) const136    ZZ operator *(const ZZ & zz) const
137       { return ZZ(_r * zz._r - _i * zz._i,
138                   _i * zz._r + _r * zz._i); }
139 
140    /// divide \b this by \b d
operator /(APL_Float d) const141    ZZ operator /(APL_Float d) const
142       { return ZZ(_r/d, _i/d); }
143 
144    /// divide \b this by \b zz
operator /(const ZZ & zz) const145    ZZ operator /(const ZZ & zz) const
146       {
147         const APL_Float denom = zz._r * zz._r + zz._i * zz._i;
148          return ZZ((_r * zz._r + _i * zz._i) / denom,
149                    (_i * zz._r - _r * zz._i) / denom);
150       }
151 
152 protected:
153    /// the real part
154   APL_Float _r;
155 
156    /// the imaginary part
157   APL_Float _i;
158 };
159 
160 /// the maximum of \b x and \b y
161 #define MAX(x, y) ((x) >= (y) ? (x) : (y))
162 
163 /// the minimum of \b x and \b y
164 #define MIN(x, y) ((x) <= (y) ? (x) : (y))
165 
166 /** a single double or complex number. This class contains wrappers to
167     double or complex numbers so that they can be used in templates.
168  */
169 /// A single double or complex number
170 class DZ
171 {
172 public:
173    /// return true if the template argument (DD or ZZ) is complex (i.e. ZZ)
is_ZZ(DD x)174    static bool is_ZZ(DD x)   { return false; }
175    /// return true if the template argument (DD or ZZ) is complex (i.e. ZZ)
is_ZZ(ZZ z)176    static bool is_ZZ(ZZ z)   { return true;  }
177 
178    /// return the real part of \b x
get_real(DD x)179    static APL_Float get_real(DD x)   { return x;   }
180 
181    /// return the imaginary part of \b x
get_imag(DD x)182    static APL_Float get_imag(DD x)   { return 0.0; }
183 
184    /// return the real part of \b x
get_real(ZZ z)185    static APL_Float get_real(ZZ z)   { return z.real(); }
186 
187    /// return the imaginary part of \b x
get_imag(ZZ z)188    static APL_Float get_imag(ZZ z)   { return z.imag(); }
189 
190    /// set the real part of \b x
set_real(DD & y,DD x)191    static void set_real(DD & y, DD x)   { y = x; }
192 
193    /// set the imaginary part of \b x
set_imag(DD & y,DD x)194    static void set_imag(DD & y, DD x)   { assert(x == 0.0); }
195 
196    /// set the real part of \b x
set_real(ZZ & y,DD x)197    static void set_real(ZZ & y, DD x)   { y.set_real(x); }
198 
199    /// set the imaginary part of \b x
set_imag(ZZ & y,DD x)200    static void set_imag(ZZ & y, DD x)   { y.set_imag(x); }
201 
202    /// conjugate \b x
conjugate(DD x)203    static void conjugate(DD x)     { }
204 
205    /// conjugate \b z
conjugate(ZZ & z)206    static void conjugate(ZZ & z)   { z.conjugate(); }
207 
208    /// return \b x conjugated
CONJ(DD x)209    static DD CONJ(DD x)           { return x; }
210 
211    /// return \b z conjugated
CONJ(const ZZ & z)212    static ZZ CONJ(const ZZ & z)   { return ZZ(z.real(), -z.imag()); }
213 
214    /// return the inverse of \b dd
inv(DD dd)215    static DD inv(DD dd)           { return 1.0 / dd; }
216 
217    /// return the inverse of \b zz
inv(const ZZ & zz)218    static inline ZZ inv(const ZZ & zz)
219       {
220         const APL_Float denom = zz.real() * zz.real() + zz.imag() * zz.imag();
221         return ZZ(zz.real()/denom, - zz.imag()/denom);
222       }
223 };
224 
225 /// return the real part of \b x
REAL(T x)226 template<typename T> APL_Float REAL(T x)        { return DZ::get_real(x);    }
227 
228 /// return the imaginary part of \b x
IMAG(T x)229 template<typename T> APL_Float IMAG(T x)        { return DZ::get_imag(x);    }
230 
231 /// set the real part of \b x
SREAL(T & y,APL_Float x)232 template<typename T> void  SREAL(T & y, APL_Float x)   { DZ::set_real(y, x); }
233 
234 /// set the imaginary part of \b x
SIMAG(T & y,APL_Float x)235 template<typename T> void  SIMAG(T & y, APL_Float x)   { DZ::set_imag(y, x); }
236 
237 /// set the real and imaginary parts of \b y
Sri(T & y,APL_Float xr,APL_Float xi)238 template<typename T> void Sri(T & y, APL_Float xr, APL_Float xi)
239    { SREAL<T>(y, xr);   SIMAG<T>(y, xi); }
240 
241 /// set the real part of \b y and clear its imaginary part
Sri(T & y,APL_Float xr)242 template<typename T> void Sri(T & y, APL_Float xr)
243    { SREAL<T>(y, xr);   SIMAG<T>(y, 0); }
244 
245 /// return the absolute value of a
ABS(DD a)246 inline DD ABS(DD a) { return a < 0.0 ? DD(-a) : a; }
247 
248 /// return the square of the absolute value of a
ABS_2(DD a)249 inline DD ABS_2(DD a) { return a*a; }
250 
251 /// return the absolute value of z
ABS(ZZ z)252 inline DD ABS(ZZ z) { return sqrt(z.real()*z.real() + z.imag()*z.imag()); }
253 
254 /// return the square of the absolute value of z
ABS_2(ZZ z)255 inline DD ABS_2(ZZ z) { return z.real()*z.real() + z.imag()*z.imag(); }
256 
257 /// return abs(a) with the sign of b
SIGN(APL_Float a,APL_Float b)258 inline APL_Float SIGN(APL_Float a, APL_Float b)
259 {
260    if (b < 0.0)  return -ABS(a);
261    else        return  ABS(a);
262 }
263 
264 /// swap x and y
265 template<typename T>
exchange(T & x,T & y)266 inline void exchange(T & x, T & y)
267 {
268 const T tmp = x;
269   x = y;
270   y = tmp;
271 }
272 //-----------------------------------------------------------------------------
273 /// A vector of real numbers or a vector of complex numbers
274 template<typename T>
275 class Vector
276 {
277 public:
278    /// constructor: vector of length _len, with values _data
279    /// _data must outlive \b this!
Vector(T * _data,ShapeItem _len)280    Vector(T * _data, ShapeItem _len)
281      : data(_data),
282        len (_len)
283    {}
284 
285    /// the first \b new_len elements of \b this
sub_len(ShapeItem new_len) const286    Vector sub_len(ShapeItem new_len) const
287       { assert(new_len <= len);   return Vector(data, new_len); }
288 
289    /// the rest of \b this, starting at \b off
sub_off(ShapeItem off) const290    Vector sub_off(ShapeItem off) const
291       { assert(off <= len);   return Vector(data + off, len - off); }
292 
293    /// \b new_len elements of \b this, starting at \b off
sub_off_len(ShapeItem off,ShapeItem new_len) const294    Vector sub_off_len(ShapeItem off, ShapeItem new_len) const
295       { assert((off + new_len) <= len);
296         return Vector(data + off, new_len); }
297 
298    /// return the number of elements
get_length() const299    const ShapeItem get_length() const { return len; }
300 
301    /// return true if \b this is a vector of complex numbers (and not a vector
302    /// of real numbers
is_ZZ() const303    bool is_ZZ() const   { return DZ::is_ZZ(*data); }
304 
305    /// the norm of \b this
norm() const306    APL_Float norm() const
307       {
308 //     if (len < 1)   return 0.0;
309 //     APL_Float scale = 0.0;
310 //     APL_Float ssq = 1.0;
311 
312         APL_Float ret = 0.0;
313         const T * dj = data;
314         loop(j, len)
315            {
316              const APL_Float re = REAL(*dj);
317              if (re != 0.0)
318                 {
319 /***
320                   const APL_Float temp = ABS(re);
321                   if (scale < temp)
322                      {
323                        const APL_Float scale_temp = scale/temp;
324                        ssq = 1.0 + ssq*scale_temp*scale_temp;
325                        scale = temp;
326                      }
327                   else
328                      {
329                        const APL_Float temp_scale = temp/scale;
330                        ssq += temp_scale*temp_scale;
331                      }
332 ***/
333                   ret += re*re;
334                 }
335 
336              const APL_Float im = IMAG(*dj);
337              if (im != 0.0)
338                 {
339 /***
340                   const APL_Float temp = ABS(im);
341                   if (scale < temp)
342                      {
343                        const APL_Float scale_temp = scale/temp;
344                        ssq = 1.0 + ssq*scale_temp*scale_temp;
345                        scale = temp;
346                      }
347                   else
348                      {
349                        const APL_Float temp_scale = temp/scale;
350                        ssq += temp_scale*temp_scale;
351                      }
352 ***/
353                   ret += im*im;
354                 }
355              ++dj;
356            }
357         return sqrt(ret);
358 //      return scale*sqrt(ssq);
359       }
360 
361    /// multiply \b this by \b factor
scale(T factor)362    void scale(T factor)
363       {
364         T * dj = data;
365         loop(j, len)
366             {
367               *dj = *dj * factor;
368               ++dj;
369             }
370       }
371 
372    /// set all elemens of \b this to 0
clear()373    void clear()
374       {
375         T * dj = data;
376         loop(j, len)   *dj++ = 0.0;
377       }
378 
379    /// return true if \b this is 0
is_zero(ShapeItem count) const380    bool is_zero(ShapeItem count) const
381       {
382         const T * dj = data;
383         loop(j, count)
384            {
385              if (IMAG(*dj) != 0.0)     return false;
386              if (REAL(*dj++) != 0.0)   return false;
387            }
388         return true;
389       }
390 
391    /// conjugate \b this
conjugate()392    void conjugate()
393       {
394         if (!is_ZZ())   return;
395         T * dj = data;
396         loop(j, len)   DZ::conjugate(*dj++);
397       }
398 
399    /// add \b other to \b this
add(const Vector & other)400    void add(const Vector & other)
401       {
402         assert(len == other.len);
403         T * dj = data;
404         T * sj = other.data;
405         loop(j, len)   *dj++ += *sj++;
406       }
407 
408    /// return \b this[i]
at(ShapeItem i)409    T & at(ShapeItem i)
410       { assert(i < len);  return *(data + i); }
411 
412    /// return \b this[i]
at(ShapeItem i) const413    const T & at(ShapeItem i) const
414       { assert(i < len);  return *(data + i); }
415 
416 protected:
417    /// the elements of \b this vector
418    T * const data;
419 
420    ///  the length of this vector
421    const ShapeItem len;
422 };
423 //-----------------------------------------------------------------------------
424 /// A double or complex matrix
425 template<typename T>
426 class Matrix
427 {
428 public:
429    /// constructor: _rows by _cols matrix values from _data
430    /// _data must outlive \b this!
Matrix(T * _data,ShapeItem _rows,ShapeItem _cols,ShapeItem _dx)431    Matrix(T * _data, ShapeItem _rows, ShapeItem _cols, ShapeItem _dx)
432      : data(_data),
433        rows(_rows),
434        cols(_cols),
435        dx(_dx)
436    {}
437 
438    /// constructor: sub-matrix of \b other with \b new_col_count columns
439    /// other must outlive \b this!
Matrix(const Matrix & other,ShapeItem new_col_count)440    Matrix(const Matrix & other, ShapeItem new_col_count)
441      : data(other.data),
442        rows(other.rows),
443        cols(new_col_count),
444        dx(other.dx)
445    { assert(new_col_count <= other.cols); }
446 
447    /// return the sub-matrix starting at row y and column x
sub_yx(ShapeItem row,ShapeItem col)448    Matrix sub_yx(ShapeItem row, ShapeItem col)
449       {
450         assert(col <= cols && row <= rows);
451         return Matrix(&at(row, col), rows - row, cols - col, dx);
452       }
453 
454    /// return the sub-matrix of size new_len_y: new_len_x
455    /// starting at row 0 and column 0
sub_len(ShapeItem new_rows,ShapeItem new_cols)456    Matrix sub_len(ShapeItem new_rows, ShapeItem new_cols)
457       {
458         assert(new_rows <= rows);
459         assert(new_cols <= cols);
460         return Matrix(data, new_rows, new_cols, dx);
461       }
462 
463    /// return true if \b this is a matrix of complex numbers (and not a matrix
464    /// of real numbers
is_ZZ() const465    bool is_ZZ() const   { return DZ::is_ZZ(*data); }
466 
467    /// return the number of rows
get_row_count() const468    const ShapeItem get_row_count() const
469       { return rows; }
470 
471    /// return the number of columns
get_column_count() const472    const ShapeItem get_column_count() const
473       { return cols; }
474 
475    /// return column \b c
get_column(ShapeItem c)476    Vector<T> get_column(ShapeItem c)
477       {
478         return Vector<T>(data + c*dx, rows);
479       }
480 
481    /// return column \b c
get_column(ShapeItem c) const482    const Vector<T> get_column(ShapeItem c) const
483       {
484         return Vector<T>(data + c*dx, rows);
485       }
486 
487    /// return \b this[i, j]
at(ShapeItem i,ShapeItem j)488    T & at(ShapeItem i, ShapeItem j)
489       { assert(i < rows);   assert(j < cols); return *(data + i + j*dx); }
490 
491    /// return \b this[i, j]
at(ShapeItem i,ShapeItem j) const492    const T & at(ShapeItem i, ShapeItem j) const
493       { assert(i < rows);   assert(j < cols); return *(data + i + j*dx); }
494 
495    /// return \b this[i, i]
diag(ShapeItem i) const496    T & diag(ShapeItem i) const
497       { assert(i < rows);   assert(i < cols); return *(data + i*(1 + dx)); }
498 
499    /// swap columns c1 and c2
exchange_columns(ShapeItem c1,ShapeItem c2)500    void exchange_columns(ShapeItem c1, ShapeItem c2)
501       {
502         assert(c1 < cols);
503         assert(c2 < cols);
504         T * p1 = data + c1*dx;
505         T * p2 = data + c2*dx;
506         loop(r, rows)   exchange(*p1++, *p2++);
507       }
508 
509    /// return the distance between two adjacent columns in \b data
get_dx() const510    ShapeItem get_dx() const   { return dx; }
511 
512    /// return the length of the largest element
max_norm() const513    APL_Float max_norm() const
514       {
515         const ShapeItem len = rows * cols;
516         if (is_ZZ())   // complex max. norm
517            {
518              APL_Float ret2 = 0;
519              loop(l, len)
520                 {
521                   const APL_Float a2 = ABS_2(data[l]);
522                   if (ret2 < a2)   ret2 = a2;
523                 }
524              return sqrt(ret2);
525            }
526         else           // real max norm
527            {
528              APL_Float ret = 0;
529              loop(l, len)
530                 {
531                   const APL_Float a = DZ::get_real(data[l]);
532                   if (ret < a)         ret = a;
533                   else if (ret < -a)   ret = -a;
534                 }
535              return ret;
536            }
537       }
538 
539    /// multiply \b this by \b factor
scale(T factor)540    void scale(T factor)
541       {
542         T * dj = data;
543         const ShapeItem len = rows*cols;
544         loop(j, len)
545             {
546                *dj = *dj * factor;
547                ++dj;
548             }
549       }
550 
551 protected:
552    /// the elements of \b this matrix
553    T * const data;
554 
555    /// the number of rows of \b this matrix
556    const ShapeItem rows;
557 
558    /// the number of columns of \b this matrix
559    const ShapeItem cols;
560 
561    /// return the distance between two adjacent columns in \b data
562    const ShapeItem dx;
563 };
564 //=============================================================================
565 
566 #include <math.h>
567 #include <stdio.h>
568 #include <string.h>
569 
570 //=============================================================================
571 
572    // numerical limits
573    //
574 #define dlamch_E 1.11022e-16
575 #define dlamch_S 2.22507e-308
576 #define dlamch_P 2.22045e-16
577 static const APL_Float small_number = dlamch_S / dlamch_P;
578 static const APL_Float big_number   = 1.0 / small_number;
579 static const APL_Float safe_min     =  dlamch_S / dlamch_E;
580 static const APL_Float inv_safe_min = 1.0 / safe_min;
581 static const APL_Float tol3z        = sqrt(dlamch_E);
582 
583 //-----------------------------------------------------------------------------
584 /// return the offset of the largest element in vec
585 inline ShapeItem
max_pos(const APL_Float * vec,ShapeItem len)586 max_pos(const APL_Float * vec, ShapeItem len)
587 {
588 ShapeItem imax = 0;
589 APL_Float dmax = ABS(vec[0]);
590    for (ShapeItem j = 1; j < len; ++j)
591        {
592          const APL_Float dj = ABS(vec[j]);
593          if (dmax < dj )   { dmax = dj;   imax = j; }
594        }
595 
596    return imax;
597 }
598 //-----------------------------------------------------------------------------
599 /// LApack function larfg
larfg(ShapeItem N,T & ALPHA,Vector<T> & x)600 template<typename T> T larfg(ShapeItem N, T & ALPHA, Vector<T> &x)
601 {
602    assert(N > 0);
603 
604    if (N == 1)   return T(0.0);
605 
606 APL_Float xnorm = x.norm();
607 APL_Float alpha_r = REAL(ALPHA);
608 APL_Float alpha_i = IMAG(ALPHA);
609 
610    if (xnorm == 0.0 && alpha_i == 0.0)   return T(0.0);
611 
612 APL_Float beta = -SIGN(sqrt(alpha_r*alpha_r + alpha_i*alpha_i + xnorm*xnorm),
613                     alpha_r);
614 
615 int kcnt = 0;
616    if (ABS(beta) < safe_min)
617        {
618          while (ABS(beta) < safe_min)
619             {
620               ++kcnt;
621               x.scale(inv_safe_min);
622               beta = beta * inv_safe_min;
623               alpha_r = alpha_r * inv_safe_min;
624               alpha_i = alpha_i * inv_safe_min;
625             }
626 
627          xnorm = x.norm();
628          Sri<T>(ALPHA, alpha_r, alpha_i);
629          beta = -SIGN(sqrt(alpha_r*alpha_r +
630                            alpha_i*alpha_i +
631                            xnorm*xnorm), alpha_r);
632        }
633 
634 T tau;
635    Sri<T>(tau, (beta - alpha_r)/beta, -alpha_i/beta);
636 const T factor = DZ::inv(ALPHA - beta);
637    x.scale(factor);
638 
639    loop(k, kcnt)   beta = beta * safe_min;
640    Sri<T>(ALPHA, beta);
641 
642    return tau;
643 }
644 //-----------------------------------------------------------------------------
645 /// LApack function trsm
646 template<typename T>
trsm(int M,int N,const Matrix<T> & A,Matrix<T> & B)647 void trsm(int M, int N, const Matrix<T> & A, Matrix<T> & B)
648 {
649    // only: SIDE   = 'Left'              - lside == true
650    //       UPLO   = 'Upper'             - upper = true
651    //       TRANSA = 'No transpose'      -
652    //       DIAG   = 'Non-unit' and      - nounit = true
653    //       ALPHA  = 1.0 is implemented!
654    //
655    loop(j_0, N)
656        {
657          for (ShapeItem k_0 = M - 1; k_0 >= 0; --k_0)
658              {
659                T & B_kj = B.at(k_0, j_0);
660                if (B_kj != 0.0)
661                   {
662                     B_kj = B_kj / A.diag(k_0);
663                     loop(i_0, k_0)
664                         B.at(i_0, j_0) = B.at(i_0, j_0) - B_kj * A.at(i_0, k_0);
665                   }
666              }
667        }
668 }
669 //-----------------------------------------------------------------------------
670 /// LApack function ila_lc
671 template<typename T>
ila_lc(ShapeItem M,ShapeItem N,const Matrix<T> & A)672 int ila_lc(ShapeItem M, ShapeItem N, const Matrix<T> & A)
673 {
674    for (ShapeItem col = N - 1; col > 0; --col)
675        {
676          const Vector<T> column = A.get_column(col);
677          if (!column.is_zero(M))   return col + 1;
678 
679          /* the above is the same as:
680 
681          for (ShapeItem row = 0; row < M; ++row)
682              {
683                if (REAL(A[row + col*LDA]) != 0)   return col + 1;
684                if (IMAG(A[row + col*LDA]) != 0)   return col + 1;
685              }
686 
687           */
688        }
689 
690    return 1;
691 }
692 //-----------------------------------------------------------------------------
693 /// LApack function gemv
694 template<typename T>
gemv(int M,int N,const Matrix<T> & A,const Vector<T> & x,Vector<T> & y)695 inline void gemv(int M, int N, const Matrix<T> & A, const Vector<T> &x,
696                  Vector<T> &y)
697 {
698    loop(j, N)
699        {
700          T temp(0.0);
701          loop(i, M)   temp = temp + DZ::CONJ(A.at(i, j)) * x.at(i);
702          y.at(j) = temp;
703        }
704 }
705 //-----------------------------------------------------------------------------
706 /// LApack function gerc
707 template<typename T>
gerc(int M,int N,T ALPHA,const Vector<T> & x,const Vector<T> & y,Matrix<T> & A)708 void gerc(int M, int N, T ALPHA, const Vector<T> &x, const Vector<T> &y,
709           Matrix<T> & A)
710 {
711    if (M == 0 || N == 0 || ALPHA == 0.0)   return;
712 
713    loop(j, N)
714       {
715         T Y_j = y.at(j);
716         if (Y_j != 0.0)
717            {
718              DZ::conjugate(Y_j);
719              Y_j = Y_j * ALPHA;
720              loop(i, M)   A.at(i, j) = A.at(i, j) + Y_j * x.at(i);
721            }
722       }
723 }
724 //-----------------------------------------------------------------------------
725 /// LApack function larf
726 template<typename T>
larf(Vector<T> & v,T tau,Matrix<T> & c)727 void larf(Vector<T> & v, T tau, Matrix<T> & c)
728 {
729 const ShapeItem M = c.get_row_count();
730 const ShapeItem N = c.get_column_count();
731 
732    // only SIDE == "Left" is implemented
733    //
734 ShapeItem  lastv = 0;
735 ShapeItem  lastc = 0;
736 
737    if (tau != 0.0)
738      {
739        // Look for the last non-zero row in V
740        //
741        lastv = M;
742        while (lastv > 0 && v.at(lastv - 1) == 0.0)   --lastv;
743 
744        // Scan for the last non-zero column in C(1:lastv,:)
745        //
746        lastc = ila_lc<T>(lastv, N, c);
747      }
748 
749    if (lastv)
750       {
751         APL_Float * gemv_data = new APL_Float[2*N];   // N double or N complex
752         Vector<T> gemv_result(reinterpret_cast<T *>(gemv_data), N);
753 
754         gemv<T>(lastv, lastc, c, v, gemv_result);
755         gerc<T>(lastv, lastc, -tau, v, gemv_result, c);
756         delete[] gemv_data;
757       }
758 }
759 //-----------------------------------------------------------------------------
760 /// LApack function laqp2
761 template<typename T>
laqp2(Matrix<T> & A,ShapeItem * pivot,T * tau,APL_Float * vn1)762 void laqp2(Matrix<T> & A, ShapeItem * pivot, T * tau, APL_Float * vn1)
763 {
764 const ShapeItem M = A.get_row_count();
765 const ShapeItem N = A.get_column_count();
766 
767 APL_Float * vn2 = vn1 + N;
768 
769    loop(i_0, N)
770       {
771         T & tau_i = tau[i_0];
772 
773         // Determine the i'th pivot column and swap if necessary.
774         //
775         const int pvt_0 = i_0 + max_pos(vn1 + i_0, N - i_0);
776 
777         if (pvt_0 != i_0)
778            {
779              A.exchange_columns(pvt_0, i_0);
780              exchange(pivot[pvt_0], pivot[i_0]);
781              vn1[pvt_0] = vn1[i_0];
782              vn2[pvt_0] = vn2[i_0];
783            }
784 
785         // Generate elementary reflector H(i).
786         //
787         {
788           if (i_0 < (M - 1))
789              {
790                Vector<T> x(&A.at(i_0 + 1, i_0), M - i_0 - 1);
791                T & alpha = *(&x.at(0) - 1);   // one before x
792                tau_i = larfg<T>(M - i_0, alpha, x);
793              }
794           else
795              {
796                Vector<T> x(&A.at(M - 1, i_0), 1);
797                T & alpha = x.at(0);           // at x
798                tau_i = larfg<T>(1, alpha, x);
799              }
800         }
801 
802         if (i_0 < (N - 1))
803            {
804              // Apply H(i)**H to A(offset+i:m,i+1:n) from the left.
805              //
806              const T Aii = A.diag(i_0);   // save diag
807              A.diag(i_0) = APL_Float(1.0);
808                 Vector<T> v(&A.diag(i_0), M - i_0);
809                 Matrix<T> c = A.sub_yx(i_0, i_0 + 1);
810                 larf<T>(v, DZ::CONJ(tau_i), c);
811              A.diag(i_0) = Aii;           // restore diag
812            }
813 
814         // Update partial column norms.
815         //
816         for (ShapeItem j_0 = i_0 + 1; j_0 < N; ++j_0)
817             {
818               if (vn1[j_0] != 0.0)
819                  {
820                    APL_Float temp = ABS(A.at(i_0, j_0)) / vn1[j_0];
821                    temp = 1.0 - temp*temp;
822                    temp = MAX(temp, APL_Float(0.0));
823 
824                    APL_Float temp2 = vn1[j_0] / vn2[j_0];
825                    temp2 = temp * temp2 * temp2;
826                    if (temp2 <= tol3z)
827                       {
828                         if (i_0 < (M - 1))
829                            {
830                              Vector<T> x(&A.at(i_0 + 1, j_0), M - i_0 - 1);
831                              vn1[j_0] = x.norm();
832                              vn2[j_0] = vn1[j_0];
833                            }
834                         else
835                            {
836                              vn1[j_0] = 0.0;
837                              vn2[j_0] = 0.0;
838                            }
839                       }
840                    else
841                       {
842                         vn1[j_0] = vn1[j_0] * sqrt(temp);
843                       }
844                  }
845             }
846       }
847 }
848 //-----------------------------------------------------------------------------
849 /// LApack function laic1 (maximum case)
850 template<typename T>
laic1_max(APL_Float SEST,T alpha,T GAMMA,APL_Float & SESTPR,T & S,T & C)851 void laic1_max(APL_Float SEST, T alpha, T GAMMA, APL_Float &SESTPR, T &S, T &C)
852 {
853 const APL_Float eps = dlamch_E;
854 const APL_Float abs_alpha = ABS(alpha);
855 const APL_Float abs_gamma = ABS(GAMMA);
856 const APL_Float abs_estimate = ABS(SEST);
857 
858    //    Estimating largest singular value ...
859    //
860 
861    //    special cases
862    //
863    if (SEST == 0.0)
864       {
865         const APL_Float s1 = MAX(abs_gamma, abs_alpha);
866         if (s1 == 0.0)
867            {
868              S = T(0.0);
869              C = T(1.0);
870              SESTPR = 0.0;
871            }
872         else
873            {
874              S = alpha / s1;
875              C = GAMMA / s1;
876 
877              const APL_Float tmp = sqrt(ABS_2(S) + ABS_2(C));
878              S = S / tmp;
879              C = C / tmp;
880              SESTPR = s1*tmp;
881            }
882         return;
883       }
884 
885    if (abs_gamma <= eps*abs_estimate)
886       {
887         S = T(1.0);
888         C = T(0.0);
889         APL_Float tmp = MAX(abs_estimate, abs_alpha);
890         const APL_Float s1 = abs_estimate / tmp;
891         const APL_Float s2 = abs_alpha / tmp;
892         SESTPR = tmp*sqrt(s1*s1 + s2*s2);
893         return;
894       }
895 
896    if (abs_alpha <= eps*abs_estimate)
897       {
898         const APL_Float s1 = abs_gamma;
899         const APL_Float s2 = abs_estimate;
900         if (s1 <= s2)
901            {
902              S = T(1.0);
903              C = T(0.0);
904              SESTPR = s2;
905            }
906         else
907            {
908              S = T(0.0);
909              C = T(1.0);
910              SESTPR = s1;
911            }
912         return;
913       }
914 
915    if (abs_estimate <= eps*abs_alpha || abs_estimate <= eps*abs_gamma)
916       {
917         const APL_Float s1 = abs_gamma;
918         const APL_Float s2 = abs_alpha;
919         if (s1 <= s2)
920            {
921              const APL_Float tmp = s1 / s2;
922              const APL_Float scl = sqrt(1.0 + tmp*tmp);
923              SESTPR = s2*scl;
924              S = (alpha / s2) / scl;
925              C = (GAMMA / s2) / scl;
926            }
927         else
928            {
929              const APL_Float tmp = s2 / s1;
930              const APL_Float scl = sqrt(1.0 + tmp*tmp);
931              SESTPR = s1*scl;
932              S = (alpha / s1) / scl;
933              C = (GAMMA / s1) / scl;
934            }
935         return;
936       }
937 
938    // normal case
939    //
940 const APL_Float zeta1 = abs_alpha / abs_estimate;
941 const APL_Float zeta2 = abs_gamma / abs_estimate;
942 const APL_Float b = (1.0 - zeta1*zeta1 - zeta2*zeta2)*0.5;
943 const APL_Float zeta1_2 = zeta1*zeta1;
944 APL_Float t;
945 
946    if (b > 0.0)  t = zeta1_2 / (b + sqrt(b*b + zeta1_2));
947    else          t = sqrt(b*b + zeta1_2) - b;
948 
949 const T sine = -( alpha / abs_estimate ) / t;
950 const T cosine = -( GAMMA / abs_estimate ) / ( 1.0 + t);
951 const APL_Float tmp = sqrt(ABS_2(sine) + ABS_2(cosine));
952    S = sine / tmp;
953    C = cosine / tmp;
954    SESTPR = sqrt(t + 1.0) * abs_estimate;
955 }
956 //-----------------------------------------------------------------------------
957 /// LApack function laic1 (minimum case)
958 template<typename T>
laic1_min(APL_Float SEST,T alpha,T GAMMA,APL_Float & SESTPR,T & S,T & C)959 void laic1_min(APL_Float SEST, T alpha, T GAMMA, APL_Float &SESTPR, T &S, T &C)
960 {
961 const APL_Float eps = dlamch_E;
962 const APL_Float abs_alpha = ABS(alpha);
963 const APL_Float abs_gamma = ABS(GAMMA);
964 const APL_Float abs_estimate = ABS(SEST);
965 
966    //    Estimating smallest singular value ...
967    //
968 
969    //    special cases
970    //
971    if (SEST == 0.0)
972       {
973         SESTPR = 0.0;
974         T sine(1.0);
975         T cosine(0.0);
976         if (abs_gamma > 0.0 || abs_alpha > 0.0)
977            {
978              sine   = -DZ::CONJ(GAMMA);
979              cosine =  DZ::CONJ(alpha);
980            }
981 
982         const APL_Float abs_sine = ABS(sine);
983         const APL_Float abs_cosine = ABS(cosine);
984         const APL_Float s1 = MAX(abs_sine, abs_cosine);
985         const T sine_s1 = sine / s1;
986         const T cosine_s1 = cosine / s1;
987         S = sine_s1;
988         C = cosine_s1;
989         const APL_Float tmp = sqrt(ABS_2(sine_s1) + ABS_2(cosine_s1));
990         S = S / tmp;
991         C = C / tmp;
992         return;
993       }
994 
995    if (abs_gamma <= eps*abs_estimate)
996       {
997         S = T(0.0);
998         C = T(1.0);
999         SESTPR = abs_gamma;
1000         return;
1001       }
1002 
1003    if (abs_alpha <= eps*abs_estimate)
1004       {
1005         const APL_Float s1 = abs_gamma;
1006         const APL_Float s2 = abs_estimate;
1007 
1008         if (s1 <= s2)
1009           {
1010             S = T(0.0);
1011             C = T(1.0);
1012             SESTPR = s1;
1013           }
1014         else
1015           {
1016             S = T(1.0);
1017             C = T(0.0);
1018             SESTPR = s2;
1019           }
1020         return;
1021       }
1022 
1023    if (abs_estimate <= eps*abs_alpha || abs_estimate <= eps*abs_gamma)
1024       {
1025         const APL_Float s1 = abs_gamma;
1026         const APL_Float s2 = abs_alpha;
1027         const T conj_gamma = DZ::CONJ(GAMMA);
1028         const T conj_alpha = DZ::CONJ(alpha);
1029 
1030         if (s1 <= s2)
1031            {
1032              const APL_Float tmp = s1 / s2;
1033              const APL_Float scl = sqrt(1.0 + tmp*tmp);
1034              const APL_Float tmp_scl = tmp / scl;
1035 
1036              SESTPR = abs_estimate * tmp_scl;
1037              S = -(conj_gamma / s2 ) / scl;
1038              C =  (conj_alpha / s2 ) / scl;
1039            }
1040         else
1041            {
1042              const APL_Float tmp = s2 / s1;
1043              const APL_Float scl = sqrt(1.0 + tmp*tmp);
1044 
1045              SESTPR = abs_estimate / scl;
1046              S = -( conj_gamma / s1 ) / scl;
1047              C = ( conj_alpha  / s1 ) / scl;
1048            }
1049         return;
1050       }
1051 
1052    // normal case
1053    //
1054 const APL_Float zeta1 = abs_alpha / abs_estimate;
1055 const APL_Float zeta2 = abs_gamma / abs_estimate;
1056 
1057 const APL_Float norma_1 = 1.0 + zeta1*zeta1 + zeta1*zeta2;
1058 const APL_Float norma_2 = zeta1*zeta2 + zeta2*zeta2;
1059 const APL_Float norma = MAX(norma_1, norma_2);
1060 
1061 const APL_Float test = 1.0 + 2.0*(zeta1-zeta2)*(zeta1+zeta2);
1062 T sine;
1063 T cosine;
1064    if (test >= 0.0 )
1065       {
1066         // root is close to zero, compute directly
1067         //
1068         const APL_Float b = (zeta1*zeta1 + zeta2*zeta2 + 1.0)*0.5;
1069         const APL_Float zeta2_2 = zeta2*zeta2;
1070         const APL_Float t = zeta2_2 / (b + sqrt(ABS(b*b - zeta2_2)));
1071         sine = (alpha / abs_estimate) / (1.0 - t);
1072         cosine = -( GAMMA / abs_estimate ) / t;
1073         SESTPR = sqrt(t + 4.0*eps*eps*norma)*abs_estimate;
1074       }
1075    else
1076       {
1077         // root is closer to ONE, shift by that amount
1078         //
1079         const APL_Float b = (zeta2*zeta2 + zeta1*zeta1 - 1.0)*0.5;
1080         const APL_Float zeta1_2 = zeta1*zeta1;
1081         APL_Float t;
1082         if (b >= 0.0)   t = -zeta1_2 / (b+sqrt(b*b + zeta1_2));
1083         else            t = b - sqrt(b*b + zeta1_2);
1084 
1085         sine = -( alpha / abs_estimate ) / t;
1086         cosine = -( GAMMA / abs_estimate ) / (1.0 + t);
1087         SESTPR = sqrt(1.0 + t + 4.0*eps*eps*norma)*abs_estimate;
1088       }
1089 
1090 const APL_Float tmp = sqrt(ABS_2(sine) + ABS_2(cosine));
1091    S = sine / tmp;
1092    C = cosine / tmp;
1093 }
1094 //-----------------------------------------------------------------------------
1095 /// LApack function unm2r
1096 template<typename T>
unm2r(ShapeItem K,Matrix<T> & A,const T * tau,Matrix<T> & c)1097 void unm2r(ShapeItem K, Matrix<T> & A, const T * tau, Matrix<T> & c)
1098 {
1099 const ShapeItem M = c.get_row_count();
1100 
1101    // only SIDE == "Left" is implemented
1102    // only TRANS = 'T' or 'C' is implemented
1103    // thus NOTRANS is always false
1104    //
1105    loop(i_0, K)
1106        {
1107          // H(i) or H(i)**H is applied to C(i:m,1:n)
1108          //
1109          const int MM = M - i_0;
1110 
1111          // Apply H(i) or H(i)**H
1112          //
1113          const T taui = DZ::CONJ(tau[i_0]);
1114 
1115          const T Aii = A.diag(i_0);
1116          A.diag(i_0) = APL_Float(1.0);
1117             Vector<T> v(&A.diag(i_0), MM);
1118             Matrix<T> c1 = c.sub_yx(i_0, 0);
1119             larf<T>(v, taui, c1);
1120          A.diag(i_0) = Aii;
1121        }
1122 }
1123 //-----------------------------------------------------------------------------
1124 /// LApack function geqp3
1125 template<typename T>
geqp3(Matrix<T> & A,ShapeItem * pivot,T * tau)1126 void geqp3(Matrix<T> & A, ShapeItem * pivot, T * tau)
1127 {
1128 const ShapeItem M = A.get_row_count();
1129 const ShapeItem N = A.get_column_count();
1130 
1131    // init column permutaion for pivoting
1132    // so we simply create the identical permutation
1133    //
1134    loop(j_0, N)   pivot[j_0] = j_0;
1135 
1136    // (no fixed columns)
1137 
1138    // Factorize free columns
1139    // ======================
1140    //
1141    {
1142      // Initialize partial column norms.
1143      // the first N elements of WORK store the exact column norms.
1144      //
1145      APL_Float * vn12 = new APL_Float[2*N];
1146      for (int j_0 = 0; j_0 < N; ++j_0)
1147          {
1148            Vector<T> x(&A.at(0, j_0), M);
1149            vn12[N + j_0] = vn12[j_0] = x.norm();
1150          }
1151 
1152      // Use unblocked code to factor the last or only block
1153      //
1154      laqp2<T>(A, pivot, tau, vn12);
1155      delete[] vn12;
1156    }
1157 }
1158 //-----------------------------------------------------------------------------
1159 /// LApack function estimating the rank of \b A
1160 template<typename T>
estimate_rank(const Matrix<T> & A,APL_Float rcond)1161 int estimate_rank(const Matrix<T> & A, APL_Float rcond)
1162 {
1163    // Determine RANK using incremental condition estimation...
1164 
1165 const ShapeItem N = A.get_column_count();
1166 
1167 APL_Float smax = ABS(A.diag(0));
1168 APL_Float smin = smax;
1169    if (smax == 0.0)   return 0;
1170 
1171    // store minima in work_min[ 0 ... N]
1172    // store maxima in work_max == work_min[N ... 2N]
1173    //
1174 APL_Float * work_1 = new APL_Float[4*N];
1175 T * work_min = reinterpret_cast<T *>(work_1);
1176 T * work_max = work_min + N;
1177 
1178    work_min[0] = T(1.0);
1179    work_max[0] = T(1.0);
1180 
1181    for (int RANK = 1; RANK < N; ++RANK)
1182        {
1183          APL_Float sminpr(0.0);
1184          APL_Float smaxpr(0.0);
1185          T s1(0.0);
1186          T s2(0.0);
1187          T c1(0.0);
1188          T c2(0.0);
1189          const T gamma(A.diag(RANK));
1190 
1191          T alpha_min(0.0);
1192          T alpha_max(0.0);
1193          for (int r = 0; r < RANK; ++r)
1194              {
1195                alpha_min = alpha_min + DZ::CONJ(work_min[r] * A.at(r, RANK));
1196                alpha_max = alpha_max + DZ::CONJ(work_max[r] * A.at(r, RANK));
1197              }
1198 
1199          laic1_min<T>(smin, alpha_min, gamma, sminpr, s1, c1);
1200          laic1_max<T>(smax, alpha_max, gamma, smaxpr, s2, c2);
1201 
1202          if (smaxpr*rcond > sminpr)
1203             {
1204               delete[] work_1;
1205               return RANK;
1206             }
1207 
1208          loop(cI, RANK)
1209               {
1210                 work_min[cI] = work_min[cI] * s1;
1211                 work_max[cI] = work_max[cI] * s2;
1212               }
1213          work_min[RANK] = c1;
1214          work_max[RANK] = c2;
1215          smin = sminpr;
1216          smax = smaxpr;
1217        }
1218 
1219    delete[] work_1;
1220    return N;
1221 }
1222 //-----------------------------------------------------------------------------
1223 template<typename T>
scaled_gelsy(Matrix<T> & A,Matrix<T> & B,double rcond)1224 int scaled_gelsy(Matrix<T> & A, Matrix<T> & B, double rcond)
1225 {
1226    // gelsy is optimized for (and restricted to) the following conditions:
1227    //
1228    // 0 < N <= M  →  min_NM = N and max_MN = M
1229    // 0 < NRHS
1230    //
1231 const ShapeItem N    = A.get_column_count();
1232 const ShapeItem NRHS = B.get_column_count();
1233 
1234    // Compute QR factorization with column pivoting of A:
1235    // A * P = Q * R
1236    //
1237 char * pivot_tau_tmp = new char[N*(sizeof(ShapeItem) + 4*sizeof(APL_Float))];
1238 
1239    // N pivot items
1240 ShapeItem * pivot = reinterpret_cast<ShapeItem *>(pivot_tau_tmp);
1241 T * tau = reinterpret_cast<T *>(pivot + N);       // N complex tau items
1242 T * tmp = tau + N;                                // N complex tmp1 items
1243 
1244    geqp3<T>(A, pivot, tau);
1245 
1246    // Details of Householder rotations stored in WORK(1:MN).
1247    //
1248    // Determine RANK using incremental condition estimation
1249    //
1250    {
1251      const int RANK = estimate_rank(A, rcond);
1252      if (RANK < N)
1253         {
1254           delete[] pivot_tau_tmp;
1255           return RANK;
1256         }
1257    }
1258 
1259    // from here on, RANK == N. We leave RANK in the comments but use N un
1260    // the code.
1261 
1262    // Logically partition R = [ R11 R12 ]
1263    //                         [  0  R22 ]
1264    // where R11 = R(1:RANK,1:RANK)
1265    // [R11,R12] = [ T11, 0 ] * Y
1266    //
1267 
1268    // Details of Householder rotations stored in WORK(MN+1:2*MN)
1269    //
1270    // B(1:M, 1:NRHS) := Q**H * B(1:M,1:NRHS)
1271    //
1272    unm2r<T>(N, A, tau, B);
1273 
1274    // B(1:RANK, 1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
1275    //
1276    {
1277      Matrix<T> B1 = B.sub_len(B.get_dx(), NRHS);
1278      trsm<T>(N, NRHS, A, B1);
1279    }
1280 
1281    // workspace: 2*MN+NRHS
1282    //
1283    // B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
1284    //
1285    loop(j_0, NRHS)
1286       {
1287         loop(i_0, N)   tmp[pivot[i_0]] = B.at(i_0, j_0);
1288         loop(i_0, N)   B.at(i_0, j_0) = tmp[i_0];
1289       }
1290 
1291    delete[] pivot_tau_tmp;
1292    return N;
1293 }
1294 //-----------------------------------------------------------------------------
1295 template<typename T>
gelsy(Matrix<T> & A,Matrix<T> & B,APL_Float rcond)1296 int gelsy(Matrix<T> & A, Matrix<T> &B, APL_Float rcond)
1297 {
1298 const ShapeItem M    = A.get_row_count();
1299 const ShapeItem N    = A.get_column_count();
1300 const ShapeItem NRHS = B.get_column_count();
1301 
1302    // APL is responsible for handling the empty cases
1303    //
1304    Assert(M && N && NRHS && N <= M);
1305 
1306    // For better precision, scale A and B so that their max. element lies
1307    // between small_number and big_number. Then call scaled_gelsy() and
1308    // scale the result back by the same factors.
1309    //
1310 APL_Float scale_A = 1.0;
1311    {
1312      const APL_Float norm_A = A.max_norm();
1313 
1314      if (norm_A == 0.0)                return 0;   // A is rank-deficient
1315      else if (norm_A < small_number)   A.scale(scale_A = big_number);
1316      else if (norm_A > big_number)     A.scale(scale_A = small_number);
1317    }
1318 
1319 APL_Float scale_B = 1.0;
1320    {
1321      const APL_Float norm_B = B.max_norm();
1322 
1323      if (norm_B == 0.0)                /* OK */ ;
1324      else if (norm_B < small_number)   B.scale(scale_B = big_number);
1325      else if (norm_B > big_number)     B.scale(scale_B = small_number);
1326    }
1327 
1328    {
1329      const int RANK = scaled_gelsy(A, B, rcond);
1330      if (RANK < N)   return RANK;   // this is an error
1331    }
1332 
1333    // Undo scaling
1334    //
1335    if (scale_A != 1.0)
1336       {
1337         Matrix<T> A1 = A.sub_len(N, N);
1338         Matrix<T> B1 = B.sub_len(N, NRHS);
1339         B1.scale(APL_Float(1.0/scale_A));
1340         A1.scale(scale_A);
1341       }
1342 
1343    if (scale_B != 1.0)
1344       {
1345         Matrix<T> B1 = B.sub_len(N, NRHS);
1346         B1.scale(scale_B);
1347       }
1348 
1349    return N;   // success
1350 }
1351 //=============================================================================
1352 
1353 #endif // __GELSY_HH_DEFINED__
1354