1 /*
2 This file is a port of the 'dgelsy' and 'zgelsy' functions (and the
3 functions that they call) from liblapack to C++.
4
5 liblapack has the following license/copyright notice,
6 see http://www.netlib.org/lapack/LICENSE:
7
8 Copyright (c) 1992-2011 The University of Tennessee and The University
9 of Tennessee Research Foundation. All rights
10 reserved.
11 Copyright (c) 2000-2011 The University of California Berkeley. All
12 rights reserved.
13 Copyright (c) 2006-2011 The University of Colorado Denver. All rights
14 reserved.
15
16
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are
19 met:
20
21 - Redistributions of source code must retain the above copyright
22 notice, this list of conditions and the following disclaimer.
23
24 - Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer listed
26 in this license in the documentation and/or other materials
27 provided with the distribution.
28
29 - Neither the name of the copyright holders nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
34 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
35 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
36 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
37 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
38 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
39 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
40 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
41 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
42 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
43 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46 /*
47 This file is part of GNU APL, a free implementation of the
48 ISO/IEC Standard 13751, "Programming Language APL, Extended"
49
50 Copyright (C) 2008-2015 Dr. Jürgen Sauermann
51
52 This program is free software: you can redistribute it and/or modify
53 it under the terms of the GNU General Public License as published by
54 the Free Software Foundation, either version 3 of the License, or
55 (at your option) any later version.
56
57 This program is distributed in the hope that it will be useful,
58 but WITHOUT ANY WARRANTY; without even the implied warranty of
59 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
60 GNU General Public License for more details.
61
62 You should have received a copy of the GNU General Public License
63 along with this program. If not, see <http://www.gnu.org/licenses/>.
64 */
65
66 #ifndef __GELSY_HH_DEFINED__
67 #define __GELSY_HH_DEFINED__
68
69 #ifndef assert
70 # define assert(x)
71 #endif
72
73 #include <stdio.h>
74 #include <stdlib.h>
75
76 using namespace std;
77 /// a real number
78 typedef APL_Float DD;
79
80 /// a complex number
81 class ZZ
82 {
83 public:
84 /// 0J0
ZZ()85 ZZ()
86 : _r(0),
87 _i(0)
88 {}
89
90 /// rJ0
ZZ(APL_Float r)91 ZZ(APL_Float r)
92 : _r(r),
93 _i(0)
94 {}
95
96 /// rJi
ZZ(APL_Float r,APL_Float i)97 ZZ(APL_Float r, APL_Float i)
98 : _r(r),
99 _i(i)
100 {}
101
102 /// return the real part of \b this
real() const103 APL_Float real() const { return _r; }
104
105 /// set the real part of \b this
set_real(APL_Float x)106 void set_real(APL_Float x) { _r = x; }
107
108 /// return the imag part of \b this
imag() const109 APL_Float imag() const { return _i; }
110
111 /// set the imag part of \b this
set_imag(APL_Float x)112 void set_imag(APL_Float x) { _i = x; }
113
114 /// conjugate \b this
conjugate()115 void conjugate() { _i = - _i; }
116
117 /// return true if \b this is real and not equal to \b dd
operator !=(APL_Float dd) const118 bool operator != (APL_Float dd) const { return _r != dd || _i != 0.0; }
119
120 /// return true if \b this is real and equal to \b dd
operator ==(APL_Float dd) const121 bool operator == (APL_Float dd) const { return _r == dd && _i == 0.0; }
122
123 /// add \b r zz to \b this
operator +(const ZZ & zz) const124 ZZ operator +(const ZZ & zz) const
125 { return ZZ(_r + zz._r, _i + zz._i); }
126
127 /// subtract \b zz from \b this
operator -(const ZZ & zz) const128 ZZ operator -(const ZZ & zz) const
129 { return ZZ(_r - zz._r, _i - zz._i); }
130
131 /// negate \b this
operator -() const132 ZZ operator -() const
133 { return ZZ(-_r, -_i); }
134
135 /// multiply \b zz and \b this
operator *(const ZZ & zz) const136 ZZ operator *(const ZZ & zz) const
137 { return ZZ(_r * zz._r - _i * zz._i,
138 _i * zz._r + _r * zz._i); }
139
140 /// divide \b this by \b d
operator /(APL_Float d) const141 ZZ operator /(APL_Float d) const
142 { return ZZ(_r/d, _i/d); }
143
144 /// divide \b this by \b zz
operator /(const ZZ & zz) const145 ZZ operator /(const ZZ & zz) const
146 {
147 const APL_Float denom = zz._r * zz._r + zz._i * zz._i;
148 return ZZ((_r * zz._r + _i * zz._i) / denom,
149 (_i * zz._r - _r * zz._i) / denom);
150 }
151
152 protected:
153 /// the real part
154 APL_Float _r;
155
156 /// the imaginary part
157 APL_Float _i;
158 };
159
160 /// the maximum of \b x and \b y
161 #define MAX(x, y) ((x) >= (y) ? (x) : (y))
162
163 /// the minimum of \b x and \b y
164 #define MIN(x, y) ((x) <= (y) ? (x) : (y))
165
166 /** a single double or complex number. This class contains wrappers to
167 double or complex numbers so that they can be used in templates.
168 */
169 /// A single double or complex number
170 class DZ
171 {
172 public:
173 /// return true if the template argument (DD or ZZ) is complex (i.e. ZZ)
is_ZZ(DD x)174 static bool is_ZZ(DD x) { return false; }
175 /// return true if the template argument (DD or ZZ) is complex (i.e. ZZ)
is_ZZ(ZZ z)176 static bool is_ZZ(ZZ z) { return true; }
177
178 /// return the real part of \b x
get_real(DD x)179 static APL_Float get_real(DD x) { return x; }
180
181 /// return the imaginary part of \b x
get_imag(DD x)182 static APL_Float get_imag(DD x) { return 0.0; }
183
184 /// return the real part of \b x
get_real(ZZ z)185 static APL_Float get_real(ZZ z) { return z.real(); }
186
187 /// return the imaginary part of \b x
get_imag(ZZ z)188 static APL_Float get_imag(ZZ z) { return z.imag(); }
189
190 /// set the real part of \b x
set_real(DD & y,DD x)191 static void set_real(DD & y, DD x) { y = x; }
192
193 /// set the imaginary part of \b x
set_imag(DD & y,DD x)194 static void set_imag(DD & y, DD x) { assert(x == 0.0); }
195
196 /// set the real part of \b x
set_real(ZZ & y,DD x)197 static void set_real(ZZ & y, DD x) { y.set_real(x); }
198
199 /// set the imaginary part of \b x
set_imag(ZZ & y,DD x)200 static void set_imag(ZZ & y, DD x) { y.set_imag(x); }
201
202 /// conjugate \b x
conjugate(DD x)203 static void conjugate(DD x) { }
204
205 /// conjugate \b z
conjugate(ZZ & z)206 static void conjugate(ZZ & z) { z.conjugate(); }
207
208 /// return \b x conjugated
CONJ(DD x)209 static DD CONJ(DD x) { return x; }
210
211 /// return \b z conjugated
CONJ(const ZZ & z)212 static ZZ CONJ(const ZZ & z) { return ZZ(z.real(), -z.imag()); }
213
214 /// return the inverse of \b dd
inv(DD dd)215 static DD inv(DD dd) { return 1.0 / dd; }
216
217 /// return the inverse of \b zz
inv(const ZZ & zz)218 static inline ZZ inv(const ZZ & zz)
219 {
220 const APL_Float denom = zz.real() * zz.real() + zz.imag() * zz.imag();
221 return ZZ(zz.real()/denom, - zz.imag()/denom);
222 }
223 };
224
225 /// return the real part of \b x
REAL(T x)226 template<typename T> APL_Float REAL(T x) { return DZ::get_real(x); }
227
228 /// return the imaginary part of \b x
IMAG(T x)229 template<typename T> APL_Float IMAG(T x) { return DZ::get_imag(x); }
230
231 /// set the real part of \b x
SREAL(T & y,APL_Float x)232 template<typename T> void SREAL(T & y, APL_Float x) { DZ::set_real(y, x); }
233
234 /// set the imaginary part of \b x
SIMAG(T & y,APL_Float x)235 template<typename T> void SIMAG(T & y, APL_Float x) { DZ::set_imag(y, x); }
236
237 /// set the real and imaginary parts of \b y
Sri(T & y,APL_Float xr,APL_Float xi)238 template<typename T> void Sri(T & y, APL_Float xr, APL_Float xi)
239 { SREAL<T>(y, xr); SIMAG<T>(y, xi); }
240
241 /// set the real part of \b y and clear its imaginary part
Sri(T & y,APL_Float xr)242 template<typename T> void Sri(T & y, APL_Float xr)
243 { SREAL<T>(y, xr); SIMAG<T>(y, 0); }
244
245 /// return the absolute value of a
ABS(DD a)246 inline DD ABS(DD a) { return a < 0.0 ? DD(-a) : a; }
247
248 /// return the square of the absolute value of a
ABS_2(DD a)249 inline DD ABS_2(DD a) { return a*a; }
250
251 /// return the absolute value of z
ABS(ZZ z)252 inline DD ABS(ZZ z) { return sqrt(z.real()*z.real() + z.imag()*z.imag()); }
253
254 /// return the square of the absolute value of z
ABS_2(ZZ z)255 inline DD ABS_2(ZZ z) { return z.real()*z.real() + z.imag()*z.imag(); }
256
257 /// return abs(a) with the sign of b
SIGN(APL_Float a,APL_Float b)258 inline APL_Float SIGN(APL_Float a, APL_Float b)
259 {
260 if (b < 0.0) return -ABS(a);
261 else return ABS(a);
262 }
263
264 /// swap x and y
265 template<typename T>
exchange(T & x,T & y)266 inline void exchange(T & x, T & y)
267 {
268 const T tmp = x;
269 x = y;
270 y = tmp;
271 }
272 //-----------------------------------------------------------------------------
273 /// A vector of real numbers or a vector of complex numbers
274 template<typename T>
275 class Vector
276 {
277 public:
278 /// constructor: vector of length _len, with values _data
279 /// _data must outlive \b this!
Vector(T * _data,ShapeItem _len)280 Vector(T * _data, ShapeItem _len)
281 : data(_data),
282 len (_len)
283 {}
284
285 /// the first \b new_len elements of \b this
sub_len(ShapeItem new_len) const286 Vector sub_len(ShapeItem new_len) const
287 { assert(new_len <= len); return Vector(data, new_len); }
288
289 /// the rest of \b this, starting at \b off
sub_off(ShapeItem off) const290 Vector sub_off(ShapeItem off) const
291 { assert(off <= len); return Vector(data + off, len - off); }
292
293 /// \b new_len elements of \b this, starting at \b off
sub_off_len(ShapeItem off,ShapeItem new_len) const294 Vector sub_off_len(ShapeItem off, ShapeItem new_len) const
295 { assert((off + new_len) <= len);
296 return Vector(data + off, new_len); }
297
298 /// return the number of elements
get_length() const299 const ShapeItem get_length() const { return len; }
300
301 /// return true if \b this is a vector of complex numbers (and not a vector
302 /// of real numbers
is_ZZ() const303 bool is_ZZ() const { return DZ::is_ZZ(*data); }
304
305 /// the norm of \b this
norm() const306 APL_Float norm() const
307 {
308 // if (len < 1) return 0.0;
309 // APL_Float scale = 0.0;
310 // APL_Float ssq = 1.0;
311
312 APL_Float ret = 0.0;
313 const T * dj = data;
314 loop(j, len)
315 {
316 const APL_Float re = REAL(*dj);
317 if (re != 0.0)
318 {
319 /***
320 const APL_Float temp = ABS(re);
321 if (scale < temp)
322 {
323 const APL_Float scale_temp = scale/temp;
324 ssq = 1.0 + ssq*scale_temp*scale_temp;
325 scale = temp;
326 }
327 else
328 {
329 const APL_Float temp_scale = temp/scale;
330 ssq += temp_scale*temp_scale;
331 }
332 ***/
333 ret += re*re;
334 }
335
336 const APL_Float im = IMAG(*dj);
337 if (im != 0.0)
338 {
339 /***
340 const APL_Float temp = ABS(im);
341 if (scale < temp)
342 {
343 const APL_Float scale_temp = scale/temp;
344 ssq = 1.0 + ssq*scale_temp*scale_temp;
345 scale = temp;
346 }
347 else
348 {
349 const APL_Float temp_scale = temp/scale;
350 ssq += temp_scale*temp_scale;
351 }
352 ***/
353 ret += im*im;
354 }
355 ++dj;
356 }
357 return sqrt(ret);
358 // return scale*sqrt(ssq);
359 }
360
361 /// multiply \b this by \b factor
scale(T factor)362 void scale(T factor)
363 {
364 T * dj = data;
365 loop(j, len)
366 {
367 *dj = *dj * factor;
368 ++dj;
369 }
370 }
371
372 /// set all elemens of \b this to 0
clear()373 void clear()
374 {
375 T * dj = data;
376 loop(j, len) *dj++ = 0.0;
377 }
378
379 /// return true if \b this is 0
is_zero(ShapeItem count) const380 bool is_zero(ShapeItem count) const
381 {
382 const T * dj = data;
383 loop(j, count)
384 {
385 if (IMAG(*dj) != 0.0) return false;
386 if (REAL(*dj++) != 0.0) return false;
387 }
388 return true;
389 }
390
391 /// conjugate \b this
conjugate()392 void conjugate()
393 {
394 if (!is_ZZ()) return;
395 T * dj = data;
396 loop(j, len) DZ::conjugate(*dj++);
397 }
398
399 /// add \b other to \b this
add(const Vector & other)400 void add(const Vector & other)
401 {
402 assert(len == other.len);
403 T * dj = data;
404 T * sj = other.data;
405 loop(j, len) *dj++ += *sj++;
406 }
407
408 /// return \b this[i]
at(ShapeItem i)409 T & at(ShapeItem i)
410 { assert(i < len); return *(data + i); }
411
412 /// return \b this[i]
at(ShapeItem i) const413 const T & at(ShapeItem i) const
414 { assert(i < len); return *(data + i); }
415
416 protected:
417 /// the elements of \b this vector
418 T * const data;
419
420 /// the length of this vector
421 const ShapeItem len;
422 };
423 //-----------------------------------------------------------------------------
424 /// A double or complex matrix
425 template<typename T>
426 class Matrix
427 {
428 public:
429 /// constructor: _rows by _cols matrix values from _data
430 /// _data must outlive \b this!
Matrix(T * _data,ShapeItem _rows,ShapeItem _cols,ShapeItem _dx)431 Matrix(T * _data, ShapeItem _rows, ShapeItem _cols, ShapeItem _dx)
432 : data(_data),
433 rows(_rows),
434 cols(_cols),
435 dx(_dx)
436 {}
437
438 /// constructor: sub-matrix of \b other with \b new_col_count columns
439 /// other must outlive \b this!
Matrix(const Matrix & other,ShapeItem new_col_count)440 Matrix(const Matrix & other, ShapeItem new_col_count)
441 : data(other.data),
442 rows(other.rows),
443 cols(new_col_count),
444 dx(other.dx)
445 { assert(new_col_count <= other.cols); }
446
447 /// return the sub-matrix starting at row y and column x
sub_yx(ShapeItem row,ShapeItem col)448 Matrix sub_yx(ShapeItem row, ShapeItem col)
449 {
450 assert(col <= cols && row <= rows);
451 return Matrix(&at(row, col), rows - row, cols - col, dx);
452 }
453
454 /// return the sub-matrix of size new_len_y: new_len_x
455 /// starting at row 0 and column 0
sub_len(ShapeItem new_rows,ShapeItem new_cols)456 Matrix sub_len(ShapeItem new_rows, ShapeItem new_cols)
457 {
458 assert(new_rows <= rows);
459 assert(new_cols <= cols);
460 return Matrix(data, new_rows, new_cols, dx);
461 }
462
463 /// return true if \b this is a matrix of complex numbers (and not a matrix
464 /// of real numbers
is_ZZ() const465 bool is_ZZ() const { return DZ::is_ZZ(*data); }
466
467 /// return the number of rows
get_row_count() const468 const ShapeItem get_row_count() const
469 { return rows; }
470
471 /// return the number of columns
get_column_count() const472 const ShapeItem get_column_count() const
473 { return cols; }
474
475 /// return column \b c
get_column(ShapeItem c)476 Vector<T> get_column(ShapeItem c)
477 {
478 return Vector<T>(data + c*dx, rows);
479 }
480
481 /// return column \b c
get_column(ShapeItem c) const482 const Vector<T> get_column(ShapeItem c) const
483 {
484 return Vector<T>(data + c*dx, rows);
485 }
486
487 /// return \b this[i, j]
at(ShapeItem i,ShapeItem j)488 T & at(ShapeItem i, ShapeItem j)
489 { assert(i < rows); assert(j < cols); return *(data + i + j*dx); }
490
491 /// return \b this[i, j]
at(ShapeItem i,ShapeItem j) const492 const T & at(ShapeItem i, ShapeItem j) const
493 { assert(i < rows); assert(j < cols); return *(data + i + j*dx); }
494
495 /// return \b this[i, i]
diag(ShapeItem i) const496 T & diag(ShapeItem i) const
497 { assert(i < rows); assert(i < cols); return *(data + i*(1 + dx)); }
498
499 /// swap columns c1 and c2
exchange_columns(ShapeItem c1,ShapeItem c2)500 void exchange_columns(ShapeItem c1, ShapeItem c2)
501 {
502 assert(c1 < cols);
503 assert(c2 < cols);
504 T * p1 = data + c1*dx;
505 T * p2 = data + c2*dx;
506 loop(r, rows) exchange(*p1++, *p2++);
507 }
508
509 /// return the distance between two adjacent columns in \b data
get_dx() const510 ShapeItem get_dx() const { return dx; }
511
512 /// return the length of the largest element
max_norm() const513 APL_Float max_norm() const
514 {
515 const ShapeItem len = rows * cols;
516 if (is_ZZ()) // complex max. norm
517 {
518 APL_Float ret2 = 0;
519 loop(l, len)
520 {
521 const APL_Float a2 = ABS_2(data[l]);
522 if (ret2 < a2) ret2 = a2;
523 }
524 return sqrt(ret2);
525 }
526 else // real max norm
527 {
528 APL_Float ret = 0;
529 loop(l, len)
530 {
531 const APL_Float a = DZ::get_real(data[l]);
532 if (ret < a) ret = a;
533 else if (ret < -a) ret = -a;
534 }
535 return ret;
536 }
537 }
538
539 /// multiply \b this by \b factor
scale(T factor)540 void scale(T factor)
541 {
542 T * dj = data;
543 const ShapeItem len = rows*cols;
544 loop(j, len)
545 {
546 *dj = *dj * factor;
547 ++dj;
548 }
549 }
550
551 protected:
552 /// the elements of \b this matrix
553 T * const data;
554
555 /// the number of rows of \b this matrix
556 const ShapeItem rows;
557
558 /// the number of columns of \b this matrix
559 const ShapeItem cols;
560
561 /// return the distance between two adjacent columns in \b data
562 const ShapeItem dx;
563 };
564 //=============================================================================
565
566 #include <math.h>
567 #include <stdio.h>
568 #include <string.h>
569
570 //=============================================================================
571
572 // numerical limits
573 //
574 #define dlamch_E 1.11022e-16
575 #define dlamch_S 2.22507e-308
576 #define dlamch_P 2.22045e-16
577 static const APL_Float small_number = dlamch_S / dlamch_P;
578 static const APL_Float big_number = 1.0 / small_number;
579 static const APL_Float safe_min = dlamch_S / dlamch_E;
580 static const APL_Float inv_safe_min = 1.0 / safe_min;
581 static const APL_Float tol3z = sqrt(dlamch_E);
582
583 //-----------------------------------------------------------------------------
584 /// return the offset of the largest element in vec
585 inline ShapeItem
max_pos(const APL_Float * vec,ShapeItem len)586 max_pos(const APL_Float * vec, ShapeItem len)
587 {
588 ShapeItem imax = 0;
589 APL_Float dmax = ABS(vec[0]);
590 for (ShapeItem j = 1; j < len; ++j)
591 {
592 const APL_Float dj = ABS(vec[j]);
593 if (dmax < dj ) { dmax = dj; imax = j; }
594 }
595
596 return imax;
597 }
598 //-----------------------------------------------------------------------------
599 /// LApack function larfg
larfg(ShapeItem N,T & ALPHA,Vector<T> & x)600 template<typename T> T larfg(ShapeItem N, T & ALPHA, Vector<T> &x)
601 {
602 assert(N > 0);
603
604 if (N == 1) return T(0.0);
605
606 APL_Float xnorm = x.norm();
607 APL_Float alpha_r = REAL(ALPHA);
608 APL_Float alpha_i = IMAG(ALPHA);
609
610 if (xnorm == 0.0 && alpha_i == 0.0) return T(0.0);
611
612 APL_Float beta = -SIGN(sqrt(alpha_r*alpha_r + alpha_i*alpha_i + xnorm*xnorm),
613 alpha_r);
614
615 int kcnt = 0;
616 if (ABS(beta) < safe_min)
617 {
618 while (ABS(beta) < safe_min)
619 {
620 ++kcnt;
621 x.scale(inv_safe_min);
622 beta = beta * inv_safe_min;
623 alpha_r = alpha_r * inv_safe_min;
624 alpha_i = alpha_i * inv_safe_min;
625 }
626
627 xnorm = x.norm();
628 Sri<T>(ALPHA, alpha_r, alpha_i);
629 beta = -SIGN(sqrt(alpha_r*alpha_r +
630 alpha_i*alpha_i +
631 xnorm*xnorm), alpha_r);
632 }
633
634 T tau;
635 Sri<T>(tau, (beta - alpha_r)/beta, -alpha_i/beta);
636 const T factor = DZ::inv(ALPHA - beta);
637 x.scale(factor);
638
639 loop(k, kcnt) beta = beta * safe_min;
640 Sri<T>(ALPHA, beta);
641
642 return tau;
643 }
644 //-----------------------------------------------------------------------------
645 /// LApack function trsm
646 template<typename T>
trsm(int M,int N,const Matrix<T> & A,Matrix<T> & B)647 void trsm(int M, int N, const Matrix<T> & A, Matrix<T> & B)
648 {
649 // only: SIDE = 'Left' - lside == true
650 // UPLO = 'Upper' - upper = true
651 // TRANSA = 'No transpose' -
652 // DIAG = 'Non-unit' and - nounit = true
653 // ALPHA = 1.0 is implemented!
654 //
655 loop(j_0, N)
656 {
657 for (ShapeItem k_0 = M - 1; k_0 >= 0; --k_0)
658 {
659 T & B_kj = B.at(k_0, j_0);
660 if (B_kj != 0.0)
661 {
662 B_kj = B_kj / A.diag(k_0);
663 loop(i_0, k_0)
664 B.at(i_0, j_0) = B.at(i_0, j_0) - B_kj * A.at(i_0, k_0);
665 }
666 }
667 }
668 }
669 //-----------------------------------------------------------------------------
670 /// LApack function ila_lc
671 template<typename T>
ila_lc(ShapeItem M,ShapeItem N,const Matrix<T> & A)672 int ila_lc(ShapeItem M, ShapeItem N, const Matrix<T> & A)
673 {
674 for (ShapeItem col = N - 1; col > 0; --col)
675 {
676 const Vector<T> column = A.get_column(col);
677 if (!column.is_zero(M)) return col + 1;
678
679 /* the above is the same as:
680
681 for (ShapeItem row = 0; row < M; ++row)
682 {
683 if (REAL(A[row + col*LDA]) != 0) return col + 1;
684 if (IMAG(A[row + col*LDA]) != 0) return col + 1;
685 }
686
687 */
688 }
689
690 return 1;
691 }
692 //-----------------------------------------------------------------------------
693 /// LApack function gemv
694 template<typename T>
gemv(int M,int N,const Matrix<T> & A,const Vector<T> & x,Vector<T> & y)695 inline void gemv(int M, int N, const Matrix<T> & A, const Vector<T> &x,
696 Vector<T> &y)
697 {
698 loop(j, N)
699 {
700 T temp(0.0);
701 loop(i, M) temp = temp + DZ::CONJ(A.at(i, j)) * x.at(i);
702 y.at(j) = temp;
703 }
704 }
705 //-----------------------------------------------------------------------------
706 /// LApack function gerc
707 template<typename T>
gerc(int M,int N,T ALPHA,const Vector<T> & x,const Vector<T> & y,Matrix<T> & A)708 void gerc(int M, int N, T ALPHA, const Vector<T> &x, const Vector<T> &y,
709 Matrix<T> & A)
710 {
711 if (M == 0 || N == 0 || ALPHA == 0.0) return;
712
713 loop(j, N)
714 {
715 T Y_j = y.at(j);
716 if (Y_j != 0.0)
717 {
718 DZ::conjugate(Y_j);
719 Y_j = Y_j * ALPHA;
720 loop(i, M) A.at(i, j) = A.at(i, j) + Y_j * x.at(i);
721 }
722 }
723 }
724 //-----------------------------------------------------------------------------
725 /// LApack function larf
726 template<typename T>
larf(Vector<T> & v,T tau,Matrix<T> & c)727 void larf(Vector<T> & v, T tau, Matrix<T> & c)
728 {
729 const ShapeItem M = c.get_row_count();
730 const ShapeItem N = c.get_column_count();
731
732 // only SIDE == "Left" is implemented
733 //
734 ShapeItem lastv = 0;
735 ShapeItem lastc = 0;
736
737 if (tau != 0.0)
738 {
739 // Look for the last non-zero row in V
740 //
741 lastv = M;
742 while (lastv > 0 && v.at(lastv - 1) == 0.0) --lastv;
743
744 // Scan for the last non-zero column in C(1:lastv,:)
745 //
746 lastc = ila_lc<T>(lastv, N, c);
747 }
748
749 if (lastv)
750 {
751 APL_Float * gemv_data = new APL_Float[2*N]; // N double or N complex
752 Vector<T> gemv_result(reinterpret_cast<T *>(gemv_data), N);
753
754 gemv<T>(lastv, lastc, c, v, gemv_result);
755 gerc<T>(lastv, lastc, -tau, v, gemv_result, c);
756 delete[] gemv_data;
757 }
758 }
759 //-----------------------------------------------------------------------------
760 /// LApack function laqp2
761 template<typename T>
laqp2(Matrix<T> & A,ShapeItem * pivot,T * tau,APL_Float * vn1)762 void laqp2(Matrix<T> & A, ShapeItem * pivot, T * tau, APL_Float * vn1)
763 {
764 const ShapeItem M = A.get_row_count();
765 const ShapeItem N = A.get_column_count();
766
767 APL_Float * vn2 = vn1 + N;
768
769 loop(i_0, N)
770 {
771 T & tau_i = tau[i_0];
772
773 // Determine the i'th pivot column and swap if necessary.
774 //
775 const int pvt_0 = i_0 + max_pos(vn1 + i_0, N - i_0);
776
777 if (pvt_0 != i_0)
778 {
779 A.exchange_columns(pvt_0, i_0);
780 exchange(pivot[pvt_0], pivot[i_0]);
781 vn1[pvt_0] = vn1[i_0];
782 vn2[pvt_0] = vn2[i_0];
783 }
784
785 // Generate elementary reflector H(i).
786 //
787 {
788 if (i_0 < (M - 1))
789 {
790 Vector<T> x(&A.at(i_0 + 1, i_0), M - i_0 - 1);
791 T & alpha = *(&x.at(0) - 1); // one before x
792 tau_i = larfg<T>(M - i_0, alpha, x);
793 }
794 else
795 {
796 Vector<T> x(&A.at(M - 1, i_0), 1);
797 T & alpha = x.at(0); // at x
798 tau_i = larfg<T>(1, alpha, x);
799 }
800 }
801
802 if (i_0 < (N - 1))
803 {
804 // Apply H(i)**H to A(offset+i:m,i+1:n) from the left.
805 //
806 const T Aii = A.diag(i_0); // save diag
807 A.diag(i_0) = APL_Float(1.0);
808 Vector<T> v(&A.diag(i_0), M - i_0);
809 Matrix<T> c = A.sub_yx(i_0, i_0 + 1);
810 larf<T>(v, DZ::CONJ(tau_i), c);
811 A.diag(i_0) = Aii; // restore diag
812 }
813
814 // Update partial column norms.
815 //
816 for (ShapeItem j_0 = i_0 + 1; j_0 < N; ++j_0)
817 {
818 if (vn1[j_0] != 0.0)
819 {
820 APL_Float temp = ABS(A.at(i_0, j_0)) / vn1[j_0];
821 temp = 1.0 - temp*temp;
822 temp = MAX(temp, APL_Float(0.0));
823
824 APL_Float temp2 = vn1[j_0] / vn2[j_0];
825 temp2 = temp * temp2 * temp2;
826 if (temp2 <= tol3z)
827 {
828 if (i_0 < (M - 1))
829 {
830 Vector<T> x(&A.at(i_0 + 1, j_0), M - i_0 - 1);
831 vn1[j_0] = x.norm();
832 vn2[j_0] = vn1[j_0];
833 }
834 else
835 {
836 vn1[j_0] = 0.0;
837 vn2[j_0] = 0.0;
838 }
839 }
840 else
841 {
842 vn1[j_0] = vn1[j_0] * sqrt(temp);
843 }
844 }
845 }
846 }
847 }
848 //-----------------------------------------------------------------------------
849 /// LApack function laic1 (maximum case)
850 template<typename T>
laic1_max(APL_Float SEST,T alpha,T GAMMA,APL_Float & SESTPR,T & S,T & C)851 void laic1_max(APL_Float SEST, T alpha, T GAMMA, APL_Float &SESTPR, T &S, T &C)
852 {
853 const APL_Float eps = dlamch_E;
854 const APL_Float abs_alpha = ABS(alpha);
855 const APL_Float abs_gamma = ABS(GAMMA);
856 const APL_Float abs_estimate = ABS(SEST);
857
858 // Estimating largest singular value ...
859 //
860
861 // special cases
862 //
863 if (SEST == 0.0)
864 {
865 const APL_Float s1 = MAX(abs_gamma, abs_alpha);
866 if (s1 == 0.0)
867 {
868 S = T(0.0);
869 C = T(1.0);
870 SESTPR = 0.0;
871 }
872 else
873 {
874 S = alpha / s1;
875 C = GAMMA / s1;
876
877 const APL_Float tmp = sqrt(ABS_2(S) + ABS_2(C));
878 S = S / tmp;
879 C = C / tmp;
880 SESTPR = s1*tmp;
881 }
882 return;
883 }
884
885 if (abs_gamma <= eps*abs_estimate)
886 {
887 S = T(1.0);
888 C = T(0.0);
889 APL_Float tmp = MAX(abs_estimate, abs_alpha);
890 const APL_Float s1 = abs_estimate / tmp;
891 const APL_Float s2 = abs_alpha / tmp;
892 SESTPR = tmp*sqrt(s1*s1 + s2*s2);
893 return;
894 }
895
896 if (abs_alpha <= eps*abs_estimate)
897 {
898 const APL_Float s1 = abs_gamma;
899 const APL_Float s2 = abs_estimate;
900 if (s1 <= s2)
901 {
902 S = T(1.0);
903 C = T(0.0);
904 SESTPR = s2;
905 }
906 else
907 {
908 S = T(0.0);
909 C = T(1.0);
910 SESTPR = s1;
911 }
912 return;
913 }
914
915 if (abs_estimate <= eps*abs_alpha || abs_estimate <= eps*abs_gamma)
916 {
917 const APL_Float s1 = abs_gamma;
918 const APL_Float s2 = abs_alpha;
919 if (s1 <= s2)
920 {
921 const APL_Float tmp = s1 / s2;
922 const APL_Float scl = sqrt(1.0 + tmp*tmp);
923 SESTPR = s2*scl;
924 S = (alpha / s2) / scl;
925 C = (GAMMA / s2) / scl;
926 }
927 else
928 {
929 const APL_Float tmp = s2 / s1;
930 const APL_Float scl = sqrt(1.0 + tmp*tmp);
931 SESTPR = s1*scl;
932 S = (alpha / s1) / scl;
933 C = (GAMMA / s1) / scl;
934 }
935 return;
936 }
937
938 // normal case
939 //
940 const APL_Float zeta1 = abs_alpha / abs_estimate;
941 const APL_Float zeta2 = abs_gamma / abs_estimate;
942 const APL_Float b = (1.0 - zeta1*zeta1 - zeta2*zeta2)*0.5;
943 const APL_Float zeta1_2 = zeta1*zeta1;
944 APL_Float t;
945
946 if (b > 0.0) t = zeta1_2 / (b + sqrt(b*b + zeta1_2));
947 else t = sqrt(b*b + zeta1_2) - b;
948
949 const T sine = -( alpha / abs_estimate ) / t;
950 const T cosine = -( GAMMA / abs_estimate ) / ( 1.0 + t);
951 const APL_Float tmp = sqrt(ABS_2(sine) + ABS_2(cosine));
952 S = sine / tmp;
953 C = cosine / tmp;
954 SESTPR = sqrt(t + 1.0) * abs_estimate;
955 }
956 //-----------------------------------------------------------------------------
957 /// LApack function laic1 (minimum case)
958 template<typename T>
laic1_min(APL_Float SEST,T alpha,T GAMMA,APL_Float & SESTPR,T & S,T & C)959 void laic1_min(APL_Float SEST, T alpha, T GAMMA, APL_Float &SESTPR, T &S, T &C)
960 {
961 const APL_Float eps = dlamch_E;
962 const APL_Float abs_alpha = ABS(alpha);
963 const APL_Float abs_gamma = ABS(GAMMA);
964 const APL_Float abs_estimate = ABS(SEST);
965
966 // Estimating smallest singular value ...
967 //
968
969 // special cases
970 //
971 if (SEST == 0.0)
972 {
973 SESTPR = 0.0;
974 T sine(1.0);
975 T cosine(0.0);
976 if (abs_gamma > 0.0 || abs_alpha > 0.0)
977 {
978 sine = -DZ::CONJ(GAMMA);
979 cosine = DZ::CONJ(alpha);
980 }
981
982 const APL_Float abs_sine = ABS(sine);
983 const APL_Float abs_cosine = ABS(cosine);
984 const APL_Float s1 = MAX(abs_sine, abs_cosine);
985 const T sine_s1 = sine / s1;
986 const T cosine_s1 = cosine / s1;
987 S = sine_s1;
988 C = cosine_s1;
989 const APL_Float tmp = sqrt(ABS_2(sine_s1) + ABS_2(cosine_s1));
990 S = S / tmp;
991 C = C / tmp;
992 return;
993 }
994
995 if (abs_gamma <= eps*abs_estimate)
996 {
997 S = T(0.0);
998 C = T(1.0);
999 SESTPR = abs_gamma;
1000 return;
1001 }
1002
1003 if (abs_alpha <= eps*abs_estimate)
1004 {
1005 const APL_Float s1 = abs_gamma;
1006 const APL_Float s2 = abs_estimate;
1007
1008 if (s1 <= s2)
1009 {
1010 S = T(0.0);
1011 C = T(1.0);
1012 SESTPR = s1;
1013 }
1014 else
1015 {
1016 S = T(1.0);
1017 C = T(0.0);
1018 SESTPR = s2;
1019 }
1020 return;
1021 }
1022
1023 if (abs_estimate <= eps*abs_alpha || abs_estimate <= eps*abs_gamma)
1024 {
1025 const APL_Float s1 = abs_gamma;
1026 const APL_Float s2 = abs_alpha;
1027 const T conj_gamma = DZ::CONJ(GAMMA);
1028 const T conj_alpha = DZ::CONJ(alpha);
1029
1030 if (s1 <= s2)
1031 {
1032 const APL_Float tmp = s1 / s2;
1033 const APL_Float scl = sqrt(1.0 + tmp*tmp);
1034 const APL_Float tmp_scl = tmp / scl;
1035
1036 SESTPR = abs_estimate * tmp_scl;
1037 S = -(conj_gamma / s2 ) / scl;
1038 C = (conj_alpha / s2 ) / scl;
1039 }
1040 else
1041 {
1042 const APL_Float tmp = s2 / s1;
1043 const APL_Float scl = sqrt(1.0 + tmp*tmp);
1044
1045 SESTPR = abs_estimate / scl;
1046 S = -( conj_gamma / s1 ) / scl;
1047 C = ( conj_alpha / s1 ) / scl;
1048 }
1049 return;
1050 }
1051
1052 // normal case
1053 //
1054 const APL_Float zeta1 = abs_alpha / abs_estimate;
1055 const APL_Float zeta2 = abs_gamma / abs_estimate;
1056
1057 const APL_Float norma_1 = 1.0 + zeta1*zeta1 + zeta1*zeta2;
1058 const APL_Float norma_2 = zeta1*zeta2 + zeta2*zeta2;
1059 const APL_Float norma = MAX(norma_1, norma_2);
1060
1061 const APL_Float test = 1.0 + 2.0*(zeta1-zeta2)*(zeta1+zeta2);
1062 T sine;
1063 T cosine;
1064 if (test >= 0.0 )
1065 {
1066 // root is close to zero, compute directly
1067 //
1068 const APL_Float b = (zeta1*zeta1 + zeta2*zeta2 + 1.0)*0.5;
1069 const APL_Float zeta2_2 = zeta2*zeta2;
1070 const APL_Float t = zeta2_2 / (b + sqrt(ABS(b*b - zeta2_2)));
1071 sine = (alpha / abs_estimate) / (1.0 - t);
1072 cosine = -( GAMMA / abs_estimate ) / t;
1073 SESTPR = sqrt(t + 4.0*eps*eps*norma)*abs_estimate;
1074 }
1075 else
1076 {
1077 // root is closer to ONE, shift by that amount
1078 //
1079 const APL_Float b = (zeta2*zeta2 + zeta1*zeta1 - 1.0)*0.5;
1080 const APL_Float zeta1_2 = zeta1*zeta1;
1081 APL_Float t;
1082 if (b >= 0.0) t = -zeta1_2 / (b+sqrt(b*b + zeta1_2));
1083 else t = b - sqrt(b*b + zeta1_2);
1084
1085 sine = -( alpha / abs_estimate ) / t;
1086 cosine = -( GAMMA / abs_estimate ) / (1.0 + t);
1087 SESTPR = sqrt(1.0 + t + 4.0*eps*eps*norma)*abs_estimate;
1088 }
1089
1090 const APL_Float tmp = sqrt(ABS_2(sine) + ABS_2(cosine));
1091 S = sine / tmp;
1092 C = cosine / tmp;
1093 }
1094 //-----------------------------------------------------------------------------
1095 /// LApack function unm2r
1096 template<typename T>
unm2r(ShapeItem K,Matrix<T> & A,const T * tau,Matrix<T> & c)1097 void unm2r(ShapeItem K, Matrix<T> & A, const T * tau, Matrix<T> & c)
1098 {
1099 const ShapeItem M = c.get_row_count();
1100
1101 // only SIDE == "Left" is implemented
1102 // only TRANS = 'T' or 'C' is implemented
1103 // thus NOTRANS is always false
1104 //
1105 loop(i_0, K)
1106 {
1107 // H(i) or H(i)**H is applied to C(i:m,1:n)
1108 //
1109 const int MM = M - i_0;
1110
1111 // Apply H(i) or H(i)**H
1112 //
1113 const T taui = DZ::CONJ(tau[i_0]);
1114
1115 const T Aii = A.diag(i_0);
1116 A.diag(i_0) = APL_Float(1.0);
1117 Vector<T> v(&A.diag(i_0), MM);
1118 Matrix<T> c1 = c.sub_yx(i_0, 0);
1119 larf<T>(v, taui, c1);
1120 A.diag(i_0) = Aii;
1121 }
1122 }
1123 //-----------------------------------------------------------------------------
1124 /// LApack function geqp3
1125 template<typename T>
geqp3(Matrix<T> & A,ShapeItem * pivot,T * tau)1126 void geqp3(Matrix<T> & A, ShapeItem * pivot, T * tau)
1127 {
1128 const ShapeItem M = A.get_row_count();
1129 const ShapeItem N = A.get_column_count();
1130
1131 // init column permutaion for pivoting
1132 // so we simply create the identical permutation
1133 //
1134 loop(j_0, N) pivot[j_0] = j_0;
1135
1136 // (no fixed columns)
1137
1138 // Factorize free columns
1139 // ======================
1140 //
1141 {
1142 // Initialize partial column norms.
1143 // the first N elements of WORK store the exact column norms.
1144 //
1145 APL_Float * vn12 = new APL_Float[2*N];
1146 for (int j_0 = 0; j_0 < N; ++j_0)
1147 {
1148 Vector<T> x(&A.at(0, j_0), M);
1149 vn12[N + j_0] = vn12[j_0] = x.norm();
1150 }
1151
1152 // Use unblocked code to factor the last or only block
1153 //
1154 laqp2<T>(A, pivot, tau, vn12);
1155 delete[] vn12;
1156 }
1157 }
1158 //-----------------------------------------------------------------------------
1159 /// LApack function estimating the rank of \b A
1160 template<typename T>
estimate_rank(const Matrix<T> & A,APL_Float rcond)1161 int estimate_rank(const Matrix<T> & A, APL_Float rcond)
1162 {
1163 // Determine RANK using incremental condition estimation...
1164
1165 const ShapeItem N = A.get_column_count();
1166
1167 APL_Float smax = ABS(A.diag(0));
1168 APL_Float smin = smax;
1169 if (smax == 0.0) return 0;
1170
1171 // store minima in work_min[ 0 ... N]
1172 // store maxima in work_max == work_min[N ... 2N]
1173 //
1174 APL_Float * work_1 = new APL_Float[4*N];
1175 T * work_min = reinterpret_cast<T *>(work_1);
1176 T * work_max = work_min + N;
1177
1178 work_min[0] = T(1.0);
1179 work_max[0] = T(1.0);
1180
1181 for (int RANK = 1; RANK < N; ++RANK)
1182 {
1183 APL_Float sminpr(0.0);
1184 APL_Float smaxpr(0.0);
1185 T s1(0.0);
1186 T s2(0.0);
1187 T c1(0.0);
1188 T c2(0.0);
1189 const T gamma(A.diag(RANK));
1190
1191 T alpha_min(0.0);
1192 T alpha_max(0.0);
1193 for (int r = 0; r < RANK; ++r)
1194 {
1195 alpha_min = alpha_min + DZ::CONJ(work_min[r] * A.at(r, RANK));
1196 alpha_max = alpha_max + DZ::CONJ(work_max[r] * A.at(r, RANK));
1197 }
1198
1199 laic1_min<T>(smin, alpha_min, gamma, sminpr, s1, c1);
1200 laic1_max<T>(smax, alpha_max, gamma, smaxpr, s2, c2);
1201
1202 if (smaxpr*rcond > sminpr)
1203 {
1204 delete[] work_1;
1205 return RANK;
1206 }
1207
1208 loop(cI, RANK)
1209 {
1210 work_min[cI] = work_min[cI] * s1;
1211 work_max[cI] = work_max[cI] * s2;
1212 }
1213 work_min[RANK] = c1;
1214 work_max[RANK] = c2;
1215 smin = sminpr;
1216 smax = smaxpr;
1217 }
1218
1219 delete[] work_1;
1220 return N;
1221 }
1222 //-----------------------------------------------------------------------------
1223 template<typename T>
scaled_gelsy(Matrix<T> & A,Matrix<T> & B,double rcond)1224 int scaled_gelsy(Matrix<T> & A, Matrix<T> & B, double rcond)
1225 {
1226 // gelsy is optimized for (and restricted to) the following conditions:
1227 //
1228 // 0 < N <= M → min_NM = N and max_MN = M
1229 // 0 < NRHS
1230 //
1231 const ShapeItem N = A.get_column_count();
1232 const ShapeItem NRHS = B.get_column_count();
1233
1234 // Compute QR factorization with column pivoting of A:
1235 // A * P = Q * R
1236 //
1237 char * pivot_tau_tmp = new char[N*(sizeof(ShapeItem) + 4*sizeof(APL_Float))];
1238
1239 // N pivot items
1240 ShapeItem * pivot = reinterpret_cast<ShapeItem *>(pivot_tau_tmp);
1241 T * tau = reinterpret_cast<T *>(pivot + N); // N complex tau items
1242 T * tmp = tau + N; // N complex tmp1 items
1243
1244 geqp3<T>(A, pivot, tau);
1245
1246 // Details of Householder rotations stored in WORK(1:MN).
1247 //
1248 // Determine RANK using incremental condition estimation
1249 //
1250 {
1251 const int RANK = estimate_rank(A, rcond);
1252 if (RANK < N)
1253 {
1254 delete[] pivot_tau_tmp;
1255 return RANK;
1256 }
1257 }
1258
1259 // from here on, RANK == N. We leave RANK in the comments but use N un
1260 // the code.
1261
1262 // Logically partition R = [ R11 R12 ]
1263 // [ 0 R22 ]
1264 // where R11 = R(1:RANK,1:RANK)
1265 // [R11,R12] = [ T11, 0 ] * Y
1266 //
1267
1268 // Details of Householder rotations stored in WORK(MN+1:2*MN)
1269 //
1270 // B(1:M, 1:NRHS) := Q**H * B(1:M,1:NRHS)
1271 //
1272 unm2r<T>(N, A, tau, B);
1273
1274 // B(1:RANK, 1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
1275 //
1276 {
1277 Matrix<T> B1 = B.sub_len(B.get_dx(), NRHS);
1278 trsm<T>(N, NRHS, A, B1);
1279 }
1280
1281 // workspace: 2*MN+NRHS
1282 //
1283 // B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
1284 //
1285 loop(j_0, NRHS)
1286 {
1287 loop(i_0, N) tmp[pivot[i_0]] = B.at(i_0, j_0);
1288 loop(i_0, N) B.at(i_0, j_0) = tmp[i_0];
1289 }
1290
1291 delete[] pivot_tau_tmp;
1292 return N;
1293 }
1294 //-----------------------------------------------------------------------------
1295 template<typename T>
gelsy(Matrix<T> & A,Matrix<T> & B,APL_Float rcond)1296 int gelsy(Matrix<T> & A, Matrix<T> &B, APL_Float rcond)
1297 {
1298 const ShapeItem M = A.get_row_count();
1299 const ShapeItem N = A.get_column_count();
1300 const ShapeItem NRHS = B.get_column_count();
1301
1302 // APL is responsible for handling the empty cases
1303 //
1304 Assert(M && N && NRHS && N <= M);
1305
1306 // For better precision, scale A and B so that their max. element lies
1307 // between small_number and big_number. Then call scaled_gelsy() and
1308 // scale the result back by the same factors.
1309 //
1310 APL_Float scale_A = 1.0;
1311 {
1312 const APL_Float norm_A = A.max_norm();
1313
1314 if (norm_A == 0.0) return 0; // A is rank-deficient
1315 else if (norm_A < small_number) A.scale(scale_A = big_number);
1316 else if (norm_A > big_number) A.scale(scale_A = small_number);
1317 }
1318
1319 APL_Float scale_B = 1.0;
1320 {
1321 const APL_Float norm_B = B.max_norm();
1322
1323 if (norm_B == 0.0) /* OK */ ;
1324 else if (norm_B < small_number) B.scale(scale_B = big_number);
1325 else if (norm_B > big_number) B.scale(scale_B = small_number);
1326 }
1327
1328 {
1329 const int RANK = scaled_gelsy(A, B, rcond);
1330 if (RANK < N) return RANK; // this is an error
1331 }
1332
1333 // Undo scaling
1334 //
1335 if (scale_A != 1.0)
1336 {
1337 Matrix<T> A1 = A.sub_len(N, N);
1338 Matrix<T> B1 = B.sub_len(N, NRHS);
1339 B1.scale(APL_Float(1.0/scale_A));
1340 A1.scale(scale_A);
1341 }
1342
1343 if (scale_B != 1.0)
1344 {
1345 Matrix<T> B1 = B.sub_len(N, NRHS);
1346 B1.scale(scale_B);
1347 }
1348
1349 return N; // success
1350 }
1351 //=============================================================================
1352
1353 #endif // __GELSY_HH_DEFINED__
1354