1 /* svd_si.h automatically generated by makeheader from svd.template */
2 
3 #define assert(A)
4 /* stuff included from libs/ap.h */
5 
6 /********************************************************************
7 AP library.
8 See www.alglib.net or alglib.sources.ru for details.
9 ********************************************************************/
10 
11 #ifndef AP_H
12 #define AP_H
13 
14 #include <stdlib.h>
15 #include <math.h>
16 #include "factory/globaldefs.h"
17 #include "resources/feFopen.h"
18 #include "kernel/mod2.h"
19 
20 #ifdef HAVE_SVD
21 /********************************************************************
22 Checking of the array boundaries mode.
23 ********************************************************************/
24 //#define NO_AP_ASSERT
25 #define AP_ASSERT
26 
27 #ifndef AP_ASSERT
28 #define NO_AP_ASSERT
29 #endif
30 
31 #ifdef NO_AP_ASSERT
32 #ifdef AP_ASSERT
33 #undef NO_AP_ASSERT
34 #endif
35 #endif
36 
37 
38 /********************************************************************
39 This symbol is used for debugging. Do not define it and do not remove
40 comments.
41 ********************************************************************/
42 //#define UNSAFE_MEM_COPY
43 
44 
45 /********************************************************************
46 Namespace of a standard library AlgoPascal.
47 ********************************************************************/
48 namespace ap
49 {
50 
51 
52 /********************************************************************
53 Exception class.
54 ********************************************************************/
55 class ap_error
56 {
57 public:
make_assertion(bool bClause)58     static void make_assertion(bool bClause)
59         { if(!bClause) /*throw ap_error();*/ ::WerrorS("ap_error"); };
60 private:
61 };
62 
63 /********************************************************************
64 Class defining a complex number with double precision.
65 ********************************************************************/
66 class complex;
67 
68 class complex
69 {
70 public:
complex()71     complex():x(0.0),y(0.0){};
complex(const double & _x)72     complex(const double &_x):x(_x),y(0.0){};
complex(const double & _x,const double & _y)73     complex(const double &_x, const double &_y):x(_x),y(_y){};
complex(const complex & z)74     complex(const complex &z):x(z.x),y(z.y){};
75 
76     complex& operator= (const double& v){ x  = v; y = 0.0; return *this; };
77     complex& operator+=(const double& v){ x += v;          return *this; };
78     complex& operator-=(const double& v){ x -= v;          return *this; };
79     complex& operator*=(const double& v){ x *= v; y *= v;  return *this; };
80     complex& operator/=(const double& v){ x /= v; y /= v;  return *this; };
81 
82     complex& operator= (const complex& z){ x  = z.x; y  = z.y; return *this; };
83     complex& operator+=(const complex& z){ x += z.x; y += z.y; return *this; };
84     complex& operator-=(const complex& z){ x -= z.x; y -= z.y; return *this; };
85     complex& operator*=(const complex& z){ double t = x*z.x-y*z.y; y = x*z.y+y*z.x; x = t; return *this; };
86     complex& operator/=(const complex& z)
87     {
88         ap::complex result;
89         double e;
90         double f;
91         if( fabs(z.y)<fabs(z.x) )
92         {
93             e = z.y/z.x;
94             f = z.x+z.y*e;
95             result.x = (z.x+z.y*e)/f;
96             result.y = (z.y-z.x*e)/f;
97         }
98         else
99         {
100             e = z.x/z.y;
101             f = z.y+z.x*e;
102             result.x = (z.y+z.x*e)/f;
103             result.y = (-z.x+z.y*e)/f;
104         }
105         *this = result;
106         return *this;
107     };
108 
109     double x, y;
110 };
111 
112 const complex operator/(const complex& lhs, const complex& rhs);
113 bool operator==(const complex& lhs, const complex& rhs);
114 bool operator!=(const complex& lhs, const complex& rhs);
115 const complex operator+(const complex& lhs);
116 const complex operator-(const complex& lhs);
117 const complex operator+(const complex& lhs, const complex& rhs);
118 const complex operator+(const complex& lhs, const double& rhs);
119 const complex operator+(const double& lhs, const complex& rhs);
120 const complex operator-(const complex& lhs, const complex& rhs);
121 const complex operator-(const complex& lhs, const double& rhs);
122 const complex operator-(const double& lhs, const complex& rhs);
123 const complex operator*(const complex& lhs, const complex& rhs);
124 const complex operator*(const complex& lhs, const double& rhs);
125 const complex operator*(const double& lhs, const complex& rhs);
126 const complex operator/(const complex& lhs, const complex& rhs);
127 const complex operator/(const double& lhs, const complex& rhs);
128 const complex operator/(const complex& lhs, const double& rhs);
129 double abscomplex(const complex &z);
130 const complex conj(const complex &z);
131 const complex csqr(const complex &z);
132 
133 
134 /********************************************************************
135 Template defining vector in memory. It is used by the basic
136 subroutines of linear algebra.
137 
138 Vector consists of Length elements of type T, starting from an element,
139 which Data is pointed to. Interval between adjacent elements equals
140 the value of Step.
141 
142 The class provides an access for reading only.
143 ********************************************************************/
144 template<class T>
145 class const_raw_vector
146 {
147 public:
const_raw_vector(const T * Data,int Length,int Step)148     const_raw_vector(const T *Data, int Length, int Step):
149         pData(const_cast<T*>(Data)),iLength(Length),iStep(Step){};
150 
GetData()151     const T* GetData() const
152     { return pData; };
153 
GetLength()154     int GetLength() const
155     { return iLength; };
156 
GetStep()157     int GetStep() const
158     { return iStep; };
159 protected:
160     T       *pData;
161     int     iLength, iStep;
162 };
163 
164 
165 /********************************************************************
166 Template defining vector in memory, derived from const_raw_vector.
167 It is used by the basic subroutines of linear algebra.
168 
169 Vector consists of Length elements of type T, starting from an element,
170 which Data is pointed to. Interval between adjacent elements equals
171 the value of Step.
172 
173 The class provides an access both for reading and writing.
174 ********************************************************************/
175 template<class T>
176 class raw_vector : public const_raw_vector<T>
177 {
178 public:
raw_vector(T * Data,int Length,int Step)179     raw_vector(T *Data, int Length, int Step):const_raw_vector<T>(Data, Length, Step){};
180 
GetData()181     T* GetData()
182     { return const_raw_vector<T>::pData; };
183 };
184 
185 
186 /********************************************************************
187 Scalar product
188 ********************************************************************/
189 template<class T>
vdotproduct(const_raw_vector<T> v1,const_raw_vector<T> v2)190 T vdotproduct(const_raw_vector<T> v1, const_raw_vector<T> v2)
191 {
192     ap_error::make_assertion(v1.GetLength()==v2.GetLength());
193     if( v1.GetStep()==1 && v2.GetStep()==1 )
194     {
195         //
196         // fast
197         //
198         T r = 0;
199         const T *p1 = v1.GetData();
200         const T *p2 = v2.GetData();
201         int imax = v1.GetLength()/4;
202         int i;
203         for(i=imax; i!=0; i--)
204         {
205             r += (*p1)*(*p2) + p1[1]*p2[1] + p1[2]*p2[2] + p1[3]*p2[3];
206             p1+=4;
207             p2+=4;
208         }
209         for(i=0; i<v1.GetLength()%4; i++)
210             r += (*(p1++))*(*(p2++));
211         return r;
212     }
213     else
214     {
215         //
216         // general
217         //
218         int offset11 = v1.GetStep(), offset12 = 2*offset11, offset13 = 3*offset11, offset14 = 4*offset11;
219         int offset21 = v2.GetStep(), offset22 = 2*offset21, offset23 = 3*offset21, offset24 = 4*offset21;
220         T r = 0;
221         const T *p1 = v1.GetData();
222         const T *p2 = v2.GetData();
223         int imax = v1.GetLength()/4;
224         int i;
225         for(i=0; i<imax; i++)
226         {
227             r += (*p1)*(*p2) + p1[offset11]*p2[offset21] + p1[offset12]*p2[offset22] + p1[offset13]*p2[offset23];
228             p1+=offset14;
229             p2+=offset24;
230         }
231         for(i=0; i<v1.GetLength()%4; i++)
232         {
233             r += (*p1)*(*p2);
234             p1+=offset11;
235             p2+=offset21;
236         }
237         return r;
238     }
239 }
240 
241 
242 /********************************************************************
243 Copy one vector into another
244 ********************************************************************/
245 template<class T>
vmove(raw_vector<T> vdst,const_raw_vector<T> vsrc)246 void vmove(raw_vector<T> vdst, const_raw_vector<T> vsrc)
247 {
248     ap_error::make_assertion(vdst.GetLength()==vsrc.GetLength());
249     if( vdst.GetStep()==1 && vsrc.GetStep()==1 )
250     {
251         //
252         // fast
253         //
254         T *p1 = vdst.GetData();
255         const T *p2 = vsrc.GetData();
256         int imax = vdst.GetLength()/2;
257         int i;
258         for(i=imax; i!=0; i--)
259         {
260             *p1 = *p2;
261             p1[1] = p2[1];
262             p1 += 2;
263             p2 += 2;
264         }
265         if(vdst.GetLength()%2 != 0)
266             *p1 = *p2;
267         return;
268     }
269     else
270     {
271         //
272         // general
273         //
274         int offset11 = vdst.GetStep(), offset12 = 2*offset11, offset13 = 3*offset11, offset14 = 4*offset11;
275         int offset21 = vsrc.GetStep(), offset22 = 2*offset21, offset23 = 3*offset21, offset24 = 4*offset21;
276         T *p1 = vdst.GetData();
277         const T *p2 = vsrc.GetData();
278         int imax = vdst.GetLength()/4;
279         int i;
280         for(i=0; i<imax; i++)
281         {
282             *p1 = *p2;
283             p1[offset11] = p2[offset21];
284             p1[offset12] = p2[offset22];
285             p1[offset13] = p2[offset23];
286             p1 += offset14;
287             p2 += offset24;
288         }
289         for(i=0; i<vdst.GetLength()%4; i++)
290         {
291             *p1 = *p2;
292             p1 += vdst.GetStep();
293             p2 += vsrc.GetStep();
294         }
295         return;
296     }
297 }
298 
299 
300 /********************************************************************
301 Copy one vector multiplied by -1 into another.
302 ********************************************************************/
303 template<class T>
vmoveneg(raw_vector<T> vdst,const_raw_vector<T> vsrc)304 void vmoveneg(raw_vector<T> vdst, const_raw_vector<T> vsrc)
305 {
306     ap_error::make_assertion(vdst.GetLength()==vsrc.GetLength());
307     if( vdst.GetStep()==1 && vsrc.GetStep()==1 )
308     {
309         //
310         // fast
311         //
312         T *p1 = vdst.GetData();
313         const T *p2 = vsrc.GetData();
314         int imax = vdst.GetLength()/2;
315         int i;
316         for(i=0; i<imax; i++)
317         {
318             *p1 = -*p2;
319             p1[1] = -p2[1];
320             p1 += 2;
321             p2 += 2;
322         }
323         if(vdst.GetLength()%2 != 0)
324             *p1 = -*p2;
325         return;
326     }
327     else
328     {
329         //
330         // general
331         //
332         int offset11 = vdst.GetStep(), offset12 = 2*offset11, offset13 = 3*offset11, offset14 = 4*offset11;
333         int offset21 = vsrc.GetStep(), offset22 = 2*offset21, offset23 = 3*offset21, offset24 = 4*offset21;
334         T *p1 = vdst.GetData();
335         const T *p2 = vsrc.GetData();
336         int imax = vdst.GetLength()/4;
337         int i;
338         for(i=imax; i!=0; i--)
339         {
340             *p1 = -*p2;
341             p1[offset11] = -p2[offset21];
342             p1[offset12] = -p2[offset22];
343             p1[offset13] = -p2[offset23];
344             p1 += offset14;
345             p2 += offset24;
346         }
347         for(i=0; i<vdst.GetLength()%4; i++)
348         {
349             *p1 = -*p2;
350             p1 += vdst.GetStep();
351             p2 += vsrc.GetStep();
352         }
353         return;
354     }
355 }
356 
357 
358 /********************************************************************
359 Copy one vector multiplied by a number into another vector.
360 ********************************************************************/
361 template<class T, class T2>
vmove(raw_vector<T> vdst,const_raw_vector<T> vsrc,T2 alpha)362 void vmove(raw_vector<T> vdst, const_raw_vector<T> vsrc, T2 alpha)
363 {
364     ap_error::make_assertion(vdst.GetLength()==vsrc.GetLength());
365     if( vdst.GetStep()==1 && vsrc.GetStep()==1 )
366     {
367         //
368         // fast
369         //
370         T *p1 = vdst.GetData();
371         const T *p2 = vsrc.GetData();
372         int imax = vdst.GetLength()/4;
373         int i;
374         for(i=imax; i!=0; i--)
375         {
376             *p1 = alpha*(*p2);
377             p1[1] = alpha*p2[1];
378             p1[2] = alpha*p2[2];
379             p1[3] = alpha*p2[3];
380             p1 += 4;
381             p2 += 4;
382         }
383         for(i=0; i<vdst.GetLength()%4; i++)
384             *(p1++) = alpha*(*(p2++));
385         return;
386     }
387     else
388     {
389         //
390         // general
391         //
392         int offset11 = vdst.GetStep(), offset12 = 2*offset11, offset13 = 3*offset11, offset14 = 4*offset11;
393         int offset21 = vsrc.GetStep(), offset22 = 2*offset21, offset23 = 3*offset21, offset24 = 4*offset21;
394         T *p1 = vdst.GetData();
395         const T *p2 = vsrc.GetData();
396         int imax = vdst.GetLength()/4;
397         int i;
398         for(i=0; i<imax; i++)
399         {
400             *p1 = alpha*(*p2);
401             p1[offset11] = alpha*p2[offset21];
402             p1[offset12] = alpha*p2[offset22];
403             p1[offset13] = alpha*p2[offset23];
404             p1 += offset14;
405             p2 += offset24;
406         }
407         for(i=0; i<vdst.GetLength()%4; i++)
408         {
409             *p1 = alpha*(*p2);
410             p1 += vdst.GetStep();
411             p2 += vsrc.GetStep();
412         }
413         return;
414     }
415 }
416 
417 
418 /********************************************************************
419 Vector addition
420 ********************************************************************/
421 template<class T>
vadd(raw_vector<T> vdst,const_raw_vector<T> vsrc)422 void vadd(raw_vector<T> vdst, const_raw_vector<T> vsrc)
423 {
424     ap_error::make_assertion(vdst.GetLength()==vsrc.GetLength());
425     if( vdst.GetStep()==1 && vsrc.GetStep()==1 )
426     {
427         //
428         // fast
429         //
430         T *p1 = vdst.GetData();
431         const T *p2 = vsrc.GetData();
432         int imax = vdst.GetLength()/4;
433         int i;
434         for(i=imax; i!=0; i--)
435         {
436             *p1 += *p2;
437             p1[1] += p2[1];
438             p1[2] += p2[2];
439             p1[3] += p2[3];
440             p1 += 4;
441             p2 += 4;
442         }
443         for(i=0; i<vdst.GetLength()%4; i++)
444             *(p1++) += *(p2++);
445         return;
446     }
447     else
448     {
449         //
450         // general
451         //
452         int offset11 = vdst.GetStep(), offset12 = 2*offset11, offset13 = 3*offset11, offset14 = 4*offset11;
453         int offset21 = vsrc.GetStep(), offset22 = 2*offset21, offset23 = 3*offset21, offset24 = 4*offset21;
454         T *p1 = vdst.GetData();
455         const T *p2 = vsrc.GetData();
456         int imax = vdst.GetLength()/4;
457         int i;
458         for(i=0; i<imax; i++)
459         {
460             *p1 += *p2;
461             p1[offset11] += p2[offset21];
462             p1[offset12] += p2[offset22];
463             p1[offset13] += p2[offset23];
464             p1 += offset14;
465             p2 += offset24;
466         }
467         for(i=0; i<vdst.GetLength()%4; i++)
468         {
469             *p1 += *p2;
470             p1 += vdst.GetStep();
471             p2 += vsrc.GetStep();
472         }
473         return;
474     }
475 }
476 
477 
478 /********************************************************************
479 Add one vector multiplied by a number to another vector.
480 ********************************************************************/
481 template<class T, class T2>
vadd(raw_vector<T> vdst,const_raw_vector<T> vsrc,T2 alpha)482 void vadd(raw_vector<T> vdst, const_raw_vector<T> vsrc, T2 alpha)
483 {
484     ap_error::make_assertion(vdst.GetLength()==vsrc.GetLength());
485     if( vdst.GetStep()==1 && vsrc.GetStep()==1 )
486     {
487         //
488         // fast
489         //
490         T *p1 = vdst.GetData();
491         const T *p2 = vsrc.GetData();
492         int imax = vdst.GetLength()/4;
493         int i;
494         for(i=imax; i!=0; i--)
495         {
496             *p1 += alpha*(*p2);
497             p1[1] += alpha*p2[1];
498             p1[2] += alpha*p2[2];
499             p1[3] += alpha*p2[3];
500             p1 += 4;
501             p2 += 4;
502         }
503         for(i=0; i<vdst.GetLength()%4; i++)
504             *(p1++) += alpha*(*(p2++));
505         return;
506     }
507     else
508     {
509         //
510         // general
511         //
512         int offset11 = vdst.GetStep(), offset12 = 2*offset11, offset13 = 3*offset11, offset14 = 4*offset11;
513         int offset21 = vsrc.GetStep(), offset22 = 2*offset21, offset23 = 3*offset21, offset24 = 4*offset21;
514         T *p1 = vdst.GetData();
515         const T *p2 = vsrc.GetData();
516         int imax = vdst.GetLength()/4;
517         int i;
518         for(i=0; i<imax; i++)
519         {
520             *p1 += alpha*(*p2);
521             p1[offset11] += alpha*p2[offset21];
522             p1[offset12] += alpha*p2[offset22];
523             p1[offset13] += alpha*p2[offset23];
524             p1 += offset14;
525             p2 += offset24;
526         }
527         for(i=0; i<vdst.GetLength()%4; i++)
528         {
529             *p1 += alpha*(*p2);
530             p1 += vdst.GetStep();
531             p2 += vsrc.GetStep();
532         }
533         return;
534     }
535 }
536 
537 
538 /********************************************************************
539 Vector subtraction
540 ********************************************************************/
541 template<class T>
vsub(raw_vector<T> vdst,const_raw_vector<T> vsrc)542 void vsub(raw_vector<T> vdst, const_raw_vector<T> vsrc)
543 {
544     ap_error::make_assertion(vdst.GetLength()==vsrc.GetLength());
545     if( vdst.GetStep()==1 && vsrc.GetStep()==1 )
546     {
547         //
548         // fast
549         //
550         T *p1 = vdst.GetData();
551         const T *p2 = vsrc.GetData();
552         int imax = vdst.GetLength()/4;
553         int i;
554         for(i=imax; i!=0; i--)
555         {
556             *p1 -= *p2;
557             p1[1] -= p2[1];
558             p1[2] -= p2[2];
559             p1[3] -= p2[3];
560             p1 += 4;
561             p2 += 4;
562         }
563         for(i=0; i<vdst.GetLength()%4; i++)
564             *(p1++) -= *(p2++);
565         return;
566     }
567     else
568     {
569         //
570         // general
571         //
572         int offset11 = vdst.GetStep(), offset12 = 2*offset11, offset13 = 3*offset11, offset14 = 4*offset11;
573         int offset21 = vsrc.GetStep(), offset22 = 2*offset21, offset23 = 3*offset21, offset24 = 4*offset21;
574         T *p1 = vdst.GetData();
575         const T *p2 = vsrc.GetData();
576         int imax = vdst.GetLength()/4;
577         int i;
578         for(i=0; i<imax; i++)
579         {
580             *p1 -= *p2;
581             p1[offset11] -= p2[offset21];
582             p1[offset12] -= p2[offset22];
583             p1[offset13] -= p2[offset23];
584             p1 += offset14;
585             p2 += offset24;
586         }
587         for(i=0; i<vdst.GetLength()%4; i++)
588         {
589             *p1 -= *p2;
590             p1 += vdst.GetStep();
591             p2 += vsrc.GetStep();
592         }
593         return;
594     }
595 }
596 
597 
598 /********************************************************************
599 Subtract one vector multiplied by a number from another vector.
600 ********************************************************************/
601 template<class T, class T2>
vsub(raw_vector<T> vdst,const_raw_vector<T> vsrc,T2 alpha)602 void vsub(raw_vector<T> vdst, const_raw_vector<T> vsrc, T2 alpha)
603 {
604     vadd(vdst, vsrc, -alpha);
605 }
606 
607 
608 /********************************************************************
609 In-place vector multiplication
610 ********************************************************************/
611 template<class T, class T2>
vmul(raw_vector<T> vdst,T2 alpha)612 void vmul(raw_vector<T> vdst, T2 alpha)
613 {
614     if( vdst.GetStep()==1 )
615     {
616         //
617         // fast
618         //
619         T *p1 = vdst.GetData();
620         int imax = vdst.GetLength()/4;
621         int i;
622         for(i=imax; i!=0; i--)
623         {
624             *p1 *= alpha;
625             p1[1] *= alpha;
626             p1[2] *= alpha;
627             p1[3] *= alpha;
628             p1 += 4;
629         }
630         for(i=0; i<vdst.GetLength()%4; i++)
631             *(p1++) *= alpha;
632         return;
633     }
634     else
635     {
636         //
637         // general
638         //
639         int offset11 = vdst.GetStep(), offset12 = 2*offset11, offset13 = 3*offset11, offset14 = 4*offset11;
640         T *p1 = vdst.GetData();
641         int imax = vdst.GetLength()/4;
642         int i;
643         for(i=0; i<imax; i++)
644         {
645             *p1 *= alpha;
646             p1[offset11] *= alpha;
647             p1[offset12] *= alpha;
648             p1[offset13] *= alpha;
649             p1 += offset14;
650         }
651         for(i=0; i<vdst.GetLength()%4; i++)
652         {
653             *p1 *= alpha;
654             p1 += vdst.GetStep();
655         }
656         return;
657     }
658 }
659 
660 
661 /********************************************************************
662 Template of a dynamical one-dimensional array
663 ********************************************************************/
664 template<class T>
665 class template_1d_array
666 {
667 public:
template_1d_array()668     template_1d_array()
669     {
670         m_Vec=0;
671         m_iVecSize = 0;
672     };
673 
~template_1d_array()674     ~template_1d_array()
675     {
676         if(m_Vec)
677             delete[] m_Vec;
678     };
679 
template_1d_array(const template_1d_array & rhs)680     template_1d_array(const template_1d_array &rhs)
681     {
682         m_iVecSize = rhs.m_iVecSize;
683         m_iLow = rhs.m_iLow;
684         m_iHigh = rhs.m_iHigh;
685         if(rhs.m_Vec)
686         {
687             m_Vec = new T[m_iVecSize];
688             #ifndef UNSAFE_MEM_COPY
689             for(int i=0; i<m_iVecSize; i++)
690                 m_Vec[i] = rhs.m_Vec[i];
691             #else
692             memcpy(m_Vec, rhs.m_Vec, m_iVecSize*sizeof(T));
693             #endif
694         }
695         else
696             m_Vec=0;
697     };
698 
699 
700     const template_1d_array& operator=(const template_1d_array &rhs)
701     {
702         if( this==&rhs )
703             return *this;
704 
705         m_iLow = rhs.m_iLow;
706         m_iHigh = rhs.m_iHigh;
707         m_iVecSize = rhs.m_iVecSize;
708         if(m_Vec)
709             delete[] m_Vec;
710         if(rhs.m_Vec)
711         {
712             m_Vec = new T[m_iVecSize];
713             #ifndef UNSAFE_MEM_COPY
714             for(int i=0; i<m_iVecSize; i++)
715                 m_Vec[i] = rhs.m_Vec[i];
716             #else
717             memcpy(m_Vec, rhs.m_Vec, m_iVecSize*sizeof(T));
718             #endif
719         }
720         else
721             m_Vec=0;
722         return *this;
723     };
724 
725 
operator()726     const T& operator()(int i) const
727     {
728         #ifndef NO_AP_ASSERT
729         ap_error::make_assertion(i>=m_iLow && i<=m_iHigh);
730         #endif
731         return m_Vec[ i-m_iLow ];
732     };
733 
734 
operator()735     T& operator()(int i)
736     {
737         #ifndef NO_AP_ASSERT
738         ap_error::make_assertion(i>=m_iLow && i<=m_iHigh);
739         #endif
740         return m_Vec[ i-m_iLow ];
741     };
742 
743 
setbounds(int iLow,int iHigh)744     void setbounds( int iLow, int iHigh )
745     {
746         if(m_Vec)
747             delete[] m_Vec;
748         m_iLow = iLow;
749         m_iHigh = iHigh;
750         m_iVecSize = iHigh-iLow+1;
751         m_Vec = new T[m_iVecSize];
752     };
753 
754 
setcontent(int iLow,int iHigh,const T * pContent)755     void setcontent( int iLow, int iHigh, const T *pContent )
756     {
757         setbounds(iLow, iHigh);
758         for(int i=iLow; i<=iHigh; i++)
759             (*this)(i) = pContent[i-iLow];
760     };
761 
762 
getcontent()763     T* getcontent()
764     {
765         return m_Vec;
766     };
767 
getcontent()768     const T* getcontent() const
769     {
770         return m_Vec;
771     };
772 
773 
774     int getlowbound(int iBoundNum = 0) const
775     {
776         return m_iLow;
777     };
778 
779 
780     int gethighbound(int iBoundNum = 0) const
781     {
782         return m_iHigh;
783     };
784 
getvector(int iStart,int iEnd)785     raw_vector<T> getvector(int iStart, int iEnd)
786     {
787         if( iStart>iEnd || wrongIdx(iStart) || wrongIdx(iEnd) )
788             return raw_vector<T>(0, 0, 1);
789         else
790             return raw_vector<T>(m_Vec+iStart-m_iLow, iEnd-iStart+1, 1);
791     };
792 
793 
getvector(int iStart,int iEnd)794     const_raw_vector<T> getvector(int iStart, int iEnd) const
795     {
796         if( iStart>iEnd || wrongIdx(iStart) || wrongIdx(iEnd) )
797             return const_raw_vector<T>(0, 0, 1);
798         else
799             return const_raw_vector<T>(m_Vec+iStart-m_iLow, iEnd-iStart+1, 1);
800     };
801 private:
wrongIdx(int i)802     bool wrongIdx(int i) const { return i<m_iLow || i>m_iHigh; };
803 
804     T         *m_Vec;
805     long      m_iVecSize;
806     long      m_iLow, m_iHigh;
807 };
808 
809 
810 
811 /********************************************************************
812 Template of a dynamical two-dimensional array
813 ********************************************************************/
814 template<class T>
815 class template_2d_array
816 {
817 public:
template_2d_array()818     template_2d_array()
819     {
820         m_Vec=0;
821         m_iVecSize=0;
822     };
823 
~template_2d_array()824     ~template_2d_array()
825     {
826         if(m_Vec)
827             delete[] m_Vec;
828     };
829 
template_2d_array(const template_2d_array & rhs)830     template_2d_array(const template_2d_array &rhs)
831     {
832         m_iVecSize = rhs.m_iVecSize;
833         m_iLow1 = rhs.m_iLow1;
834         m_iLow2 = rhs.m_iLow2;
835         m_iHigh1 = rhs.m_iHigh1;
836         m_iHigh2 = rhs.m_iHigh2;
837         m_iConstOffset = rhs.m_iConstOffset;
838         m_iLinearMember = rhs.m_iLinearMember;
839         if(rhs.m_Vec)
840         {
841             m_Vec = new T[m_iVecSize];
842             #ifndef UNSAFE_MEM_COPY
843             for(int i=0; i<m_iVecSize; i++)
844                 m_Vec[i] = rhs.m_Vec[i];
845             #else
846             memcpy(m_Vec, rhs.m_Vec, m_iVecSize*sizeof(T));
847             #endif
848         }
849         else
850             m_Vec=0;
851     };
852     const template_2d_array& operator=(const template_2d_array &rhs)
853     {
854         if( this==&rhs )
855             return *this;
856 
857         m_iLow1 = rhs.m_iLow1;
858         m_iLow2 = rhs.m_iLow2;
859         m_iHigh1 = rhs.m_iHigh1;
860         m_iHigh2 = rhs.m_iHigh2;
861         m_iConstOffset = rhs.m_iConstOffset;
862         m_iLinearMember = rhs.m_iLinearMember;
863         m_iVecSize = rhs.m_iVecSize;
864         if(m_Vec)
865             delete[] m_Vec;
866         if(rhs.m_Vec)
867         {
868             m_Vec = new T[m_iVecSize];
869             #ifndef UNSAFE_MEM_COPY
870             for(int i=0; i<m_iVecSize; i++)
871                 m_Vec[i] = rhs.m_Vec[i];
872             #else
873             memcpy(m_Vec, rhs.m_Vec, m_iVecSize*sizeof(T));
874             #endif
875         }
876         else
877             m_Vec=0;
878         return *this;
879     };
880 
operator()881     const T& operator()(int i1, int i2) const
882     {
883         #ifndef NO_AP_ASSERT
884         ap_error::make_assertion(i1>=m_iLow1 && i1<=m_iHigh1);
885         ap_error::make_assertion(i2>=m_iLow2 && i2<=m_iHigh2);
886         #endif
887         return m_Vec[ m_iConstOffset + i2 +i1*m_iLinearMember];
888     };
889 
operator()890     T& operator()(int i1, int i2)
891     {
892         #ifndef NO_AP_ASSERT
893         ap_error::make_assertion(i1>=m_iLow1 && i1<=m_iHigh1);
894         ap_error::make_assertion(i2>=m_iLow2 && i2<=m_iHigh2);
895         #endif
896         return m_Vec[ m_iConstOffset + i2 +i1*m_iLinearMember];
897     };
898 
setbounds(int iLow1,int iHigh1,int iLow2,int iHigh2)899     void setbounds( int iLow1, int iHigh1, int iLow2, int iHigh2 )
900     {
901         if(m_Vec)
902             delete[] m_Vec;
903         m_iVecSize = (iHigh1-iLow1+1)*(iHigh2-iLow2+1);
904         m_Vec = new T[m_iVecSize];
905         m_iLow1  = iLow1;
906         m_iHigh1 = iHigh1;
907         m_iLow2  = iLow2;
908         m_iHigh2 = iHigh2;
909         m_iConstOffset = -m_iLow2-m_iLow1*(m_iHigh2-m_iLow2+1);
910         m_iLinearMember = (m_iHigh2-m_iLow2+1);
911     };
912 
setcontent(int iLow1,int iHigh1,int iLow2,int iHigh2,const T * pContent)913     void setcontent( int iLow1, int iHigh1, int iLow2, int iHigh2, const T *pContent )
914     {
915         setbounds(iLow1, iHigh1, iLow2, iHigh2);
916         for(int i=0; i<m_iVecSize; i++)
917             m_Vec[i]=pContent[i];
918     };
919 
getcontent()920     T* getcontent()
921     {
922         return m_Vec;
923     };
924 
getcontent()925     const T* getcontent() const
926     {
927         return m_Vec;
928     };
929 
getlowbound(int iBoundNum)930     int getlowbound(int iBoundNum) const
931     {
932         return iBoundNum==1 ? m_iLow1 : m_iLow2;
933     };
934 
gethighbound(int iBoundNum)935     int gethighbound(int iBoundNum) const
936     {
937         return iBoundNum==1 ? m_iHigh1 : m_iHigh2;
938     };
939 
getcolumn(int iColumn,int iRowStart,int iRowEnd)940     raw_vector<T> getcolumn(int iColumn, int iRowStart, int iRowEnd)
941     {
942         if( (iRowStart>iRowEnd) || wrongColumn(iColumn) || wrongRow(iRowStart) ||wrongRow(iRowEnd) )
943             return raw_vector<T>(0, 0, 1);
944         else
945             return raw_vector<T>(&((*this)(iRowStart, iColumn)), iRowEnd-iRowStart+1, m_iLinearMember);
946     };
947 
getrow(int iRow,int iColumnStart,int iColumnEnd)948     raw_vector<T> getrow(int iRow, int iColumnStart, int iColumnEnd)
949     {
950         if( (iColumnStart>iColumnEnd) || wrongRow(iRow) || wrongColumn(iColumnStart) || wrongColumn(iColumnEnd))
951             return raw_vector<T>(0, 0, 1);
952         else
953             return raw_vector<T>(&((*this)(iRow, iColumnStart)), iColumnEnd-iColumnStart+1, 1);
954     };
955 
getcolumn(int iColumn,int iRowStart,int iRowEnd)956     const_raw_vector<T> getcolumn(int iColumn, int iRowStart, int iRowEnd) const
957     {
958         if( (iRowStart>iRowEnd) || wrongColumn(iColumn) || wrongRow(iRowStart) ||wrongRow(iRowEnd) )
959             return const_raw_vector<T>(0, 0, 1);
960         else
961             return const_raw_vector<T>(&((*this)(iRowStart, iColumn)), iRowEnd-iRowStart+1, m_iLinearMember);
962     };
963 
getrow(int iRow,int iColumnStart,int iColumnEnd)964     const_raw_vector<T> getrow(int iRow, int iColumnStart, int iColumnEnd) const
965     {
966         if( (iColumnStart>iColumnEnd) || wrongRow(iRow) || wrongColumn(iColumnStart) || wrongColumn(iColumnEnd))
967             return const_raw_vector<T>(0, 0, 1);
968         else
969             return const_raw_vector<T>(&((*this)(iRow, iColumnStart)), iColumnEnd-iColumnStart+1, 1);
970     };
971 private:
wrongRow(int i)972     bool wrongRow(int i) const { return i<m_iLow1 || i>m_iHigh1; };
wrongColumn(int j)973     bool wrongColumn(int j) const { return j<m_iLow2 || j>m_iHigh2; };
974 
975     T           *m_Vec;
976     long        m_iVecSize;
977     long        m_iLow1, m_iLow2, m_iHigh1, m_iHigh2;
978     long        m_iConstOffset, m_iLinearMember;
979 };
980 
981 
982 typedef template_1d_array<int>     integer_1d_array;
983 typedef template_1d_array<double>  real_1d_array;
984 typedef template_1d_array<complex> complex_1d_array;
985 typedef template_1d_array<bool>    boolean_1d_array;
986 typedef template_2d_array<int>     integer_2d_array;
987 typedef template_2d_array<double>  real_2d_array;
988 typedef template_2d_array<complex> complex_2d_array;
989 typedef template_2d_array<bool>    boolean_2d_array;
990 
991 
992 /********************************************************************
993 Constants and functions introduced for compatibility with AlgoPascal
994 ********************************************************************/
995 extern const double machineepsilon;
996 extern const double maxrealnumber;
997 extern const double minrealnumber;
998 
999 int sign(double x);
1000 double randomreal();
1001 int randominteger(int maxv);
1002 int round(double x);
1003 int trunc(double x);
1004 int ifloor(double x);
1005 int iceil(double x);
1006 double pi();
1007 double sqr(double x);
1008 int maxint(int m1, int m2);
1009 int minint(int m1, int m2);
1010 double maxreal(double m1, double m2);
1011 double minreal(double m1, double m2);
1012 
1013 };//namespace ap
1014 
1015 
1016 /* stuff included from libs/amp.h */
1017 
1018 #include "omalloc/omalloc.h"
1019 
1020 #include <gmp.h>
1021 #include <mpfr.h>
1022 #include <stdexcept>
1023 #include <math.h>
1024 #include <string>
1025 #include <stdio.h>
1026 #include <time.h>
1027 #include <memory.h>
1028 #include <vector>
1029 #include <list>
1030 
1031 //#define _AMP_NO_TEMPLATE_CONSTRUCTORS
1032 
1033 namespace amp
1034 {
1035     class exception {};
1036     class incorrectPrecision    : public exception {};
1037     class overflow              : public exception {};
1038     class divisionByZero        : public exception {};
1039     class sqrtOfNegativeNumber  : public exception {};
1040     class invalidConversion     : public exception {};
1041     class invalidString         : public exception {};
1042     class internalError         : public exception {};
1043     class domainError           : public exception {};
1044 
1045     typedef unsigned long unsigned32;
1046     typedef signed long   signed32;
1047 
1048     struct mpfr_record
1049     {
1050         unsigned int refCount;
1051         unsigned int Precision;
1052         mpfr_t value;
1053         mpfr_record *next;
1054     };
1055 
1056     typedef mpfr_record* mpfr_record_ptr;
1057 
1058     //
1059     // storage for mpfr_t instances
1060     //
1061     class mpfr_storage
1062     {
1063     public:
1064         static mpfr_record* newMpfr(unsigned int Precision);
1065         static void deleteMpfr(mpfr_record* ref);
1066         /*static void clearStorage();*/
1067         static gmp_randstate_t* getRandState();
1068     private:
1069         static mpfr_record_ptr& getList(unsigned int Precision);
1070     };
1071 
1072     //
1073     // mpfr_t reference
1074     //
1075     class mpfr_reference
1076     {
1077     public:
1078         mpfr_reference();
1079         mpfr_reference(const mpfr_reference& r);
1080         mpfr_reference& operator= (const mpfr_reference &r);
1081         ~mpfr_reference();
1082 
1083         void initialize(int Precision);
1084         void free();
1085 
1086         mpfr_srcptr getReadPtr() const;
1087         mpfr_ptr getWritePtr();
1088     private:
1089         mpfr_record *ref;
1090     };
1091 
1092     //
1093     // ampf template
1094     //
1095     template<unsigned int Precision>
1096     class ampf
1097     {
1098     public:
1099         //
1100         // Destructor
1101         //
~ampf()1102         ~ampf()
1103         {
1104             rval->refCount--;
1105             if( rval->refCount==0 )
1106                 mpfr_storage::deleteMpfr(rval);
1107         }
1108 
1109         //
1110         // Initializing
1111         //
ampf()1112         ampf ()                 { InitializeAsZero(); }
ampf(mpfr_record * v)1113         ampf(mpfr_record *v)    { rval = v; }
1114 
ampf(long double v)1115         ampf (long double v)    { InitializeAsDouble(v); }
ampf(double v)1116         ampf (double v)         { InitializeAsDouble(v); }
ampf(float v)1117         ampf (float v)          { InitializeAsDouble(v); }
ampf(signed long v)1118         ampf (signed long v)    { InitializeAsSLong(v); }
ampf(unsigned long v)1119         ampf (unsigned long v)  { InitializeAsULong(v); }
ampf(signed int v)1120         ampf (signed int v)     { InitializeAsSLong(v); }
ampf(unsigned int v)1121         ampf (unsigned int v)   { InitializeAsULong(v); }
ampf(signed short v)1122         ampf (signed short v)   { InitializeAsSLong(v); }
ampf(unsigned short v)1123         ampf (unsigned short v) { InitializeAsULong(v); }
ampf(signed char v)1124         ampf (signed char v)    { InitializeAsSLong(v); }
ampf(unsigned char v)1125         ampf (unsigned char v)  { InitializeAsULong(v); }
1126 
1127         //
1128         // initializing from string
1129         // string s must have format "X0.hhhhhhhh@eee" or "X-0.hhhhhhhh@eee"
1130         //
ampf(const std::string & s)1131         ampf (const std::string &s) { InitializeAsString(s.c_str()); }
ampf(const char * s)1132         ampf (const char *s)        { InitializeAsString(s); }
1133 
1134         //
1135         // copy constructors
1136         //
ampf(const ampf & r)1137         ampf(const ampf& r)
1138         {
1139             rval = r.rval;
1140             rval->refCount++;
1141         }
1142 #ifndef _AMP_NO_TEMPLATE_CONSTRUCTORS
1143         template<unsigned int Precision2>
ampf(const ampf<Precision2> & r)1144         ampf(const ampf<Precision2>& r)
1145         {
1146             CheckPrecision();
1147             rval = mpfr_storage::newMpfr(Precision);
1148             mpfr_set(getWritePtr(), r.getReadPtr(), GMP_RNDN);
1149         }
1150 #endif
1151 
1152         //
1153         // Assignment constructors
1154         //
1155         ampf& operator= (long double v)         { mpfr_set_ld(getWritePtr(), v, GMP_RNDN); return *this; }
1156         ampf& operator= (double v)              { mpfr_set_ld(getWritePtr(), v, GMP_RNDN); return *this; }
1157         ampf& operator= (float v)               { mpfr_set_ld(getWritePtr(), v, GMP_RNDN); return *this; }
1158         ampf& operator= (signed long v)         { mpfr_set_si(getWritePtr(), v, GMP_RNDN); return *this; }
1159         ampf& operator= (unsigned long v)       { mpfr_set_ui(getWritePtr(), v, GMP_RNDN); return *this; }
1160         ampf& operator= (signed int v)          { mpfr_set_si(getWritePtr(), v, GMP_RNDN); return *this; }
1161         ampf& operator= (unsigned int v)        { mpfr_set_ui(getWritePtr(), v, GMP_RNDN); return *this; }
1162         ampf& operator= (signed short v)        { mpfr_set_si(getWritePtr(), v, GMP_RNDN); return *this; }
1163         ampf& operator= (unsigned short v)      { mpfr_set_ui(getWritePtr(), v, GMP_RNDN); return *this; }
1164         ampf& operator= (signed char v)         { mpfr_set_si(getWritePtr(), v, GMP_RNDN); return *this; }
1165         ampf& operator= (unsigned char v)       { mpfr_set_ui(getWritePtr(), v, GMP_RNDN); return *this; }
1166         ampf& operator= (const char *s)         { mpfr_strtofr(getWritePtr(), s, NULL, 0, GMP_RNDN); return *this; }
1167         ampf& operator= (const std::string &s)  { mpfr_strtofr(getWritePtr(), s.c_str(), NULL, 0, GMP_RNDN); return *this; }
1168         ampf& operator= (const ampf& r)
1169         {
1170             // TODO: may be copy ref
1171             if( this==&r )
1172                 return *this;
1173             if( rval==r.rval )
1174                 return *this;
1175             rval->refCount--;
1176             if( rval->refCount==0 )
1177                 mpfr_storage::deleteMpfr(rval);
1178             rval = r.rval;
1179             rval->refCount++;
1180             //mpfr_set(getWritePtr(), r.getReadPtr(), GMP_RNDN);
1181             return *this;
1182         }
1183 #ifndef _AMP_NO_TEMPLATE_CONSTRUCTORS
1184         template<unsigned int Precision2>
1185         ampf& operator= (const ampf<Precision2>& r)
1186         {
1187             if( (void*)this==(void*)(&r) )
1188                 return *this;
1189             mpfr_set(getWritePtr(), r.getReadPtr(), GMP_RNDN);
1190             return *this;
1191         }
1192 #endif
1193 
1194         //
1195         // in-place operators
1196         // TODO: optimize
1197         //
1198         template<class T> ampf& operator+=(const T& v){ *this = *this + v; return *this; };
1199         template<class T> ampf& operator-=(const T& v){ *this = *this - v; return *this; };
1200         template<class T> ampf& operator*=(const T& v){ *this = *this * v; return *this; };
1201         template<class T> ampf& operator/=(const T& v){ *this = *this / v; return *this; };
1202 
1203         //
1204         // MPFR access
1205         //
1206         mpfr_srcptr getReadPtr() const;
1207         mpfr_ptr getWritePtr();
1208 
1209         //
1210         // properties and information
1211         //
1212         bool isFiniteNumber() const;
1213         bool isPositiveNumber() const;
1214         bool isZero() const;
1215         bool isNegativeNumber() const;
1216         const ampf getUlpOf();
1217 
1218         //
1219         // conversions
1220         //
1221         double toDouble() const;
1222         std::string toHex() const;
1223         std::string toDec() const;
1224         char * toString() const;
1225 
1226 
1227         //
1228         // static methods
1229         //
1230         static const ampf getUlpOf(const ampf &x);
1231         static const ampf getUlp();
1232         static const ampf getUlp256();
1233         static const ampf getUlp512();
1234         static const ampf getMaxNumber();
1235         static const ampf getMinNumber();
1236         static const ampf getAlgoPascalEpsilon();
1237         static const ampf getAlgoPascalMaxNumber();
1238         static const ampf getAlgoPascalMinNumber();
1239         static const ampf getRandom();
1240     private:
1241         void CheckPrecision();
1242         void InitializeAsZero();
1243         void InitializeAsSLong(signed long v);
1244         void InitializeAsULong(unsigned long v);
1245         void InitializeAsDouble(long double v);
1246         void InitializeAsString(const char *s);
1247 
1248         //mpfr_reference  ref;
1249         mpfr_record *rval;
1250     };
1251 
1252     /*void ampf<Precision>::CheckPrecision()
1253     {
1254         if( Precision<32 )
1255             throw incorrectPrecision();
1256     }***/
1257 
1258     template<unsigned int Precision>
CheckPrecision()1259     void ampf<Precision>::CheckPrecision()
1260     {
1261         if( Precision<32 )
1262             //throw incorrectPrecision();
1263             WerrorS("incorrectPrecision");
1264     }
1265 
1266     template<unsigned int Precision>
InitializeAsZero()1267     void ampf<Precision>::InitializeAsZero()
1268     {
1269         CheckPrecision();
1270         rval = mpfr_storage::newMpfr(Precision);
1271         mpfr_set_ui(getWritePtr(), 0, GMP_RNDN);
1272     }
1273 
1274     template<unsigned int Precision>
InitializeAsSLong(signed long sv)1275     void ampf<Precision>::InitializeAsSLong(signed long sv)
1276     {
1277         CheckPrecision();
1278         rval = mpfr_storage::newMpfr(Precision);
1279         mpfr_set_si(getWritePtr(), sv, GMP_RNDN);
1280     }
1281 
1282     template<unsigned int Precision>
InitializeAsULong(unsigned long v)1283     void ampf<Precision>::InitializeAsULong(unsigned long v)
1284     {
1285         CheckPrecision();
1286         rval = mpfr_storage::newMpfr(Precision);
1287         mpfr_set_ui(getWritePtr(), v, GMP_RNDN);
1288     }
1289 
1290     template<unsigned int Precision>
InitializeAsDouble(long double v)1291     void ampf<Precision>::InitializeAsDouble(long double v)
1292     {
1293         CheckPrecision();
1294         rval = mpfr_storage::newMpfr(Precision);
1295         mpfr_set_ld(getWritePtr(), v, GMP_RNDN);
1296     }
1297 
1298     template<unsigned int Precision>
InitializeAsString(const char * s)1299     void ampf<Precision>::InitializeAsString(const char *s)
1300     {
1301         CheckPrecision();
1302         rval = mpfr_storage::newMpfr(Precision);
1303         mpfr_strtofr(getWritePtr(), s, NULL, 0, GMP_RNDN);
1304     }
1305 
1306     template<unsigned int Precision>
getReadPtr()1307     mpfr_srcptr ampf<Precision>::getReadPtr() const
1308     {
1309         return rval->value;
1310     }
1311 
1312     template<unsigned int Precision>
getWritePtr()1313     mpfr_ptr ampf<Precision>::getWritePtr()
1314     {
1315         if( rval->refCount==1 )
1316             return rval->value;
1317         mpfr_record *newrval = mpfr_storage::newMpfr(Precision);
1318         mpfr_set(newrval->value, rval->value, GMP_RNDN);
1319         rval->refCount--;
1320         rval = newrval;
1321         return rval->value;
1322     }
1323 
1324     template<unsigned int Precision>
isFiniteNumber()1325     bool ampf<Precision>::isFiniteNumber() const
1326     {
1327         return mpfr_number_p(getReadPtr())!=0;
1328     }
1329 
1330     template<unsigned int Precision>
isPositiveNumber()1331     bool ampf<Precision>::isPositiveNumber() const
1332     {
1333         if( !isFiniteNumber() )
1334             return false;
1335         return mpfr_sgn(getReadPtr())>0;
1336     }
1337 
1338     template<unsigned int Precision>
isZero()1339     bool ampf<Precision>::isZero() const
1340     {
1341         return mpfr_zero_p(getReadPtr())!=0;
1342     }
1343 
1344     template<unsigned int Precision>
isNegativeNumber()1345     bool ampf<Precision>::isNegativeNumber() const
1346     {
1347         if( !isFiniteNumber() )
1348             return false;
1349         return mpfr_sgn(getReadPtr())<0;
1350     }
1351 
1352     template<unsigned int Precision>
getUlpOf()1353     const ampf<Precision> ampf<Precision>::getUlpOf()
1354     {
1355         return getUlpOf(*this);
1356     }
1357 
1358     template<unsigned int Precision>
toDouble()1359     double ampf<Precision>::toDouble() const
1360     {
1361         return mpfr_get_d(getReadPtr(), GMP_RNDN);
1362     }
1363 
1364     template<unsigned int Precision>
toHex()1365     std::string ampf<Precision>::toHex() const
1366     {
1367         //
1368         // some special cases
1369         //
1370         if( !isFiniteNumber() )
1371         {
1372             std::string r;
1373             mp_exp_t _e;
1374             char *ptr;
1375             ptr = mpfr_get_str(NULL, &_e, 16, 0, getReadPtr(), GMP_RNDN);
1376             r = ptr;
1377             mpfr_free_str(ptr);
1378             return r;
1379         }
1380 
1381         //
1382         // general case
1383         //
1384         std::string r;
1385         char buf_e[128];
1386         signed long iexpval;
1387         mp_exp_t expval;
1388         char *ptr;
1389         char *ptr2;
1390         ptr = mpfr_get_str(NULL, &expval, 16, 0, getReadPtr(), GMP_RNDN);
1391         ptr2 = ptr;
1392         iexpval = expval;
1393         if( iexpval!=expval )
1394         //    throw internalError();
1395             WerrorS("internalError");
1396         sprintf(buf_e, "%ld", long(iexpval));
1397         if( *ptr=='-' )
1398         {
1399             r = "-";
1400             ptr++;
1401         }
1402         r += "0x0.";
1403         r += ptr;
1404         r += "@";
1405         r += buf_e;
1406         mpfr_free_str(ptr2);
1407         return r;
1408     }
1409 
1410     template<unsigned int Precision>
toDec()1411     std::string ampf<Precision>::toDec() const
1412     {
1413         // TODO: advanced output formatting (zero, integers)
1414 
1415         //
1416         // some special cases
1417         //
1418         if( !isFiniteNumber() )
1419         {
1420             std::string r;
1421             mp_exp_t _e;
1422             char *ptr;
1423             ptr = mpfr_get_str(NULL, &_e, 10, 0, getReadPtr(), GMP_RNDN);
1424             r = ptr;
1425             mpfr_free_str(ptr);
1426             return r;
1427         }
1428 
1429         //
1430         // general case
1431         //
1432         std::string r;
1433         char buf_e[128];
1434         signed long iexpval;
1435         mp_exp_t expval;
1436         char *ptr;
1437         char *ptr2;
1438         ptr = mpfr_get_str(NULL, &expval, 10, 0, getReadPtr(), GMP_RNDN);
1439         ptr2 = ptr;
1440         iexpval = expval;
1441         if( iexpval!=expval )
1442         //    throw internalError();
1443             WerrorS("internalError");
1444         sprintf(buf_e, "%ld", long(iexpval));
1445         if( *ptr=='-' )
1446         {
1447             r = "-";
1448             ptr++;
1449         }
1450         r += "0.";
1451         r += ptr;
1452         r += "E";
1453         r += buf_e;
1454         mpfr_free_str(ptr2);
1455         return r;
1456     }
1457     template<unsigned int Precision>
toString()1458     char * ampf<Precision>::toString() const
1459     {
1460          char *toString_Block=(char *)omAlloc(256);
1461         //
1462         // some special cases
1463         //
1464         if( !isFiniteNumber() )
1465         {
1466             mp_exp_t _e;
1467             char *ptr;
1468             ptr = mpfr_get_str(NULL, &_e, 10, 0, getReadPtr(), GMP_RNDN);
1469             strcpy(toString_Block, ptr);
1470             mpfr_free_str(ptr);
1471             return toString_Block;
1472         }
1473 
1474         //
1475         // general case
1476         //
1477 
1478         char buf_e[128];
1479         signed long iexpval;
1480         mp_exp_t expval;
1481         char *ptr;
1482         char *ptr2;
1483         ptr = mpfr_get_str(NULL, &expval, 10, 0, getReadPtr(), GMP_RNDN);
1484         ptr2 = ptr;
1485         iexpval = expval;
1486         if( iexpval!=expval )
1487             //throw internalError();
1488             WerrorS("internalError");
1489         sprintf(buf_e, "%ld", long(iexpval));
1490         if( *ptr=='-' )
1491         {
1492             ptr++;
1493            sprintf(toString_Block,"-0.%sE%s",ptr,buf_e);
1494         }
1495         else
1496           sprintf(toString_Block,"0.%sE%s",ptr,buf_e);
1497         mpfr_free_str(ptr2);
1498         return toString_Block;
1499     }
1500 
1501     template<unsigned int Precision>
getUlpOf(const ampf<Precision> & x)1502     const ampf<Precision> ampf<Precision>::getUlpOf(const ampf<Precision> &x)
1503     {
1504         if( !x.isFiniteNumber() )
1505             return x;
1506         if( x.isZero() )
1507             return x;
1508         ampf<Precision> r(1);
1509         mpfr_nextabove(r.getWritePtr());
1510         mpfr_sub_ui(r.getWritePtr(), r.getWritePtr(), 1, GMP_RNDN);
1511         mpfr_mul_2si(
1512             r.getWritePtr(),
1513             r.getWritePtr(),
1514             mpfr_get_exp(x.getReadPtr()),
1515             GMP_RNDN);
1516         mpfr_div_2si(
1517             r.getWritePtr(),
1518             r.getWritePtr(),
1519             1,
1520             GMP_RNDN);
1521         return r;
1522     }
1523 
1524     template<unsigned int Precision>
getUlp()1525     const ampf<Precision> ampf<Precision>::getUlp()
1526     {
1527         ampf<Precision> r(1);
1528         mpfr_nextabove(r.getWritePtr());
1529         mpfr_sub_ui(r.getWritePtr(), r.getWritePtr(), 1, GMP_RNDN);
1530         return r;
1531     }
1532 
1533     template<unsigned int Precision>
getUlp256()1534     const ampf<Precision> ampf<Precision>::getUlp256()
1535     {
1536         ampf<Precision> r(1);
1537         mpfr_nextabove(r.getWritePtr());
1538         mpfr_sub_ui(r.getWritePtr(), r.getWritePtr(), 1, GMP_RNDN);
1539         mpfr_mul_2si(
1540             r.getWritePtr(),
1541             r.getWritePtr(),
1542             8,
1543             GMP_RNDN);
1544         return r;
1545     }
1546 
1547     template<unsigned int Precision>
getUlp512()1548     const ampf<Precision> ampf<Precision>::getUlp512()
1549     {
1550         ampf<Precision> r(1);
1551         mpfr_nextabove(r.getWritePtr());
1552         mpfr_sub_ui(r.getWritePtr(), r.getWritePtr(), 1, GMP_RNDN);
1553         mpfr_mul_2si(
1554             r.getWritePtr(),
1555             r.getWritePtr(),
1556             9,
1557             GMP_RNDN);
1558         return r;
1559     }
1560 
1561     template<unsigned int Precision>
getMaxNumber()1562     const ampf<Precision> ampf<Precision>::getMaxNumber()
1563     {
1564         ampf<Precision> r(1);
1565         mpfr_nextbelow(r.getWritePtr());
1566         mpfr_set_exp(r.getWritePtr(),mpfr_get_emax());
1567         return r;
1568     }
1569 
1570     template<unsigned int Precision>
getMinNumber()1571     const ampf<Precision> ampf<Precision>::getMinNumber()
1572     {
1573         ampf<Precision> r(1);
1574         mpfr_set_exp(r.getWritePtr(),mpfr_get_emin());
1575         return r;
1576     }
1577 
1578     template<unsigned int Precision>
getAlgoPascalEpsilon()1579     const ampf<Precision> ampf<Precision>::getAlgoPascalEpsilon()
1580     {
1581         return getUlp256();
1582     }
1583 
1584     template<unsigned int Precision>
getAlgoPascalMaxNumber()1585     const ampf<Precision> ampf<Precision>::getAlgoPascalMaxNumber()
1586     {
1587         ampf<Precision> r(1);
1588         mp_exp_t e1 = mpfr_get_emax();
1589         mp_exp_t e2 = -mpfr_get_emin();
1590         mp_exp_t e  = e1>e2 ? e1 : e2;
1591         mpfr_set_exp(r.getWritePtr(), e-5);
1592         return r;
1593     }
1594 
1595     template<unsigned int Precision>
getAlgoPascalMinNumber()1596     const ampf<Precision> ampf<Precision>::getAlgoPascalMinNumber()
1597     {
1598         ampf<Precision> r(1);
1599         mp_exp_t e1 = mpfr_get_emax();
1600         mp_exp_t e2 = -mpfr_get_emin();
1601         mp_exp_t e  = e1>e2 ? e1 : e2;
1602         mpfr_set_exp(r.getWritePtr(), 2-(e-5));
1603         return r;
1604     }
1605 
1606     template<unsigned int Precision>
getRandom()1607     const ampf<Precision> ampf<Precision>::getRandom()
1608     {
1609         ampf<Precision> r;
1610         while(mpfr_urandomb(r.getWritePtr(), *amp::mpfr_storage::getRandState()));
1611         return r;
1612     }
1613 
1614     //
1615     // comparison operators
1616     //
1617     template<unsigned int Precision>
1618     bool operator==(const ampf<Precision>& op1, const ampf<Precision>& op2)
1619     {
1620         return mpfr_cmp(op1.getReadPtr(), op2.getReadPtr())==0;
1621     }
1622 
1623     template<unsigned int Precision>
1624     bool operator!=(const ampf<Precision>& op1, const ampf<Precision>& op2)
1625     {
1626         return mpfr_cmp(op1.getReadPtr(), op2.getReadPtr())!=0;
1627     }
1628 
1629     template<unsigned int Precision>
1630     bool operator<(const ampf<Precision>& op1, const ampf<Precision>& op2)
1631     {
1632         return mpfr_cmp(op1.getReadPtr(), op2.getReadPtr())<0;
1633     }
1634 
1635     template<unsigned int Precision>
1636     bool operator>(const ampf<Precision>& op1, const ampf<Precision>& op2)
1637     {
1638         return mpfr_cmp(op1.getReadPtr(), op2.getReadPtr())>0;
1639     }
1640 
1641     template<unsigned int Precision>
1642     bool operator<=(const ampf<Precision>& op1, const ampf<Precision>& op2)
1643     {
1644         return mpfr_cmp(op1.getReadPtr(), op2.getReadPtr())<=0;
1645     }
1646 
1647     template<unsigned int Precision>
1648     bool operator>=(const ampf<Precision>& op1, const ampf<Precision>& op2)
1649     {
1650         return mpfr_cmp(op1.getReadPtr(), op2.getReadPtr())>=0;
1651     }
1652 
1653     //
1654     // arithmetic operators
1655     //
1656     template<unsigned int Precision>
1657     const ampf<Precision> operator+(const ampf<Precision>& op1)
1658     {
1659         return op1;
1660     }
1661 
1662     template<unsigned int Precision>
1663     const ampf<Precision> operator-(const ampf<Precision>& op1)
1664     {
1665         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1666         mpfr_neg(v->value, op1.getReadPtr(), GMP_RNDN);
1667         return v;
1668     }
1669 
1670     template<unsigned int Precision>
1671     const ampf<Precision> operator+(const ampf<Precision>& op1, const ampf<Precision>& op2)
1672     {
1673         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1674         mpfr_add(v->value, op1.getReadPtr(), op2.getReadPtr(), GMP_RNDN);
1675         return v;
1676     }
1677 
1678     template<unsigned int Precision>
1679     const ampf<Precision> operator-(const ampf<Precision>& op1, const ampf<Precision>& op2)
1680     {
1681         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1682         mpfr_sub(v->value, op1.getReadPtr(), op2.getReadPtr(), GMP_RNDN);
1683         return v;
1684     }
1685 
1686 
1687     template<unsigned int Precision>
1688     const ampf<Precision> operator*(const ampf<Precision>& op1, const ampf<Precision>& op2)
1689     {
1690         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1691         mpfr_mul(v->value, op1.getReadPtr(), op2.getReadPtr(), GMP_RNDN);
1692         return v;
1693     }
1694 
1695     template<unsigned int Precision>
1696     const ampf<Precision> operator/(const ampf<Precision>& op1, const ampf<Precision>& op2)
1697     {
1698         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1699         mpfr_div(v->value, op1.getReadPtr(), op2.getReadPtr(), GMP_RNDN);
1700         return v;
1701     }
1702 
1703     //
1704     // basic functions
1705     //
1706     template<unsigned int Precision>
sqr(const ampf<Precision> & x)1707     const ampf<Precision> sqr(const ampf<Precision> &x)
1708     {
1709         // TODO: optimize temporary for return value
1710         ampf<Precision> res;
1711         mpfr_sqr(res.getWritePtr(), x.getReadPtr(), GMP_RNDN);
1712         return res;
1713     }
1714 
1715     template<unsigned int Precision>
sign(const ampf<Precision> & x)1716     int sign(const ampf<Precision> &x)
1717     {
1718         int s = mpfr_sgn(x.getReadPtr());
1719         if( s>0 )
1720             return +1;
1721         if( s<0 )
1722             return -1;
1723         return 0;
1724     }
1725 
1726     template<unsigned int Precision>
abs(const ampf<Precision> & x)1727     const ampf<Precision> abs(const ampf<Precision> &x)
1728     {
1729         // TODO: optimize temporary for return value
1730         ampf<Precision> res;
1731         mpfr_abs(res.getWritePtr(), x.getReadPtr(), GMP_RNDN);
1732         return res;
1733     }
1734 
1735     template<unsigned int Precision>
maximum(const ampf<Precision> & x,const ampf<Precision> & y)1736     const ampf<Precision> maximum(const ampf<Precision> &x, const ampf<Precision> &y)
1737     {
1738         // TODO: optimize temporary for return value
1739         ampf<Precision> res;
1740         mpfr_max(res.getWritePtr(), x.getReadPtr(), y.getReadPtr(), GMP_RNDN);
1741         return res;
1742     }
1743 
1744     template<unsigned int Precision>
minimum(const ampf<Precision> & x,const ampf<Precision> & y)1745     const ampf<Precision> minimum(const ampf<Precision> &x, const ampf<Precision> &y)
1746     {
1747         // TODO: optimize temporary for return value
1748         ampf<Precision> res;
1749         mpfr_min(res.getWritePtr(), x.getReadPtr(), y.getReadPtr(), GMP_RNDN);
1750         return res;
1751     }
1752 
1753     template<unsigned int Precision>
sqrt(const ampf<Precision> & x)1754     const ampf<Precision> sqrt(const ampf<Precision> &x)
1755     {
1756         // TODO: optimize temporary for return value
1757         ampf<Precision> res;
1758         mpfr_sqrt(res.getWritePtr(), x.getReadPtr(), GMP_RNDN);
1759         return res;
1760     }
1761 
1762     template<unsigned int Precision>
trunc(const ampf<Precision> & x)1763     signed long trunc(const ampf<Precision> &x)
1764     {
1765         ampf<Precision> tmp;
1766         signed long r;
1767         mpfr_trunc(tmp.getWritePtr(), x.getReadPtr());
1768         if( mpfr_integer_p(tmp.getReadPtr())==0 )
1769             //throw invalidConversion();
1770             WerrorS("internalError");
1771         mpfr_clear_erangeflag();
1772         r = mpfr_get_si(tmp.getReadPtr(), GMP_RNDN);
1773         if( mpfr_erangeflag_p()!=0 )
1774             //throw invalidConversion();
1775             WerrorS("internalError");
1776         return r;
1777     }
1778 
1779     template<unsigned int Precision>
frac(const ampf<Precision> & x)1780     const ampf<Precision> frac(const ampf<Precision> &x)
1781     {
1782         // TODO: optimize temporary for return value
1783         ampf<Precision> r;
1784         mpfr_frac(r.getWritePtr(), x.getReadPtr(), GMP_RNDN);
1785         return r;
1786     }
1787 
1788     template<unsigned int Precision>
floor(const ampf<Precision> & x)1789     signed long floor(const ampf<Precision> &x)
1790     {
1791         ampf<Precision> tmp;
1792         signed long r;
1793         mpfr_floor(tmp.getWritePtr(), x.getReadPtr());
1794         if( mpfr_integer_p(tmp.getReadPtr())==0 )
1795             //throw invalidConversion();
1796             WerrorS("internalError");
1797         mpfr_clear_erangeflag();
1798         r = mpfr_get_si(tmp.getReadPtr(), GMP_RNDN);
1799         if( mpfr_erangeflag_p()!=0 )
1800             //throw invalidConversion();
1801             WerrorS("internalError");
1802         return r;
1803     }
1804 
1805     template<unsigned int Precision>
ceil(const ampf<Precision> & x)1806     signed long ceil(const ampf<Precision> &x)
1807     {
1808         ampf<Precision> tmp;
1809         signed long r;
1810         mpfr_ceil(tmp.getWritePtr(), x.getReadPtr());
1811         if( mpfr_integer_p(tmp.getReadPtr())==0 )
1812             //throw invalidConversion();
1813             WerrorS("internalError");
1814         mpfr_clear_erangeflag();
1815         r = mpfr_get_si(tmp.getReadPtr(), GMP_RNDN);
1816         if( mpfr_erangeflag_p()!=0 )
1817             //throw invalidConversion();
1818             WerrorS("internalError");
1819         return r;
1820     }
1821 
1822     template<unsigned int Precision>
round(const ampf<Precision> & x)1823     signed long round(const ampf<Precision> &x)
1824     {
1825         ampf<Precision> tmp;
1826         signed long r;
1827         mpfr_round(tmp.getWritePtr(), x.getReadPtr());
1828         if( mpfr_integer_p(tmp.getReadPtr())==0 )
1829             //throw invalidConversion();
1830             WerrorS("internalError");
1831         mpfr_clear_erangeflag();
1832         r = mpfr_get_si(tmp.getReadPtr(), GMP_RNDN);
1833         if( mpfr_erangeflag_p()!=0 )
1834             //throw invalidConversion();
1835             WerrorS("internalError");
1836         return r;
1837     }
1838 
1839     template<unsigned int Precision>
frexp2(const ampf<Precision> & x,mp_exp_t * exponent)1840     const ampf<Precision> frexp2(const ampf<Precision> &x, mp_exp_t *exponent)
1841     {
1842         // TODO: optimize temporary for return value
1843         ampf<Precision> r;
1844         if( !x.isFiniteNumber() )
1845             //throw invalidConversion();
1846             WerrorS("internalError");
1847         if( x.isZero() )
1848         {
1849             *exponent = 0;
1850             r = 0;
1851             return r;
1852         }
1853         r = x;
1854         *exponent = mpfr_get_exp(r.getReadPtr());
1855         mpfr_set_exp(r.getWritePtr(),0);
1856         return r;
1857     }
1858 
1859     template<unsigned int Precision>
ldexp2(const ampf<Precision> & x,mp_exp_t exponent)1860     const ampf<Precision> ldexp2(const ampf<Precision> &x, mp_exp_t exponent)
1861     {
1862         // TODO: optimize temporary for return value
1863         ampf<Precision> r;
1864         mpfr_mul_2si(r.getWritePtr(), x.getReadPtr(), exponent, GMP_RNDN);
1865         return r;
1866     }
1867 
1868     //
1869     // different types of arguments
1870     //
1871     #define __AMP_BINARY_OPI(type) \
1872         template<unsigned int Precision> const ampf<Precision> operator+(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)+op2; }   \
1873         template<unsigned int Precision> const ampf<Precision> operator+(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)+op2; } \
1874         template<unsigned int Precision> const ampf<Precision> operator+(const ampf<Precision>& op1, const signed type& op2) { return op1+ampf<Precision>(op2); }   \
1875         template<unsigned int Precision> const ampf<Precision> operator+(const ampf<Precision>& op1, const unsigned type& op2) { return op1+ampf<Precision>(op2); } \
1876         template<unsigned int Precision> const ampf<Precision> operator-(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)-op2; }   \
1877         template<unsigned int Precision> const ampf<Precision> operator-(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)-op2; } \
1878         template<unsigned int Precision> const ampf<Precision> operator-(const ampf<Precision>& op1, const signed type& op2) { return op1-ampf<Precision>(op2); }   \
1879         template<unsigned int Precision> const ampf<Precision> operator-(const ampf<Precision>& op1, const unsigned type& op2) { return op1-ampf<Precision>(op2); } \
1880         template<unsigned int Precision> const ampf<Precision> operator*(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)*op2; }   \
1881         template<unsigned int Precision> const ampf<Precision> operator*(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)*op2; } \
1882         template<unsigned int Precision> const ampf<Precision> operator*(const ampf<Precision>& op1, const signed type& op2) { return op1*ampf<Precision>(op2); }   \
1883         template<unsigned int Precision> const ampf<Precision> operator*(const ampf<Precision>& op1, const unsigned type& op2) { return op1*ampf<Precision>(op2); } \
1884         template<unsigned int Precision> const ampf<Precision> operator/(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)/op2; }   \
1885         template<unsigned int Precision> const ampf<Precision> operator/(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)/op2; } \
1886         template<unsigned int Precision> const ampf<Precision> operator/(const ampf<Precision>& op1, const signed type& op2) { return op1/ampf<Precision>(op2); }   \
1887         template<unsigned int Precision> const ampf<Precision> operator/(const ampf<Precision>& op1, const unsigned type& op2) { return op1/ampf<Precision>(op2); } \
1888         template<unsigned int Precision> bool       operator==(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)==op2; }   \
1889         template<unsigned int Precision> bool       operator==(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)==op2; } \
1890         template<unsigned int Precision> bool       operator==(const ampf<Precision>& op1, const signed type& op2) { return op1==ampf<Precision>(op2); }   \
1891         template<unsigned int Precision> bool       operator==(const ampf<Precision>& op1, const unsigned type& op2) { return op1==ampf<Precision>(op2); } \
1892         template<unsigned int Precision> bool       operator!=(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)!=op2; }   \
1893         template<unsigned int Precision> bool       operator!=(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)!=op2; } \
1894         template<unsigned int Precision> bool       operator!=(const ampf<Precision>& op1, const signed type& op2) { return op1!=ampf<Precision>(op2); }   \
1895         template<unsigned int Precision> bool       operator!=(const ampf<Precision>& op1, const unsigned type& op2) { return op1!=ampf<Precision>(op2); } \
1896         template<unsigned int Precision> bool       operator<=(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)<=op2; }   \
1897         template<unsigned int Precision> bool       operator<=(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)<=op2; } \
1898         template<unsigned int Precision> bool       operator<=(const ampf<Precision>& op1, const signed type& op2) { return op1<=ampf<Precision>(op2); }   \
1899         template<unsigned int Precision> bool       operator<=(const ampf<Precision>& op1, const unsigned type& op2) { return op1<=ampf<Precision>(op2); } \
1900         template<unsigned int Precision> bool       operator>=(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)>=op2; }   \
1901         template<unsigned int Precision> bool       operator>=(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)>=op2; } \
1902         template<unsigned int Precision> bool       operator>=(const ampf<Precision>& op1, const signed type& op2) { return op1>=ampf<Precision>(op2); }   \
1903         template<unsigned int Precision> bool       operator>=(const ampf<Precision>& op1, const unsigned type& op2) { return op1>=ampf<Precision>(op2); } \
1904         template<unsigned int Precision> bool       operator<(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)<op2; }   \
1905         template<unsigned int Precision> bool       operator<(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)<op2; } \
1906         template<unsigned int Precision> bool       operator<(const ampf<Precision>& op1, const signed type& op2) { return op1<ampf<Precision>(op2); }   \
1907         template<unsigned int Precision> bool       operator<(const ampf<Precision>& op1, const unsigned type& op2) { return op1<ampf<Precision>(op2); } \
1908         template<unsigned int Precision> bool       operator>(const signed type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)>op2; }   \
1909         template<unsigned int Precision> bool       operator>(const unsigned type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)>op2; } \
1910         template<unsigned int Precision> bool       operator>(const ampf<Precision>& op1, const signed type& op2) { return op1>ampf<Precision>(op2); }   \
1911         template<unsigned int Precision> bool       operator>(const ampf<Precision>& op1, const unsigned type& op2) { return op1>ampf<Precision>(op2); }
1912     __AMP_BINARY_OPI(char)
__AMP_BINARY_OPI(short)1913     __AMP_BINARY_OPI(short)
1914     __AMP_BINARY_OPI(long)
1915     __AMP_BINARY_OPI(int)
1916     #undef __AMP_BINARY_OPI
1917     #define __AMP_BINARY_OPF(type) \
1918         template<unsigned int Precision> const ampf<Precision> operator+(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)+op2; } \
1919         template<unsigned int Precision> const ampf<Precision> operator+(const ampf<Precision>& op1, const type& op2) { return op1+ampf<Precision>(op2); } \
1920         template<unsigned int Precision> const ampf<Precision> operator-(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)-op2; } \
1921         template<unsigned int Precision> const ampf<Precision> operator-(const ampf<Precision>& op1, const type& op2) { return op1-ampf<Precision>(op2); } \
1922         template<unsigned int Precision> const ampf<Precision> operator*(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)*op2; } \
1923         template<unsigned int Precision> const ampf<Precision> operator*(const ampf<Precision>& op1, const type& op2) { return op1*ampf<Precision>(op2); } \
1924         template<unsigned int Precision> const ampf<Precision> operator/(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)/op2; } \
1925         template<unsigned int Precision> const ampf<Precision> operator/(const ampf<Precision>& op1, const type& op2) { return op1/ampf<Precision>(op2); } \
1926         template<unsigned int Precision> bool             operator==(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)==op2; } \
1927         template<unsigned int Precision> bool             operator==(const ampf<Precision>& op1, const type& op2) { return op1==ampf<Precision>(op2); } \
1928         template<unsigned int Precision> bool             operator!=(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)!=op2; } \
1929         template<unsigned int Precision> bool             operator!=(const ampf<Precision>& op1, const type& op2) { return op1!=ampf<Precision>(op2); } \
1930         template<unsigned int Precision> bool             operator<=(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)<=op2; } \
1931         template<unsigned int Precision> bool             operator<=(const ampf<Precision>& op1, const type& op2) { return op1<=ampf<Precision>(op2); } \
1932         template<unsigned int Precision> bool             operator>=(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)>=op2; } \
1933         template<unsigned int Precision> bool             operator>=(const ampf<Precision>& op1, const type& op2) { return op1>=ampf<Precision>(op2); } \
1934         template<unsigned int Precision> bool             operator<(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)<op2; } \
1935         template<unsigned int Precision> bool             operator<(const ampf<Precision>& op1, const type& op2) { return op1<ampf<Precision>(op2); } \
1936         template<unsigned int Precision> bool             operator>(const type& op1, const ampf<Precision>& op2) { return ampf<Precision>(op1)>op2; } \
1937         template<unsigned int Precision> bool             operator>(const ampf<Precision>& op1, const type& op2) { return op1>ampf<Precision>(op2); }
1938     __AMP_BINARY_OPF(float)
1939     __AMP_BINARY_OPF(double)
1940     __AMP_BINARY_OPF(long double)
1941     #undef __AMP_BINARY_OPF
1942 
1943     //
1944     // transcendent functions
1945     //
1946     template<unsigned int Precision>
1947     const ampf<Precision> pi()
1948     {
1949         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1950         mpfr_const_pi(v->value, GMP_RNDN);
1951         return v;
1952     }
1953 
1954     template<unsigned int Precision>
halfpi()1955     const ampf<Precision> halfpi()
1956     {
1957         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1958         mpfr_const_pi(v->value, GMP_RNDN);
1959         mpfr_mul_2si(v->value, v->value, -1, GMP_RNDN);
1960         return v;
1961     }
1962 
1963     template<unsigned int Precision>
twopi()1964     const ampf<Precision> twopi()
1965     {
1966         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1967         mpfr_const_pi(v->value, GMP_RNDN);
1968         mpfr_mul_2si(v->value, v->value, +1, GMP_RNDN);
1969         return v;
1970     }
1971 
1972     template<unsigned int Precision>
sin(const ampf<Precision> & x)1973     const ampf<Precision> sin(const ampf<Precision> &x)
1974     {
1975         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1976         mpfr_sin(v->value, x.getReadPtr(), GMP_RNDN);
1977         return v;
1978     }
1979 
1980     template<unsigned int Precision>
cos(const ampf<Precision> & x)1981     const ampf<Precision> cos(const ampf<Precision> &x)
1982     {
1983         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1984         mpfr_cos(v->value, x.getReadPtr(), GMP_RNDN);
1985         return v;
1986     }
1987 
1988     template<unsigned int Precision>
tan(const ampf<Precision> & x)1989     const ampf<Precision> tan(const ampf<Precision> &x)
1990     {
1991         mpfr_record *v = mpfr_storage::newMpfr(Precision);
1992         mpfr_tan(v->value, x.getReadPtr(), GMP_RNDN);
1993         return v;
1994     }
1995 
1996     template<unsigned int Precision>
asin(const ampf<Precision> & x)1997     const ampf<Precision> asin(const ampf<Precision> &x)
1998     {
1999         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2000         mpfr_asin(v->value, x.getReadPtr(), GMP_RNDN);
2001         return v;
2002     }
2003 
2004     template<unsigned int Precision>
acos(const ampf<Precision> & x)2005     const ampf<Precision> acos(const ampf<Precision> &x)
2006     {
2007         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2008         mpfr_acos(v->value, x.getReadPtr(), GMP_RNDN);
2009         return v;
2010     }
2011 
2012     template<unsigned int Precision>
atan(const ampf<Precision> & x)2013     const ampf<Precision> atan(const ampf<Precision> &x)
2014     {
2015         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2016         mpfr_atan(v->value, x.getReadPtr(), GMP_RNDN);
2017         return v;
2018     }
2019 
2020     template<unsigned int Precision>
atan2(const ampf<Precision> & y,const ampf<Precision> & x)2021     const ampf<Precision> atan2(const ampf<Precision> &y, const ampf<Precision> &x)
2022     {
2023         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2024         mpfr_atan2(v->value, y.getReadPtr(), x.getReadPtr(), GMP_RNDN);
2025         return v;
2026     }
2027 
2028     template<unsigned int Precision>
log(const ampf<Precision> & x)2029     const ampf<Precision> log(const ampf<Precision> &x)
2030     {
2031         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2032         mpfr_log(v->value, x.getReadPtr(), GMP_RNDN);
2033         return v;
2034     }
2035 
2036     template<unsigned int Precision>
log2(const ampf<Precision> & x)2037     const ampf<Precision> log2(const ampf<Precision> &x)
2038     {
2039         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2040         mpfr_log2(v->value, x.getReadPtr(), GMP_RNDN);
2041         return v;
2042     }
2043 
2044     template<unsigned int Precision>
log10(const ampf<Precision> & x)2045     const ampf<Precision> log10(const ampf<Precision> &x)
2046     {
2047         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2048         mpfr_log10(v->value, x.getReadPtr(), GMP_RNDN);
2049         return v;
2050     }
2051 
2052     template<unsigned int Precision>
exp(const ampf<Precision> & x)2053     const ampf<Precision> exp(const ampf<Precision> &x)
2054     {
2055         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2056         mpfr_exp(v->value, x.getReadPtr(), GMP_RNDN);
2057         return v;
2058     }
2059 
2060     template<unsigned int Precision>
sinh(const ampf<Precision> & x)2061     const ampf<Precision> sinh(const ampf<Precision> &x)
2062     {
2063         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2064         mpfr_sinh(v->value, x.getReadPtr(), GMP_RNDN);
2065         return v;
2066     }
2067 
2068     template<unsigned int Precision>
cosh(const ampf<Precision> & x)2069     const ampf<Precision> cosh(const ampf<Precision> &x)
2070     {
2071         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2072         mpfr_cosh(v->value, x.getReadPtr(), GMP_RNDN);
2073         return v;
2074     }
2075 
2076     template<unsigned int Precision>
tanh(const ampf<Precision> & x)2077     const ampf<Precision> tanh(const ampf<Precision> &x)
2078     {
2079         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2080         mpfr_tanh(v->value, x.getReadPtr(), GMP_RNDN);
2081         return v;
2082     }
2083 
2084     template<unsigned int Precision>
pow(const ampf<Precision> & x,const ampf<Precision> & y)2085     const ampf<Precision> pow(const ampf<Precision> &x, const ampf<Precision> &y)
2086     {
2087         mpfr_record *v = mpfr_storage::newMpfr(Precision);
2088         mpfr_pow(v->value, x.getReadPtr(), y.getReadPtr(), GMP_RNDN);
2089         return v;
2090     }
2091 
2092     //
2093     // complex ampf
2094     //
2095     template<unsigned int Precision>
2096     class campf
2097     {
2098     public:
campf()2099         campf():x(0),y(0){};
campf(long double v)2100         campf(long double v)    { x=v; y=0; }
campf(double v)2101         campf(double v)         { x=v; y=0; }
campf(float v)2102         campf(float v)          { x=v; y=0; }
campf(signed long v)2103         campf(signed long v)    { x=v; y=0; }
campf(unsigned long v)2104         campf(unsigned long v)  { x=v; y=0; }
campf(signed int v)2105         campf(signed int v)     { x=v; y=0; }
campf(unsigned int v)2106         campf(unsigned int v)   { x=v; y=0; }
campf(signed short v)2107         campf(signed short v)   { x=v; y=0; }
campf(unsigned short v)2108         campf(unsigned short v) { x=v; y=0; }
campf(signed char v)2109         campf(signed char v)    { x=v; y=0; }
campf(unsigned char v)2110         campf(unsigned char v)  { x=v; y=0; }
campf(const ampf<Precision> & _x)2111         campf(const ampf<Precision> &_x):x(_x),y(0){};
campf(const ampf<Precision> & _x,const ampf<Precision> & _y)2112         campf(const ampf<Precision> &_x, const ampf<Precision> &_y):x(_x),y(_y){};
campf(const campf & z)2113         campf(const campf &z):x(z.x),y(z.y){};
2114 #ifndef _AMP_NO_TEMPLATE_CONSTRUCTORS
2115         template<unsigned int Prec2>
campf(const campf<Prec2> & z)2116         campf(const campf<Prec2> &z):x(z.x),y(z.y){};
2117 #endif
2118 
2119         campf& operator= (long double v)         { x=v; y=0; return *this; }
2120         campf& operator= (double v)              { x=v; y=0; return *this; }
2121         campf& operator= (float v)               { x=v; y=0; return *this; }
2122         campf& operator= (signed long v)         { x=v; y=0; return *this; }
2123         campf& operator= (unsigned long v)       { x=v; y=0; return *this; }
2124         campf& operator= (signed int v)          { x=v; y=0; return *this; }
2125         campf& operator= (unsigned int v)        { x=v; y=0; return *this; }
2126         campf& operator= (signed short v)        { x=v; y=0; return *this; }
2127         campf& operator= (unsigned short v)      { x=v; y=0; return *this; }
2128         campf& operator= (signed char v)         { x=v; y=0; return *this; }
2129         campf& operator= (unsigned char v)       { x=v; y=0; return *this; }
2130         campf& operator= (const char *s)         { x=s; y=0; return *this; }
2131         campf& operator= (const std::string &s)  { x=s; y=0; return *this; }
2132         campf& operator= (const campf& r)
2133         {
2134             x = r.x;
2135             y = r.y;
2136             return *this;
2137         }
2138 #ifndef _AMP_NO_TEMPLATE_CONSTRUCTORS
2139         template<unsigned int Precision2>
2140         campf& operator= (const campf<Precision2>& r)
2141         {
2142             x = r.x;
2143             y = r.y;
2144             return *this;
2145         }
2146 #endif
2147 
2148         ampf<Precision> x, y;
2149     };
2150 
2151     //
2152     // complex operations
2153     //
2154     template<unsigned int Precision>
2155     bool operator==(const campf<Precision>& lhs, const campf<Precision>& rhs)
2156     { return lhs.x==rhs.x && lhs.y==rhs.y; }
2157 
2158     template<unsigned int Precision>
2159     bool operator!=(const campf<Precision>& lhs, const campf<Precision>& rhs)
2160     { return lhs.x!=rhs.x || lhs.y!=rhs.y; }
2161 
2162     template<unsigned int Precision>
2163     const campf<Precision> operator+(const campf<Precision>& lhs)
2164     { return lhs; }
2165 
2166     template<unsigned int Precision>
2167     campf<Precision>& operator+=(campf<Precision>& lhs, const campf<Precision>& rhs)
2168     { lhs.x += rhs.x; lhs.y += rhs.y; return lhs; }
2169 
2170     template<unsigned int Precision>
2171     const campf<Precision> operator+(const campf<Precision>& lhs, const campf<Precision>& rhs)
2172     { campf<Precision> r = lhs; r += rhs; return r; }
2173 
2174     template<unsigned int Precision>
2175     const campf<Precision> operator-(const campf<Precision>& lhs)
2176     { return campf<Precision>(-lhs.x, -lhs.y); }
2177 
2178     template<unsigned int Precision>
2179     campf<Precision>& operator-=(campf<Precision>& lhs, const campf<Precision>& rhs)
2180     { lhs.x -= rhs.x; lhs.y -= rhs.y; return lhs; }
2181 
2182     template<unsigned int Precision>
2183     const campf<Precision> operator-(const campf<Precision>& lhs, const campf<Precision>& rhs)
2184     { campf<Precision> r = lhs; r -= rhs; return r; }
2185 
2186     template<unsigned int Precision>
2187     campf<Precision>& operator*=(campf<Precision>& lhs, const campf<Precision>& rhs)
2188     {
2189         ampf<Precision> xx(lhs.x*rhs.x), yy(lhs.y*rhs.y), mm((lhs.x+lhs.y)*(rhs.x+rhs.y));
2190         lhs.x = xx-yy;
2191         lhs.y = mm-xx-yy;
2192         return lhs;
2193     }
2194 
2195     template<unsigned int Precision>
2196     const campf<Precision> operator*(const campf<Precision>& lhs, const campf<Precision>& rhs)
2197     { campf<Precision> r = lhs; r *= rhs; return r; }
2198 
2199     template<unsigned int Precision>
2200     const campf<Precision> operator/(const campf<Precision>& lhs, const campf<Precision>& rhs)
2201     {
2202         campf<Precision> result;
2203         ampf<Precision> e;
2204         ampf<Precision> f;
2205         if( abs(rhs.y)<abs(rhs.x) )
2206         {
2207             e = rhs.y/rhs.x;
2208             f = rhs.x+rhs.y*e;
2209             result.x = (lhs.x+lhs.y*e)/f;
2210             result.y = (lhs.y-lhs.x*e)/f;
2211         }
2212         else
2213         {
2214             e = rhs.x/rhs.y;
2215             f = rhs.y+rhs.x*e;
2216             result.x = (lhs.y+lhs.x*e)/f;
2217             result.y = (-lhs.x+lhs.y*e)/f;
2218         }
2219         return result;
2220     }
2221 
2222     template<unsigned int Precision>
2223     campf<Precision>& operator/=(campf<Precision>& lhs, const campf<Precision>& rhs)
2224     {
2225         lhs = lhs/rhs;
2226         return lhs;
2227     }
2228 
2229     template<unsigned int Precision>
abscomplex(const campf<Precision> & z)2230     const ampf<Precision> abscomplex(const campf<Precision> &z)
2231     {
2232         ampf<Precision> w, xabs, yabs, v;
2233 
2234         xabs = abs(z.x);
2235         yabs = abs(z.y);
2236         w = xabs>yabs ? xabs : yabs;
2237         v = xabs<yabs ? xabs : yabs;
2238         if( v==0 )
2239             return w;
2240         else
2241         {
2242             ampf<Precision> t = v/w;
2243             return w*sqrt(1+sqr(t));
2244         }
2245     }
2246 
2247     template<unsigned int Precision>
conj(const campf<Precision> & z)2248     const campf<Precision> conj(const campf<Precision> &z)
2249     {
2250         return campf<Precision>(z.x, -z.y);
2251     }
2252 
2253     template<unsigned int Precision>
csqr(const campf<Precision> & z)2254     const campf<Precision> csqr(const campf<Precision> &z)
2255     {
2256         ampf<Precision> t = z.x*z.y;
2257         return campf<Precision>(sqr(z.x)-sqr(z.y), t+t);
2258     }
2259 
2260     //
2261     // different types of arguments
2262     //
2263     #define __AMP_BINARY_OPI(type) \
2264         template<unsigned int Precision> const campf<Precision> operator+ (const signed type& op1,      const campf<Precision>& op2) { return campf<Precision>(op1+op2.x, op2.y); }   \
2265         template<unsigned int Precision> const campf<Precision> operator+ (const unsigned type& op1,    const campf<Precision>& op2) { return campf<Precision>(op1+op2.x, op2.y); }   \
2266         template<unsigned int Precision> const campf<Precision> operator+ (const campf<Precision>& op1, const signed type& op2)      { return campf<Precision>(op1.x+op2, op1.y); }   \
2267         template<unsigned int Precision> const campf<Precision> operator+ (const campf<Precision>& op1, const unsigned type& op2)    { return campf<Precision>(op1.x+op2, op1.y); }   \
2268         template<unsigned int Precision> const campf<Precision> operator- (const signed type& op1,      const campf<Precision>& op2) { return campf<Precision>(op1-op2.x, -op2.y); }   \
2269         template<unsigned int Precision> const campf<Precision> operator- (const unsigned type& op1,    const campf<Precision>& op2) { return campf<Precision>(op1-op2.x, -op2.y); }   \
2270         template<unsigned int Precision> const campf<Precision> operator- (const campf<Precision>& op1, const signed type& op2)      { return campf<Precision>(op1.x-op2, op1.y); }   \
2271         template<unsigned int Precision> const campf<Precision> operator- (const campf<Precision>& op1, const unsigned type& op2)    { return campf<Precision>(op1.x-op2, op1.y); }   \
2272         template<unsigned int Precision> const campf<Precision> operator* (const signed type& op1,      const campf<Precision>& op2) { return campf<Precision>(op1*op2.x, op1*op2.y); }   \
2273         template<unsigned int Precision> const campf<Precision> operator* (const unsigned type& op1,    const campf<Precision>& op2) { return campf<Precision>(op1*op2.x, op1*op2.y); }   \
2274         template<unsigned int Precision> const campf<Precision> operator* (const campf<Precision>& op1, const signed type& op2)      { return campf<Precision>(op2*op1.x, op2*op1.y); }   \
2275         template<unsigned int Precision> const campf<Precision> operator* (const campf<Precision>& op1, const unsigned type& op2)    { return campf<Precision>(op2*op1.x, op2*op1.y); }   \
2276         template<unsigned int Precision> const campf<Precision> operator/ (const signed type& op1,      const campf<Precision>& op2) { return campf<Precision>(ampf<Precision>(op1),ampf<Precision>(0))/op2; }   \
2277         template<unsigned int Precision> const campf<Precision> operator/ (const unsigned type& op1,    const campf<Precision>& op2) { return campf<Precision>(ampf<Precision>(op1),ampf<Precision>(0))/op2; }   \
2278         template<unsigned int Precision> const campf<Precision> operator/ (const campf<Precision>& op1, const signed type& op2)      { return campf<Precision>(op1.x/op2, op1.y/op2); }   \
2279         template<unsigned int Precision> const campf<Precision> operator/ (const campf<Precision>& op1, const unsigned type& op2)    { return campf<Precision>(op1.x/op2, op1.y/op2); }   \
2280         template<unsigned int Precision>                   bool operator==(const signed type& op1,      const campf<Precision>& op2) { return op1==op2.x && op2.y==0; }   \
2281         template<unsigned int Precision>                   bool operator==(const unsigned type& op1,    const campf<Precision>& op2) { return op1==op2.x && op2.y==0; }   \
2282         template<unsigned int Precision>                   bool operator==(const campf<Precision>& op1, const signed type& op2)      { return op1.x==op2 && op1.y==0; }   \
2283         template<unsigned int Precision>                   bool operator==(const campf<Precision>& op1, const unsigned type& op2)    { return op1.x==op2 && op1.y==0; }   \
2284         template<unsigned int Precision>                   bool operator!=(const campf<Precision>& op1, const signed type& op2)      { return op1.x!=op2 || op1.y!=0; }   \
2285         template<unsigned int Precision>                   bool operator!=(const campf<Precision>& op1, const unsigned type& op2)    { return op1.x!=op2 || op1.y!=0; }   \
2286         template<unsigned int Precision>                   bool operator!=(const signed type& op1,      const campf<Precision>& op2) { return op1!=op2.x || op2.y!=0; }   \
2287         template<unsigned int Precision>                   bool operator!=(const unsigned type& op1,    const campf<Precision>& op2) { return op1!=op2.x || op2.y!=0; }
2288     __AMP_BINARY_OPI(char)
__AMP_BINARY_OPI(short)2289     __AMP_BINARY_OPI(short)
2290     __AMP_BINARY_OPI(long)
2291     __AMP_BINARY_OPI(int)
2292     #undef __AMP_BINARY_OPI
2293     #define __AMP_BINARY_OPF(type) \
2294         template<unsigned int Precision> const campf<Precision> operator+ (const type& op1,             const campf<Precision>& op2) { return campf<Precision>(op1+op2.x, op2.y); }   \
2295         template<unsigned int Precision> const campf<Precision> operator+ (const campf<Precision>& op1, const type& op2)             { return campf<Precision>(op1.x+op2, op1.y); }   \
2296         template<unsigned int Precision> const campf<Precision> operator- (const type& op1,             const campf<Precision>& op2) { return campf<Precision>(op1-op2.x, -op2.y); }   \
2297         template<unsigned int Precision> const campf<Precision> operator- (const campf<Precision>& op1, const type& op2)             { return campf<Precision>(op1.x-op2, op1.y); }   \
2298         template<unsigned int Precision> const campf<Precision> operator* (const type& op1,             const campf<Precision>& op2) { return campf<Precision>(op1*op2.x, op1*op2.y); }   \
2299         template<unsigned int Precision> const campf<Precision> operator* (const campf<Precision>& op1, const type& op2)             { return campf<Precision>(op2*op1.x, op2*op1.y); }   \
2300         template<unsigned int Precision> const campf<Precision> operator/ (const type& op1,             const campf<Precision>& op2) { return campf<Precision>(ampf<Precision>(op1),ampf<Precision>(0))/op2; }   \
2301         template<unsigned int Precision> const campf<Precision> operator/ (const campf<Precision>& op1, const type& op2)             { return campf<Precision>(op1.x/op2, op1.y/op2); }   \
2302         template<unsigned int Precision>                   bool operator==(const type& op1,             const campf<Precision>& op2) { return op1==op2.x && op2.y==0; }   \
2303         template<unsigned int Precision>                   bool operator==(const campf<Precision>& op1, const type& op2)             { return op1.x==op2 && op1.y==0; }   \
2304         template<unsigned int Precision>                   bool operator!=(const type& op1,             const campf<Precision>& op2) { return op1!=op2.x || op2.y!=0; }   \
2305         template<unsigned int Precision>                   bool operator!=(const campf<Precision>& op1, const type& op2)             { return op1.x!=op2 || op1.y!=0; }
2306     __AMP_BINARY_OPF(float)
2307     __AMP_BINARY_OPF(double)
2308     __AMP_BINARY_OPF(long double)
2309     __AMP_BINARY_OPF(ampf<Precision>)
2310     #undef __AMP_BINARY_OPF
2311 
2312     //
2313     // Real linear algebra
2314     //
2315     template<unsigned int Precision>
2316     ampf<Precision> vDotProduct(ap::const_raw_vector< ampf<Precision> > v1, ap::const_raw_vector< ampf<Precision> > v2)
2317     {
2318         ap::ap_error::make_assertion(v1.GetLength()==v2.GetLength());
2319         int i, cnt = v1.GetLength();
2320         const ampf<Precision> *p1 = v1.GetData();
2321         const ampf<Precision> *p2 = v2.GetData();
2322         mpfr_record *r = NULL;
2323         mpfr_record *t = NULL;
2324         //try
2325         {
2326             r = mpfr_storage::newMpfr(Precision);
2327             t = mpfr_storage::newMpfr(Precision);
2328             mpfr_set_ui(r->value, 0, GMP_RNDN);
2329             for(i=0; i<cnt; i++)
2330             {
2331                 mpfr_mul(t->value, p1->getReadPtr(), p2->getReadPtr(), GMP_RNDN);
2332                 mpfr_add(r->value, r->value, t->value, GMP_RNDN);
2333                 p1 += v1.GetStep();
2334                 p2 += v2.GetStep();
2335             }
2336             mpfr_storage::deleteMpfr(t);
2337             return r;
2338         }
2339         //catch(...)
2340         //{
2341         //    if( r!=NULL )
2342         //        mpfr_storage::deleteMpfr(r);
2343         //    if( t!=NULL )
2344         //        mpfr_storage::deleteMpfr(t);
2345         //    throw;
2346         //}
2347     }
2348 
2349     template<unsigned int Precision>
vMove(ap::raw_vector<ampf<Precision>> vDst,ap::const_raw_vector<ampf<Precision>> vSrc)2350     void vMove(ap::raw_vector< ampf<Precision> > vDst, ap::const_raw_vector< ampf<Precision> > vSrc)
2351     {
2352         ap::ap_error::make_assertion(vDst.GetLength()==vSrc.GetLength());
2353         int i, cnt = vDst.GetLength();
2354         ampf<Precision> *pDst = vDst.GetData();
2355         const ampf<Precision> *pSrc = vSrc.GetData();
2356         if( pDst==pSrc )
2357             return;
2358         for(i=0; i<cnt; i++)
2359         {
2360             *pDst = *pSrc;
2361             pDst += vDst.GetStep();
2362             pSrc += vSrc.GetStep();
2363         }
2364     }
2365 
2366     template<unsigned int Precision>
vMoveNeg(ap::raw_vector<ampf<Precision>> vDst,ap::const_raw_vector<ampf<Precision>> vSrc)2367     void vMoveNeg(ap::raw_vector< ampf<Precision> > vDst, ap::const_raw_vector< ampf<Precision> > vSrc)
2368     {
2369         ap::ap_error::make_assertion(vDst.GetLength()==vSrc.GetLength());
2370         int i, cnt = vDst.GetLength();
2371         ampf<Precision> *pDst = vDst.GetData();
2372         const ampf<Precision> *pSrc = vSrc.GetData();
2373         for(i=0; i<cnt; i++)
2374         {
2375             *pDst = *pSrc;
2376             mpfr_ptr v = pDst->getWritePtr();
2377             mpfr_neg(v, v, GMP_RNDN);
2378             pDst += vDst.GetStep();
2379             pSrc += vSrc.GetStep();
2380         }
2381     }
2382 
2383     template<unsigned int Precision, class T2>
vMove(ap::raw_vector<ampf<Precision>> vDst,ap::const_raw_vector<ampf<Precision>> vSrc,T2 alpha)2384     void vMove(ap::raw_vector< ampf<Precision> > vDst, ap::const_raw_vector< ampf<Precision> > vSrc, T2 alpha)
2385     {
2386         ap::ap_error::make_assertion(vDst.GetLength()==vSrc.GetLength());
2387         int i, cnt = vDst.GetLength();
2388         ampf<Precision>       *pDst = vDst.GetData();
2389         const ampf<Precision> *pSrc = vSrc.GetData();
2390         ampf<Precision>       a(alpha);
2391         for(i=0; i<cnt; i++)
2392         {
2393             *pDst = *pSrc;
2394             mpfr_ptr v = pDst->getWritePtr();
2395             mpfr_mul(v, v, a.getReadPtr(), GMP_RNDN);
2396             pDst += vDst.GetStep();
2397             pSrc += vSrc.GetStep();
2398         }
2399     }
2400 
2401     template<unsigned int Precision>
vAdd(ap::raw_vector<ampf<Precision>> vDst,ap::const_raw_vector<ampf<Precision>> vSrc)2402     void vAdd(ap::raw_vector< ampf<Precision> > vDst, ap::const_raw_vector< ampf<Precision> > vSrc)
2403     {
2404         ap::ap_error::make_assertion(vDst.GetLength()==vSrc.GetLength());
2405         int i, cnt = vDst.GetLength();
2406         ampf<Precision>       *pDst = vDst.GetData();
2407         const ampf<Precision> *pSrc = vSrc.GetData();
2408         for(i=0; i<cnt; i++)
2409         {
2410             mpfr_ptr    v  = pDst->getWritePtr();
2411             mpfr_srcptr vs = pSrc->getReadPtr();
2412             mpfr_add(v, v, vs, GMP_RNDN);
2413             pDst += vDst.GetStep();
2414             pSrc += vSrc.GetStep();
2415         }
2416     }
2417 
2418     template<unsigned int Precision, class T2>
vAdd(ap::raw_vector<ampf<Precision>> vDst,ap::const_raw_vector<ampf<Precision>> vSrc,T2 alpha)2419     void vAdd(ap::raw_vector< ampf<Precision> > vDst, ap::const_raw_vector< ampf<Precision> > vSrc, T2 alpha)
2420     {
2421         ap::ap_error::make_assertion(vDst.GetLength()==vSrc.GetLength());
2422         int i, cnt = vDst.GetLength();
2423         ampf<Precision>       *pDst = vDst.GetData();
2424         const ampf<Precision> *pSrc = vSrc.GetData();
2425         ampf<Precision>       a(alpha), tmp;
2426         for(i=0; i<cnt; i++)
2427         {
2428             mpfr_ptr    v  = pDst->getWritePtr();
2429             mpfr_srcptr vs = pSrc->getReadPtr();
2430             mpfr_mul(tmp.getWritePtr(), a.getReadPtr(), vs, GMP_RNDN);
2431             mpfr_add(v, v, tmp.getWritePtr(), GMP_RNDN);
2432             pDst += vDst.GetStep();
2433             pSrc += vSrc.GetStep();
2434         }
2435     }
2436 
2437     template<unsigned int Precision>
vSub(ap::raw_vector<ampf<Precision>> vDst,ap::const_raw_vector<ampf<Precision>> vSrc)2438     void vSub(ap::raw_vector< ampf<Precision> > vDst, ap::const_raw_vector< ampf<Precision> > vSrc)
2439     {
2440         ap::ap_error::make_assertion(vDst.GetLength()==vSrc.GetLength());
2441         int i, cnt = vDst.GetLength();
2442         ampf<Precision>       *pDst = vDst.GetData();
2443         const ampf<Precision> *pSrc = vSrc.GetData();
2444         for(i=0; i<cnt; i++)
2445         {
2446             mpfr_ptr    v  = pDst->getWritePtr();
2447             mpfr_srcptr vs = pSrc->getReadPtr();
2448             mpfr_sub(v, v, vs, GMP_RNDN);
2449             pDst += vDst.GetStep();
2450             pSrc += vSrc.GetStep();
2451         }
2452     }
2453 
2454     template<unsigned int Precision, class T2>
vSub(ap::raw_vector<ampf<Precision>> vDst,ap::const_raw_vector<ampf<Precision>> vSrc,T2 alpha)2455     void vSub(ap::raw_vector< ampf<Precision> > vDst, ap::const_raw_vector< ampf<Precision> > vSrc, T2 alpha)
2456     {
2457         vAdd(vDst, vSrc, -alpha);
2458     }
2459 
2460     template<unsigned int Precision, class T2>
vMul(ap::raw_vector<ampf<Precision>> vDst,T2 alpha)2461     void vMul(ap::raw_vector< ampf<Precision> > vDst, T2 alpha)
2462     {
2463         int i, cnt = vDst.GetLength();
2464         ampf<Precision>       *pDst = vDst.GetData();
2465         ampf<Precision>       a(alpha);
2466         for(i=0; i<cnt; i++)
2467         {
2468             mpfr_ptr    v  = pDst->getWritePtr();
2469             mpfr_mul(v, a.getReadPtr(), v, GMP_RNDN);
2470             pDst += vDst.GetStep();
2471         }
2472     }
2473 }
2474 
2475 /* stuff included from ./reflections.h */
2476 
2477 /*************************************************************************
2478 Copyright (c) 1992-2007 The University of Tennessee.  All rights reserved.
2479 
2480 Contributors:
2481     * Sergey Bochkanov (ALGLIB project). Translation from FORTRAN to
2482       pseudocode.
2483 
2484 See subroutines comments for additional copyrights.
2485 
2486 Redistribution and use in source and binary forms, with or without
2487 modification, are permitted provided that the following conditions are
2488 met:
2489 
2490 - Redistributions of source code must retain the above copyright
2491   notice, this list of conditions and the following disclaimer.
2492 
2493 - Redistributions in binary form must reproduce the above copyright
2494   notice, this list of conditions and the following disclaimer listed
2495   in this license in the documentation and/or other materials
2496   provided with the distribution.
2497 
2498 - Neither the name of the copyright holders nor the names of its
2499   contributors may be used to endorse or promote products derived from
2500   this software without specific prior written permission.
2501 
2502 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2503 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2504 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2505 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2506 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2507 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2508 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2509 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2510 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2511 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2512 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2513 *************************************************************************/
2514 
2515 namespace reflections
2516 {
2517     template<unsigned int Precision>
2518     void generatereflection(ap::template_1d_array< amp::ampf<Precision> >& x,
2519         int n,
2520         amp::ampf<Precision>& tau);
2521     template<unsigned int Precision>
2522     void applyreflectionfromtheleft(ap::template_2d_array< amp::ampf<Precision> >& c,
2523         amp::ampf<Precision> tau,
2524         const ap::template_1d_array< amp::ampf<Precision> >& v,
2525         int m1,
2526         int m2,
2527         int n1,
2528         int n2,
2529         ap::template_1d_array< amp::ampf<Precision> >& work);
2530     template<unsigned int Precision>
2531     void applyreflectionfromtheright(ap::template_2d_array< amp::ampf<Precision> >& c,
2532         amp::ampf<Precision> tau,
2533         const ap::template_1d_array< amp::ampf<Precision> >& v,
2534         int m1,
2535         int m2,
2536         int n1,
2537         int n2,
2538         ap::template_1d_array< amp::ampf<Precision> >& work);
2539 
2540 
2541     /*************************************************************************
2542     Generation of an elementary reflection transformation
2543 
2544     The subroutine generates elementary reflection H of order N, so that, for
2545     a given X, the following equality holds true:
2546 
2547         ( X(1) )   ( Beta )
2548     H * (  ..  ) = (  0   )
2549         ( X(n) )   (  0   )
2550 
2551     where
2552                   ( V(1) )
2553     H = 1 - Tau * (  ..  ) * ( V(1), ..., V(n) )
2554                   ( V(n) )
2555 
2556     where the first component of vector V equals 1.
2557 
2558     Input parameters:
2559         X   -   vector. Array whose index ranges within [1..N].
2560         N   -   reflection order.
2561 
2562     Output parameters:
2563         X   -   components from 2 to N are replaced with vector V.
2564                 The first component is replaced with parameter Beta.
2565         Tau -   scalar value Tau. If X is a null vector, Tau equals 0,
2566                 otherwise 1 <= Tau <= 2.
2567 
2568     This subroutine is the modification of the DLARFG subroutines from
2569     the LAPACK library. It has a similar functionality except for the
2570     cause an overflow.
2571 
2572 
2573     MODIFICATIONS:
2574         24.12.2005 sign(Alpha) was replaced with an analogous to the Fortran SIGN code.
2575 
2576       -- LAPACK auxiliary routine (version 3.0) --
2577          Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2578          Courant Institute, Argonne National Lab, and Rice University
2579          September 30, 1994
2580     *************************************************************************/
2581     template<unsigned int Precision>
generatereflection(ap::template_1d_array<amp::ampf<Precision>> & x,int n,amp::ampf<Precision> & tau)2582     void generatereflection(ap::template_1d_array< amp::ampf<Precision> >& x,
2583         int n,
2584         amp::ampf<Precision>& tau)
2585     {
2586         int j;
2587         amp::ampf<Precision> alpha;
2588         amp::ampf<Precision> xnorm;
2589         amp::ampf<Precision> v;
2590         amp::ampf<Precision> beta;
2591         amp::ampf<Precision> mx;
2592 
2593 
2594 
2595         //
2596         // Executable Statements ..
2597         //
2598         if( n<=1 )
2599         {
2600             tau = 0;
2601             return;
2602         }
2603 
2604         //
2605         // XNORM = DNRM2( N-1, X, INCX )
2606         //
2607         alpha = x(1);
2608         mx = 0;
2609         for(j=2; j<=n; j++)
2610         {
2611             mx = amp::maximum<Precision>(amp::abs<Precision>(x(j)), mx);
2612         }
2613         xnorm = 0;
2614         if( mx!=0 )
2615         {
2616             for(j=2; j<=n; j++)
2617             {
2618                 xnorm = xnorm+amp::sqr<Precision>(x(j)/mx);
2619             }
2620             xnorm = amp::sqrt<Precision>(xnorm)*mx;
2621         }
2622         if( xnorm==0 )
2623         {
2624 
2625             //
2626             // H  =  I
2627             //
2628             tau = 0;
2629             return;
2630         }
2631 
2632         //
2633         // general case
2634         //
2635         mx = amp::maximum<Precision>(amp::abs<Precision>(alpha), amp::abs<Precision>(xnorm));
2636         beta = -mx*amp::sqrt<Precision>(amp::sqr<Precision>(alpha/mx)+amp::sqr<Precision>(xnorm/mx));
2637         if( alpha<0 )
2638         {
2639             beta = -beta;
2640         }
2641         tau = (beta-alpha)/beta;
2642         v = 1/(alpha-beta);
2643         ap::vmul(x.getvector(2, n), v);
2644         x(1) = beta;
2645     }
2646 
2647 
2648     /*************************************************************************
2649     Application of an elementary reflection to a rectangular matrix of size MxN
2650 
2651     The algorithm pre-multiplies the matrix by an elementary reflection transformation
2652     which is given by column V and scalar Tau (see the description of the
2653     GenerateReflection procedure). Not the whole matrix but only a part of it
2654     is transformed (rows from M1 to M2, columns from N1 to N2). Only the elements
2655     of this submatrix are changed.
2656 
2657     Input parameters:
2658         C       -   matrix to be transformed.
2659         Tau     -   scalar defining the transformation.
2660         V       -   column defining the transformation.
2661                     Array whose index ranges within [1..M2-M1+1].
2662         M1, M2  -   range of rows to be transformed.
2663         N1, N2  -   range of columns to be transformed.
2664         WORK    -   working array whose indexes goes from N1 to N2.
2665 
2666     Output parameters:
2667         C       -   the result of multiplying the input matrix C by the
2668                     transformation matrix which is given by Tau and V.
2669                     If N1>N2 or M1>M2, C is not modified.
2670 
2671       -- LAPACK auxiliary routine (version 3.0) --
2672          Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2673          Courant Institute, Argonne National Lab, and Rice University
2674          September 30, 1994
2675     *************************************************************************/
2676     template<unsigned int Precision>
applyreflectionfromtheleft(ap::template_2d_array<amp::ampf<Precision>> & c,amp::ampf<Precision> tau,const ap::template_1d_array<amp::ampf<Precision>> & v,int m1,int m2,int n1,int n2,ap::template_1d_array<amp::ampf<Precision>> & work)2677     void applyreflectionfromtheleft(ap::template_2d_array< amp::ampf<Precision> >& c,
2678         amp::ampf<Precision> tau,
2679         const ap::template_1d_array< amp::ampf<Precision> >& v,
2680         int m1,
2681         int m2,
2682         int n1,
2683         int n2,
2684         ap::template_1d_array< amp::ampf<Precision> >& work)
2685     {
2686         amp::ampf<Precision> t;
2687         int i;
2688 
2689         if( tau==0 || n1>n2 || m1>m2 )
2690         {
2691             return;
2692         }
2693 
2694         //
2695         // w := C' * v
2696         //
2697         for(i=n1; i<=n2; i++)
2698         {
2699             work(i) = 0;
2700         }
2701         for(i=m1; i<=m2; i++)
2702         {
2703             t = v(i+1-m1);
2704             ap::vadd(work.getvector(n1, n2), c.getrow(i, n1, n2), t);
2705         }
2706 
2707         //
2708         // C := C - tau * v * w'
2709         //
2710         for(i=m1; i<=m2; i++)
2711         {
2712             t = v(i-m1+1)*tau;
2713             ap::vsub(c.getrow(i, n1, n2), work.getvector(n1, n2), t);
2714         }
2715     }
2716 
2717 
2718     /*************************************************************************
2719     Application of an elementary reflection to a rectangular matrix of size MxN
2720 
2721     The algorithm post-multiplies the matrix by an elementary reflection transformation
2722     which is given by column V and scalar Tau (see the description of the
2723     GenerateReflection procedure). Not the whole matrix but only a part of it
2724     is transformed (rows from M1 to M2, columns from N1 to N2). Only the
2725     elements of this submatrix are changed.
2726 
2727     Input parameters:
2728         C       -   matrix to be transformed.
2729         Tau     -   scalar defining the transformation.
2730         V       -   column defining the transformation.
2731                     Array whose index ranges within [1..N2-N1+1].
2732         M1, M2  -   range of rows to be transformed.
2733         N1, N2  -   range of columns to be transformed.
2734         WORK    -   working array whose indexes goes from M1 to M2.
2735 
2736     Output parameters:
2737         C       -   the result of multiplying the input matrix C by the
2738                     transformation matrix which is given by Tau and V.
2739                     If N1>N2 or M1>M2, C is not modified.
2740 
2741       -- LAPACK auxiliary routine (version 3.0) --
2742          Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2743          Courant Institute, Argonne National Lab, and Rice University
2744          September 30, 1994
2745     *************************************************************************/
2746     template<unsigned int Precision>
applyreflectionfromtheright(ap::template_2d_array<amp::ampf<Precision>> & c,amp::ampf<Precision> tau,const ap::template_1d_array<amp::ampf<Precision>> & v,int m1,int m2,int n1,int n2,ap::template_1d_array<amp::ampf<Precision>> & work)2747     void applyreflectionfromtheright(ap::template_2d_array< amp::ampf<Precision> >& c,
2748         amp::ampf<Precision> tau,
2749         const ap::template_1d_array< amp::ampf<Precision> >& v,
2750         int m1,
2751         int m2,
2752         int n1,
2753         int n2,
2754         ap::template_1d_array< amp::ampf<Precision> >& work)
2755     {
2756         amp::ampf<Precision> t;
2757         int i;
2758         int vm;
2759 
2760 
2761         if( tau==0 || n1>n2 || m1>m2 )
2762         {
2763             return;
2764         }
2765 
2766         //
2767         // w := C * v
2768         //
2769         vm = n2-n1+1;
2770         for(i=m1; i<=m2; i++)
2771         {
2772             t = ap::vdotproduct(c.getrow(i, n1, n2), v.getvector(1, vm));
2773             work(i) = t;
2774         }
2775 
2776         //
2777         // C := C - w * v'
2778         //
2779         for(i=m1; i<=m2; i++)
2780         {
2781             t = work(i)*tau;
2782             ap::vsub(c.getrow(i, n1, n2), v.getvector(1, vm), t);
2783         }
2784     }
2785 } // namespace
2786 
2787 /* stuff included from ./bidiagonal.h */
2788 
2789 /*************************************************************************
2790 Copyright (c) 1992-2007 The University of Tennessee.  All rights reserved.
2791 
2792 Contributors:
2793     * Sergey Bochkanov (ALGLIB project). Translation from FORTRAN to
2794       pseudocode.
2795 
2796 See subroutines comments for additional copyrights.
2797 
2798 Redistribution and use in source and binary forms, with or without
2799 modification, are permitted provided that the following conditions are
2800 met:
2801 
2802 - Redistributions of source code must retain the above copyright
2803   notice, this list of conditions and the following disclaimer.
2804 
2805 - Redistributions in binary form must reproduce the above copyright
2806   notice, this list of conditions and the following disclaimer listed
2807   in this license in the documentation and/or other materials
2808   provided with the distribution.
2809 
2810 - Neither the name of the copyright holders nor the names of its
2811   contributors may be used to endorse or promote products derived from
2812   this software without specific prior written permission.
2813 
2814 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2815 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2816 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2817 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2818 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2819 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2820 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2821 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2822 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2823 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2824 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2825 *************************************************************************/
2826 
2827 namespace bidiagonal
2828 {
2829     template<unsigned int Precision>
2830     void rmatrixbd(ap::template_2d_array< amp::ampf<Precision> >& a,
2831         int m,
2832         int n,
2833         ap::template_1d_array< amp::ampf<Precision> >& tauq,
2834         ap::template_1d_array< amp::ampf<Precision> >& taup);
2835     template<unsigned int Precision>
2836     void rmatrixbdunpackq(const ap::template_2d_array< amp::ampf<Precision> >& qp,
2837         int m,
2838         int n,
2839         const ap::template_1d_array< amp::ampf<Precision> >& tauq,
2840         int qcolumns,
2841         ap::template_2d_array< amp::ampf<Precision> >& q);
2842     template<unsigned int Precision>
2843     void rmatrixbdmultiplybyq(const ap::template_2d_array< amp::ampf<Precision> >& qp,
2844         int m,
2845         int n,
2846         const ap::template_1d_array< amp::ampf<Precision> >& tauq,
2847         ap::template_2d_array< amp::ampf<Precision> >& z,
2848         int zrows,
2849         int zcolumns,
2850         bool fromtheright,
2851         bool dotranspose);
2852     template<unsigned int Precision>
2853     void rmatrixbdunpackpt(const ap::template_2d_array< amp::ampf<Precision> >& qp,
2854         int m,
2855         int n,
2856         const ap::template_1d_array< amp::ampf<Precision> >& taup,
2857         int ptrows,
2858         ap::template_2d_array< amp::ampf<Precision> >& pt);
2859     template<unsigned int Precision>
2860     void rmatrixbdmultiplybyp(const ap::template_2d_array< amp::ampf<Precision> >& qp,
2861         int m,
2862         int n,
2863         const ap::template_1d_array< amp::ampf<Precision> >& taup,
2864         ap::template_2d_array< amp::ampf<Precision> >& z,
2865         int zrows,
2866         int zcolumns,
2867         bool fromtheright,
2868         bool dotranspose);
2869     template<unsigned int Precision>
2870     void rmatrixbdunpackdiagonals(const ap::template_2d_array< amp::ampf<Precision> >& b,
2871         int m,
2872         int n,
2873         bool& isupper,
2874         ap::template_1d_array< amp::ampf<Precision> >& d,
2875         ap::template_1d_array< amp::ampf<Precision> >& e);
2876     template<unsigned int Precision>
2877     void tobidiagonal(ap::template_2d_array< amp::ampf<Precision> >& a,
2878         int m,
2879         int n,
2880         ap::template_1d_array< amp::ampf<Precision> >& tauq,
2881         ap::template_1d_array< amp::ampf<Precision> >& taup);
2882     template<unsigned int Precision>
2883     void unpackqfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& qp,
2884         int m,
2885         int n,
2886         const ap::template_1d_array< amp::ampf<Precision> >& tauq,
2887         int qcolumns,
2888         ap::template_2d_array< amp::ampf<Precision> >& q);
2889     template<unsigned int Precision>
2890     void multiplybyqfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& qp,
2891         int m,
2892         int n,
2893         const ap::template_1d_array< amp::ampf<Precision> >& tauq,
2894         ap::template_2d_array< amp::ampf<Precision> >& z,
2895         int zrows,
2896         int zcolumns,
2897         bool fromtheright,
2898         bool dotranspose);
2899     template<unsigned int Precision>
2900     void unpackptfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& qp,
2901         int m,
2902         int n,
2903         const ap::template_1d_array< amp::ampf<Precision> >& taup,
2904         int ptrows,
2905         ap::template_2d_array< amp::ampf<Precision> >& pt);
2906     template<unsigned int Precision>
2907     void multiplybypfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& qp,
2908         int m,
2909         int n,
2910         const ap::template_1d_array< amp::ampf<Precision> >& taup,
2911         ap::template_2d_array< amp::ampf<Precision> >& z,
2912         int zrows,
2913         int zcolumns,
2914         bool fromtheright,
2915         bool dotranspose);
2916     template<unsigned int Precision>
2917     void unpackdiagonalsfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& b,
2918         int m,
2919         int n,
2920         bool& isupper,
2921         ap::template_1d_array< amp::ampf<Precision> >& d,
2922         ap::template_1d_array< amp::ampf<Precision> >& e);
2923 
2924 
2925     /*************************************************************************
2926     Reduction of a rectangular matrix to  bidiagonal form
2927 
2928     The algorithm reduces the rectangular matrix A to  bidiagonal form by
2929     orthogonal transformations P and Q: A = Q*B*P.
2930 
2931     Input parameters:
2932         A       -   source matrix. array[0..M-1, 0..N-1]
2933         M       -   number of rows in matrix A.
2934         N       -   number of columns in matrix A.
2935 
2936     Output parameters:
2937         A       -   matrices Q, B, P in compact form (see below).
2938         TauQ    -   scalar factors which are used to form matrix Q.
2939         TauP    -   scalar factors which are used to form matrix P.
2940 
2941     The main diagonal and one of the  secondary  diagonals  of  matrix  A  are
2942     replaced with bidiagonal  matrix  B.  Other  elements  contain  elementary
2943     reflections which form MxM matrix Q and NxN matrix P, respectively.
2944 
2945     If M>=N, B is the upper  bidiagonal  MxN  matrix  and  is  stored  in  the
2946     corresponding  elements  of  matrix  A.  Matrix  Q  is  represented  as  a
2947     product   of   elementary   reflections   Q = H(0)*H(1)*...*H(n-1),  where
2948     H(i) = 1-tau*v*v'. Here tau is a scalar which is stored  in  TauQ[i],  and
2949     vector v has the following  structure:  v(0:i-1)=0, v(i)=1, v(i+1:m-1)  is
2950     stored   in   elements   A(i+1:m-1,i).   Matrix   P  is  as  follows:  P =
2951     G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i],
2952     u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).
2953 
2954     If M<N, B is the  lower  bidiagonal  MxN  matrix  and  is  stored  in  the
2955     corresponding   elements  of  matrix  A.  Q = H(0)*H(1)*...*H(m-2),  where
2956     H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
2957     is    stored    in   elements   A(i+2:m-1,i).    P = G(0)*G(1)*...*G(m-1),
2958     G(i) = 1-tau*u*u', tau is stored in  TauP,  u(0:i-1)=0, u(i)=1, u(i+1:n-1)
2959     is stored in A(i,i+1:n-1).
2960 
2961     EXAMPLE:
2962 
2963     m=6, n=5 (m > n):               m=5, n=6 (m < n):
2964 
2965     (  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
2966     (  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
2967     (  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
2968     (  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
2969     (  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
2970     (  v1  v2  v3  v4  v5 )
2971 
2972     Here vi and ui are vectors which form H(i) and G(i), and d and e -
2973     are the diagonal and off-diagonal elements of matrix B.
2974     *************************************************************************/
2975     template<unsigned int Precision>
rmatrixbd(ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,ap::template_1d_array<amp::ampf<Precision>> & tauq,ap::template_1d_array<amp::ampf<Precision>> & taup)2976     void rmatrixbd(ap::template_2d_array< amp::ampf<Precision> >& a,
2977         int m,
2978         int n,
2979         ap::template_1d_array< amp::ampf<Precision> >& tauq,
2980         ap::template_1d_array< amp::ampf<Precision> >& taup)
2981     {
2982         ap::template_1d_array< amp::ampf<Precision> > work;
2983         ap::template_1d_array< amp::ampf<Precision> > t;
2984         int minmn;
2985         int maxmn;
2986         int i;
2987         int j;
2988         amp::ampf<Precision> ltau;
2989 
2990 
2991 
2992         //
2993         // Prepare
2994         //
2995         if( n<=0 || m<=0 )
2996         {
2997             return;
2998         }
2999         minmn = ap::minint(m, n);
3000         maxmn = ap::maxint(m, n);
3001         work.setbounds(0, maxmn);
3002         t.setbounds(0, maxmn);
3003         if( m>=n )
3004         {
3005             tauq.setbounds(0, n-1);
3006             taup.setbounds(0, n-1);
3007         }
3008         else
3009         {
3010             tauq.setbounds(0, m-1);
3011             taup.setbounds(0, m-1);
3012         }
3013         if( m>=n )
3014         {
3015 
3016             //
3017             // Reduce to upper bidiagonal form
3018             //
3019             for(i=0; i<=n-1; i++)
3020             {
3021 
3022                 //
3023                 // Generate elementary reflector H(i) to annihilate A(i+1:m-1,i)
3024                 //
3025                 ap::vmove(t.getvector(1, m-i), a.getcolumn(i, i, m-1));
3026                 reflections::generatereflection<Precision>(t, m-i, ltau);
3027                 tauq(i) = ltau;
3028                 ap::vmove(a.getcolumn(i, i, m-1), t.getvector(1, m-i));
3029                 t(1) = 1;
3030 
3031                 //
3032                 // Apply H(i) to A(i:m-1,i+1:n-1) from the left
3033                 //
3034                 reflections::applyreflectionfromtheleft<Precision>(a, ltau, t, i, m-1, i+1, n-1, work);
3035                 if( i<n-1 )
3036                 {
3037 
3038                     //
3039                     // Generate elementary reflector G(i) to annihilate
3040                     // A(i,i+2:n-1)
3041                     //
3042                     ap::vmove(t.getvector(1, n-i-1), a.getrow(i, i+1, n-1));
3043                     reflections::generatereflection<Precision>(t, n-1-i, ltau);
3044                     taup(i) = ltau;
3045                     ap::vmove(a.getrow(i, i+1, n-1), t.getvector(1, n-1-i));
3046                     t(1) = 1;
3047 
3048                     //
3049                     // Apply G(i) to A(i+1:m-1,i+1:n-1) from the right
3050                     //
3051                     reflections::applyreflectionfromtheright<Precision>(a, ltau, t, i+1, m-1, i+1, n-1, work);
3052                 }
3053                 else
3054                 {
3055                     taup(i) = 0;
3056                 }
3057             }
3058         }
3059         else
3060         {
3061 
3062             //
3063             // Reduce to lower bidiagonal form
3064             //
3065             for(i=0; i<=m-1; i++)
3066             {
3067 
3068                 //
3069                 // Generate elementary reflector G(i) to annihilate A(i,i+1:n-1)
3070                 //
3071                 ap::vmove(t.getvector(1, n-i), a.getrow(i, i, n-1));
3072                 reflections::generatereflection<Precision>(t, n-i, ltau);
3073                 taup(i) = ltau;
3074                 ap::vmove(a.getrow(i, i, n-1), t.getvector(1, n-i));
3075                 t(1) = 1;
3076 
3077                 //
3078                 // Apply G(i) to A(i+1:m-1,i:n-1) from the right
3079                 //
3080                 reflections::applyreflectionfromtheright<Precision>(a, ltau, t, i+1, m-1, i, n-1, work);
3081                 if( i<m-1 )
3082                 {
3083 
3084                     //
3085                     // Generate elementary reflector H(i) to annihilate
3086                     // A(i+2:m-1,i)
3087                     //
3088                     ap::vmove(t.getvector(1, m-1-i), a.getcolumn(i, i+1, m-1));
3089                     reflections::generatereflection<Precision>(t, m-1-i, ltau);
3090                     tauq(i) = ltau;
3091                     ap::vmove(a.getcolumn(i, i+1, m-1), t.getvector(1, m-1-i));
3092                     t(1) = 1;
3093 
3094                     //
3095                     // Apply H(i) to A(i+1:m-1,i+1:n-1) from the left
3096                     //
3097                     reflections::applyreflectionfromtheleft<Precision>(a, ltau, t, i+1, m-1, i+1, n-1, work);
3098                 }
3099                 else
3100                 {
3101                     tauq(i) = 0;
3102                 }
3103             }
3104         }
3105     }
3106 
3107 
3108     /*************************************************************************
3109     Unpacking matrix Q which reduces a matrix to bidiagonal form.
3110 
3111     Input parameters:
3112         QP          -   matrices Q and P in compact form.
3113                         Output of ToBidiagonal subroutine.
3114         M           -   number of rows in matrix A.
3115         N           -   number of columns in matrix A.
3116         TAUQ        -   scalar factors which are used to form Q.
3117                         Output of ToBidiagonal subroutine.
3118         QColumns    -   required number of columns in matrix Q.
3119                         M>=QColumns>=0.
3120 
3121     Output parameters:
3122         Q           -   first QColumns columns of matrix Q.
3123                         Array[0..M-1, 0..QColumns-1]
3124                         If QColumns=0, the array is not modified.
3125 
3126       -- ALGLIB --
3127          Copyright 2005 by Bochkanov Sergey
3128     *************************************************************************/
3129     template<unsigned int Precision>
rmatrixbdunpackq(const ap::template_2d_array<amp::ampf<Precision>> & qp,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & tauq,int qcolumns,ap::template_2d_array<amp::ampf<Precision>> & q)3130     void rmatrixbdunpackq(const ap::template_2d_array< amp::ampf<Precision> >& qp,
3131         int m,
3132         int n,
3133         const ap::template_1d_array< amp::ampf<Precision> >& tauq,
3134         int qcolumns,
3135         ap::template_2d_array< amp::ampf<Precision> >& q)
3136     {
3137         int i;
3138         int j;
3139 
3140 
3141         ap::ap_error::make_assertion(qcolumns<=m);
3142         ap::ap_error::make_assertion(qcolumns>=0);
3143         if( m==0 || n==0 || qcolumns==0 )
3144         {
3145             return;
3146         }
3147 
3148         //
3149         // prepare Q
3150         //
3151         q.setbounds(0, m-1, 0, qcolumns-1);
3152         for(i=0; i<=m-1; i++)
3153         {
3154             for(j=0; j<=qcolumns-1; j++)
3155             {
3156                 if( i==j )
3157                 {
3158                     q(i,j) = 1;
3159                 }
3160                 else
3161                 {
3162                     q(i,j) = 0;
3163                 }
3164             }
3165         }
3166 
3167         //
3168         // Calculate
3169         //
3170         rmatrixbdmultiplybyq<Precision>(qp, m, n, tauq, q, m, qcolumns, false, false);
3171     }
3172 
3173 
3174     /*************************************************************************
3175     Multiplication by matrix Q which reduces matrix A to  bidiagonal form.
3176 
3177     The algorithm allows pre- or post-multiply by Q or Q'.
3178 
3179     Input parameters:
3180         QP          -   matrices Q and P in compact form.
3181                         Output of ToBidiagonal subroutine.
3182         M           -   number of rows in matrix A.
3183         N           -   number of columns in matrix A.
3184         TAUQ        -   scalar factors which are used to form Q.
3185                         Output of ToBidiagonal subroutine.
3186         Z           -   multiplied matrix.
3187                         array[0..ZRows-1,0..ZColumns-1]
3188         ZRows       -   number of rows in matrix Z. If FromTheRight=False,
3189                         ZRows=M, otherwise ZRows can be arbitrary.
3190         ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
3191                         ZColumns=M, otherwise ZColumns can be arbitrary.
3192         FromTheRight -  pre- or post-multiply.
3193         DoTranspose -   multiply by Q or Q'.
3194 
3195     Output parameters:
3196         Z           -   product of Z and Q.
3197                         Array[0..ZRows-1,0..ZColumns-1]
3198                         If ZRows=0 or ZColumns=0, the array is not modified.
3199 
3200       -- ALGLIB --
3201          Copyright 2005 by Bochkanov Sergey
3202     *************************************************************************/
3203     template<unsigned int Precision>
rmatrixbdmultiplybyq(const ap::template_2d_array<amp::ampf<Precision>> & qp,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & tauq,ap::template_2d_array<amp::ampf<Precision>> & z,int zrows,int zcolumns,bool fromtheright,bool dotranspose)3204     void rmatrixbdmultiplybyq(const ap::template_2d_array< amp::ampf<Precision> >& qp,
3205         int m,
3206         int n,
3207         const ap::template_1d_array< amp::ampf<Precision> >& tauq,
3208         ap::template_2d_array< amp::ampf<Precision> >& z,
3209         int zrows,
3210         int zcolumns,
3211         bool fromtheright,
3212         bool dotranspose)
3213     {
3214         int i;
3215         int i1;
3216         int i2;
3217         int istep;
3218         ap::template_1d_array< amp::ampf<Precision> > v;
3219         ap::template_1d_array< amp::ampf<Precision> > work;
3220         int mx;
3221 
3222 
3223         if( m<=0 || n<=0 || zrows<=0 || zcolumns<=0 )
3224         {
3225             return;
3226         }
3227         ap::ap_error::make_assertion(fromtheright ? zcolumns==m : zrows==m);
3228 
3229         //
3230         // init
3231         //
3232         mx = ap::maxint(m, n);
3233         mx = ap::maxint(mx, zrows);
3234         mx = ap::maxint(mx, zcolumns);
3235         v.setbounds(0, mx);
3236         work.setbounds(0, mx);
3237         if( m>=n )
3238         {
3239 
3240             //
3241             // setup
3242             //
3243             if( fromtheright )
3244             {
3245                 i1 = 0;
3246                 i2 = n-1;
3247                 istep = +1;
3248             }
3249             else
3250             {
3251                 i1 = n-1;
3252                 i2 = 0;
3253                 istep = -1;
3254             }
3255             if( dotranspose )
3256             {
3257                 i = i1;
3258                 i1 = i2;
3259                 i2 = i;
3260                 istep = -istep;
3261             }
3262 
3263             //
3264             // Process
3265             //
3266             i = i1;
3267             do
3268             {
3269                 ap::vmove(v.getvector(1, m-i), qp.getcolumn(i, i, m-1));
3270                 v(1) = 1;
3271                 if( fromtheright )
3272                 {
3273                     reflections::applyreflectionfromtheright<Precision>(z, tauq(i), v, 0, zrows-1, i, m-1, work);
3274                 }
3275                 else
3276                 {
3277                     reflections::applyreflectionfromtheleft<Precision>(z, tauq(i), v, i, m-1, 0, zcolumns-1, work);
3278                 }
3279                 i = i+istep;
3280             }
3281             while( i!=i2+istep );
3282         }
3283         else
3284         {
3285 
3286             //
3287             // setup
3288             //
3289             if( fromtheright )
3290             {
3291                 i1 = 0;
3292                 i2 = m-2;
3293                 istep = +1;
3294             }
3295             else
3296             {
3297                 i1 = m-2;
3298                 i2 = 0;
3299                 istep = -1;
3300             }
3301             if( dotranspose )
3302             {
3303                 i = i1;
3304                 i1 = i2;
3305                 i2 = i;
3306                 istep = -istep;
3307             }
3308 
3309             //
3310             // Process
3311             //
3312             if( m-1>0 )
3313             {
3314                 i = i1;
3315                 do
3316                 {
3317                     ap::vmove(v.getvector(1, m-i-1), qp.getcolumn(i, i+1, m-1));
3318                     v(1) = 1;
3319                     if( fromtheright )
3320                     {
3321                         reflections::applyreflectionfromtheright<Precision>(z, tauq(i), v, 0, zrows-1, i+1, m-1, work);
3322                     }
3323                     else
3324                     {
3325                         reflections::applyreflectionfromtheleft<Precision>(z, tauq(i), v, i+1, m-1, 0, zcolumns-1, work);
3326                     }
3327                     i = i+istep;
3328                 }
3329                 while( i!=i2+istep );
3330             }
3331         }
3332     }
3333 
3334 
3335     /*************************************************************************
3336     Unpacking matrix P which reduces matrix A to bidiagonal form.
3337     The subroutine returns transposed matrix P.
3338 
3339     Input parameters:
3340         QP      -   matrices Q and P in compact form.
3341                     Output of ToBidiagonal subroutine.
3342         M       -   number of rows in matrix A.
3343         N       -   number of columns in matrix A.
3344         TAUP    -   scalar factors which are used to form P.
3345                     Output of ToBidiagonal subroutine.
3346         PTRows  -   required number of rows of matrix P^T. N >= PTRows >= 0.
3347 
3348     Output parameters:
3349         PT      -   first PTRows columns of matrix P^T
3350                     Array[0..PTRows-1, 0..N-1]
3351                     If PTRows=0, the array is not modified.
3352 
3353       -- ALGLIB --
3354          Copyright 2005-2007 by Bochkanov Sergey
3355     *************************************************************************/
3356     template<unsigned int Precision>
rmatrixbdunpackpt(const ap::template_2d_array<amp::ampf<Precision>> & qp,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & taup,int ptrows,ap::template_2d_array<amp::ampf<Precision>> & pt)3357     void rmatrixbdunpackpt(const ap::template_2d_array< amp::ampf<Precision> >& qp,
3358         int m,
3359         int n,
3360         const ap::template_1d_array< amp::ampf<Precision> >& taup,
3361         int ptrows,
3362         ap::template_2d_array< amp::ampf<Precision> >& pt)
3363     {
3364         int i;
3365         int j;
3366 
3367 
3368         ap::ap_error::make_assertion(ptrows<=n);
3369         ap::ap_error::make_assertion(ptrows>=0);
3370         if( m==0 || n==0 || ptrows==0 )
3371         {
3372             return;
3373         }
3374 
3375         //
3376         // prepare PT
3377         //
3378         pt.setbounds(0, ptrows-1, 0, n-1);
3379         for(i=0; i<=ptrows-1; i++)
3380         {
3381             for(j=0; j<=n-1; j++)
3382             {
3383                 if( i==j )
3384                 {
3385                     pt(i,j) = 1;
3386                 }
3387                 else
3388                 {
3389                     pt(i,j) = 0;
3390                 }
3391             }
3392         }
3393 
3394         //
3395         // Calculate
3396         //
3397         rmatrixbdmultiplybyp<Precision>(qp, m, n, taup, pt, ptrows, n, true, true);
3398     }
3399 
3400 
3401     /*************************************************************************
3402     Multiplication by matrix P which reduces matrix A to  bidiagonal form.
3403 
3404     The algorithm allows pre- or post-multiply by P or P'.
3405 
3406     Input parameters:
3407         QP          -   matrices Q and P in compact form.
3408                         Output of RMatrixBD subroutine.
3409         M           -   number of rows in matrix A.
3410         N           -   number of columns in matrix A.
3411         TAUP        -   scalar factors which are used to form P.
3412                         Output of RMatrixBD subroutine.
3413         Z           -   multiplied matrix.
3414                         Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
3415         ZRows       -   number of rows in matrix Z. If FromTheRight=False,
3416                         ZRows=N, otherwise ZRows can be arbitrary.
3417         ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
3418                         ZColumns=N, otherwise ZColumns can be arbitrary.
3419         FromTheRight -  pre- or post-multiply.
3420         DoTranspose -   multiply by P or P'.
3421 
3422     Output parameters:
3423         Z - product of Z and P.
3424                     Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
3425                     If ZRows=0 or ZColumns=0, the array is not modified.
3426 
3427       -- ALGLIB --
3428          Copyright 2005-2007 by Bochkanov Sergey
3429     *************************************************************************/
3430     template<unsigned int Precision>
rmatrixbdmultiplybyp(const ap::template_2d_array<amp::ampf<Precision>> & qp,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & taup,ap::template_2d_array<amp::ampf<Precision>> & z,int zrows,int zcolumns,bool fromtheright,bool dotranspose)3431     void rmatrixbdmultiplybyp(const ap::template_2d_array< amp::ampf<Precision> >& qp,
3432         int m,
3433         int n,
3434         const ap::template_1d_array< amp::ampf<Precision> >& taup,
3435         ap::template_2d_array< amp::ampf<Precision> >& z,
3436         int zrows,
3437         int zcolumns,
3438         bool fromtheright,
3439         bool dotranspose)
3440     {
3441         int i;
3442         ap::template_1d_array< amp::ampf<Precision> > v;
3443         ap::template_1d_array< amp::ampf<Precision> > work;
3444         int mx;
3445         int i1;
3446         int i2;
3447         int istep;
3448 
3449 
3450         if( m<=0 || n<=0 || zrows<=0 || zcolumns<=0 )
3451         {
3452             return;
3453         }
3454         ap::ap_error::make_assertion(fromtheright ? zcolumns==n : zrows==n);
3455 
3456         //
3457         // init
3458         //
3459         mx = ap::maxint(m, n);
3460         mx = ap::maxint(mx, zrows);
3461         mx = ap::maxint(mx, zcolumns);
3462         v.setbounds(0, mx);
3463         work.setbounds(0, mx);
3464         v.setbounds(0, mx);
3465         work.setbounds(0, mx);
3466         if( m>=n )
3467         {
3468 
3469             //
3470             // setup
3471             //
3472             if( fromtheright )
3473             {
3474                 i1 = n-2;
3475                 i2 = 0;
3476                 istep = -1;
3477             }
3478             else
3479             {
3480                 i1 = 0;
3481                 i2 = n-2;
3482                 istep = +1;
3483             }
3484             if( !dotranspose )
3485             {
3486                 i = i1;
3487                 i1 = i2;
3488                 i2 = i;
3489                 istep = -istep;
3490             }
3491 
3492             //
3493             // Process
3494             //
3495             if( n-1>0 )
3496             {
3497                 i = i1;
3498                 do
3499                 {
3500                     ap::vmove(v.getvector(1, n-1-i), qp.getrow(i, i+1, n-1));
3501                     v(1) = 1;
3502                     if( fromtheright )
3503                     {
3504                         reflections::applyreflectionfromtheright<Precision>(z, taup(i), v, 0, zrows-1, i+1, n-1, work);
3505                     }
3506                     else
3507                     {
3508                         reflections::applyreflectionfromtheleft<Precision>(z, taup(i), v, i+1, n-1, 0, zcolumns-1, work);
3509                     }
3510                     i = i+istep;
3511                 }
3512                 while( i!=i2+istep );
3513             }
3514         }
3515         else
3516         {
3517 
3518             //
3519             // setup
3520             //
3521             if( fromtheright )
3522             {
3523                 i1 = m-1;
3524                 i2 = 0;
3525                 istep = -1;
3526             }
3527             else
3528             {
3529                 i1 = 0;
3530                 i2 = m-1;
3531                 istep = +1;
3532             }
3533             if( !dotranspose )
3534             {
3535                 i = i1;
3536                 i1 = i2;
3537                 i2 = i;
3538                 istep = -istep;
3539             }
3540 
3541             //
3542             // Process
3543             //
3544             i = i1;
3545             do
3546             {
3547                 ap::vmove(v.getvector(1, n-i), qp.getrow(i, i, n-1));
3548                 v(1) = 1;
3549                 if( fromtheright )
3550                 {
3551                     reflections::applyreflectionfromtheright<Precision>(z, taup(i), v, 0, zrows-1, i, n-1, work);
3552                 }
3553                 else
3554                 {
3555                     reflections::applyreflectionfromtheleft<Precision>(z, taup(i), v, i, n-1, 0, zcolumns-1, work);
3556                 }
3557                 i = i+istep;
3558             }
3559             while( i!=i2+istep );
3560         }
3561     }
3562 
3563 
3564     /*************************************************************************
3565     Unpacking of the main and secondary diagonals of bidiagonal decomposition
3566     of matrix A.
3567 
3568     Input parameters:
3569         B   -   output of RMatrixBD subroutine.
3570         M   -   number of rows in matrix B.
3571         N   -   number of columns in matrix B.
3572 
3573     Output parameters:
3574         IsUpper -   True, if the matrix is upper bidiagonal.
3575                     otherwise IsUpper is False.
3576         D       -   the main diagonal.
3577                     Array whose index ranges within [0..Min(M,N)-1].
3578         E       -   the secondary diagonal (upper or lower, depending on
3579                     the value of IsUpper).
3580                     Array index ranges within [0..Min(M,N)-1], the last
3581                     element is not used.
3582 
3583       -- ALGLIB --
3584          Copyright 2005-2007 by Bochkanov Sergey
3585     *************************************************************************/
3586     template<unsigned int Precision>
rmatrixbdunpackdiagonals(const ap::template_2d_array<amp::ampf<Precision>> & b,int m,int n,bool & isupper,ap::template_1d_array<amp::ampf<Precision>> & d,ap::template_1d_array<amp::ampf<Precision>> & e)3587     void rmatrixbdunpackdiagonals(const ap::template_2d_array< amp::ampf<Precision> >& b,
3588         int m,
3589         int n,
3590         bool& isupper,
3591         ap::template_1d_array< amp::ampf<Precision> >& d,
3592         ap::template_1d_array< amp::ampf<Precision> >& e)
3593     {
3594         int i;
3595 
3596 
3597         isupper = m>=n;
3598         if( m<=0 || n<=0 )
3599         {
3600             return;
3601         }
3602         if( isupper )
3603         {
3604             d.setbounds(0, n-1);
3605             e.setbounds(0, n-1);
3606             for(i=0; i<=n-2; i++)
3607             {
3608                 d(i) = b(i,i);
3609                 e(i) = b(i,i+1);
3610             }
3611             d(n-1) = b(n-1,n-1);
3612         }
3613         else
3614         {
3615             d.setbounds(0, m-1);
3616             e.setbounds(0, m-1);
3617             for(i=0; i<=m-2; i++)
3618             {
3619                 d(i) = b(i,i);
3620                 e(i) = b(i+1,i);
3621             }
3622             d(m-1) = b(m-1,m-1);
3623         }
3624     }
3625 
3626 
3627     /*************************************************************************
3628     Obsolete 1-based subroutine.
3629     See RMatrixBD for 0-based replacement.
3630     *************************************************************************/
3631     template<unsigned int Precision>
tobidiagonal(ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,ap::template_1d_array<amp::ampf<Precision>> & tauq,ap::template_1d_array<amp::ampf<Precision>> & taup)3632     void tobidiagonal(ap::template_2d_array< amp::ampf<Precision> >& a,
3633         int m,
3634         int n,
3635         ap::template_1d_array< amp::ampf<Precision> >& tauq,
3636         ap::template_1d_array< amp::ampf<Precision> >& taup)
3637     {
3638         ap::template_1d_array< amp::ampf<Precision> > work;
3639         ap::template_1d_array< amp::ampf<Precision> > t;
3640         int minmn;
3641         int maxmn;
3642         int i;
3643         amp::ampf<Precision> ltau;
3644         int mmip1;
3645         int nmi;
3646         int ip1;
3647         int nmip1;
3648         int mmi;
3649 
3650 
3651         minmn = ap::minint(m, n);
3652         maxmn = ap::maxint(m, n);
3653         work.setbounds(1, maxmn);
3654         t.setbounds(1, maxmn);
3655         taup.setbounds(1, minmn);
3656         tauq.setbounds(1, minmn);
3657         if( m>=n )
3658         {
3659 
3660             //
3661             // Reduce to upper bidiagonal form
3662             //
3663             for(i=1; i<=n; i++)
3664             {
3665 
3666                 //
3667                 // Generate elementary reflector H(i) to annihilate A(i+1:m,i)
3668                 //
3669                 mmip1 = m-i+1;
3670                 ap::vmove(t.getvector(1, mmip1), a.getcolumn(i, i, m));
3671                 reflections::generatereflection<Precision>(t, mmip1, ltau);
3672                 tauq(i) = ltau;
3673                 ap::vmove(a.getcolumn(i, i, m), t.getvector(1, mmip1));
3674                 t(1) = 1;
3675 
3676                 //
3677                 // Apply H(i) to A(i:m,i+1:n) from the left
3678                 //
3679                 reflections::applyreflectionfromtheleft<Precision>(a, ltau, t, i, m, i+1, n, work);
3680                 if( i<n )
3681                 {
3682 
3683                     //
3684                     // Generate elementary reflector G(i) to annihilate
3685                     // A(i,i+2:n)
3686                     //
3687                     nmi = n-i;
3688                     ip1 = i+1;
3689                     ap::vmove(t.getvector(1, nmi), a.getrow(i, ip1, n));
3690                     reflections::generatereflection<Precision>(t, nmi, ltau);
3691                     taup(i) = ltau;
3692                     ap::vmove(a.getrow(i, ip1, n), t.getvector(1, nmi));
3693                     t(1) = 1;
3694 
3695                     //
3696                     // Apply G(i) to A(i+1:m,i+1:n) from the right
3697                     //
3698                     reflections::applyreflectionfromtheright<Precision>(a, ltau, t, i+1, m, i+1, n, work);
3699                 }
3700                 else
3701                 {
3702                     taup(i) = 0;
3703                 }
3704             }
3705         }
3706         else
3707         {
3708 
3709             //
3710             // Reduce to lower bidiagonal form
3711             //
3712             for(i=1; i<=m; i++)
3713             {
3714 
3715                 //
3716                 // Generate elementary reflector G(i) to annihilate A(i,i+1:n)
3717                 //
3718                 nmip1 = n-i+1;
3719                 ap::vmove(t.getvector(1, nmip1), a.getrow(i, i, n));
3720                 reflections::generatereflection<Precision>(t, nmip1, ltau);
3721                 taup(i) = ltau;
3722                 ap::vmove(a.getrow(i, i, n), t.getvector(1, nmip1));
3723                 t(1) = 1;
3724 
3725                 //
3726                 // Apply G(i) to A(i+1:m,i:n) from the right
3727                 //
3728                 reflections::applyreflectionfromtheright<Precision>(a, ltau, t, i+1, m, i, n, work);
3729                 if( i<m )
3730                 {
3731 
3732                     //
3733                     // Generate elementary reflector H(i) to annihilate
3734                     // A(i+2:m,i)
3735                     //
3736                     mmi = m-i;
3737                     ip1 = i+1;
3738                     ap::vmove(t.getvector(1, mmi), a.getcolumn(i, ip1, m));
3739                     reflections::generatereflection<Precision>(t, mmi, ltau);
3740                     tauq(i) = ltau;
3741                     ap::vmove(a.getcolumn(i, ip1, m), t.getvector(1, mmi));
3742                     t(1) = 1;
3743 
3744                     //
3745                     // Apply H(i) to A(i+1:m,i+1:n) from the left
3746                     //
3747                     reflections::applyreflectionfromtheleft<Precision>(a, ltau, t, i+1, m, i+1, n, work);
3748                 }
3749                 else
3750                 {
3751                     tauq(i) = 0;
3752                 }
3753             }
3754         }
3755     }
3756 
3757 
3758     /*************************************************************************
3759     Obsolete 1-based subroutine.
3760     See RMatrixBDUnpackQ for 0-based replacement.
3761     *************************************************************************/
3762     template<unsigned int Precision>
unpackqfrombidiagonal(const ap::template_2d_array<amp::ampf<Precision>> & qp,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & tauq,int qcolumns,ap::template_2d_array<amp::ampf<Precision>> & q)3763     void unpackqfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& qp,
3764         int m,
3765         int n,
3766         const ap::template_1d_array< amp::ampf<Precision> >& tauq,
3767         int qcolumns,
3768         ap::template_2d_array< amp::ampf<Precision> >& q)
3769     {
3770         int i;
3771         int j;
3772         int ip1;
3773         ap::template_1d_array< amp::ampf<Precision> > v;
3774         ap::template_1d_array< amp::ampf<Precision> > work;
3775         int vm;
3776 
3777 
3778         ap::ap_error::make_assertion(qcolumns<=m);
3779         if( m==0 || n==0 || qcolumns==0 )
3780         {
3781             return;
3782         }
3783 
3784         //
3785         // init
3786         //
3787         q.setbounds(1, m, 1, qcolumns);
3788         v.setbounds(1, m);
3789         work.setbounds(1, qcolumns);
3790 
3791         //
3792         // prepare Q
3793         //
3794         for(i=1; i<=m; i++)
3795         {
3796             for(j=1; j<=qcolumns; j++)
3797             {
3798                 if( i==j )
3799                 {
3800                     q(i,j) = 1;
3801                 }
3802                 else
3803                 {
3804                     q(i,j) = 0;
3805                 }
3806             }
3807         }
3808         if( m>=n )
3809         {
3810             for(i=ap::minint(n, qcolumns); i>=1; i--)
3811             {
3812                 vm = m-i+1;
3813                 ap::vmove(v.getvector(1, vm), qp.getcolumn(i, i, m));
3814                 v(1) = 1;
3815                 reflections::applyreflectionfromtheleft<Precision>(q, tauq(i), v, i, m, 1, qcolumns, work);
3816             }
3817         }
3818         else
3819         {
3820             for(i=ap::minint(m-1, qcolumns-1); i>=1; i--)
3821             {
3822                 vm = m-i;
3823                 ip1 = i+1;
3824                 ap::vmove(v.getvector(1, vm), qp.getcolumn(i, ip1, m));
3825                 v(1) = 1;
3826                 reflections::applyreflectionfromtheleft<Precision>(q, tauq(i), v, i+1, m, 1, qcolumns, work);
3827             }
3828         }
3829     }
3830 
3831 
3832     /*************************************************************************
3833     Obsolete 1-based subroutine.
3834     See RMatrixBDMultiplyByQ for 0-based replacement.
3835     *************************************************************************/
3836     template<unsigned int Precision>
multiplybyqfrombidiagonal(const ap::template_2d_array<amp::ampf<Precision>> & qp,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & tauq,ap::template_2d_array<amp::ampf<Precision>> & z,int zrows,int zcolumns,bool fromtheright,bool dotranspose)3837     void multiplybyqfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& qp,
3838         int m,
3839         int n,
3840         const ap::template_1d_array< amp::ampf<Precision> >& tauq,
3841         ap::template_2d_array< amp::ampf<Precision> >& z,
3842         int zrows,
3843         int zcolumns,
3844         bool fromtheright,
3845         bool dotranspose)
3846     {
3847         int i;
3848         int ip1;
3849         int i1;
3850         int i2;
3851         int istep;
3852         ap::template_1d_array< amp::ampf<Precision> > v;
3853         ap::template_1d_array< amp::ampf<Precision> > work;
3854         int vm;
3855         int mx;
3856 
3857 
3858         if( m<=0 || n<=0 || zrows<=0 || zcolumns<=0 )
3859         {
3860             return;
3861         }
3862         ap::ap_error::make_assertion(fromtheright ? zcolumns==m : zrows==m);
3863 
3864         //
3865         // init
3866         //
3867         mx = ap::maxint(m, n);
3868         mx = ap::maxint(mx, zrows);
3869         mx = ap::maxint(mx, zcolumns);
3870         v.setbounds(1, mx);
3871         work.setbounds(1, mx);
3872         if( m>=n )
3873         {
3874 
3875             //
3876             // setup
3877             //
3878             if( fromtheright )
3879             {
3880                 i1 = 1;
3881                 i2 = n;
3882                 istep = +1;
3883             }
3884             else
3885             {
3886                 i1 = n;
3887                 i2 = 1;
3888                 istep = -1;
3889             }
3890             if( dotranspose )
3891             {
3892                 i = i1;
3893                 i1 = i2;
3894                 i2 = i;
3895                 istep = -istep;
3896             }
3897 
3898             //
3899             // Process
3900             //
3901             i = i1;
3902             do
3903             {
3904                 vm = m-i+1;
3905                 ap::vmove(v.getvector(1, vm), qp.getcolumn(i, i, m));
3906                 v(1) = 1;
3907                 if( fromtheright )
3908                 {
3909                     reflections::applyreflectionfromtheright<Precision>(z, tauq(i), v, 1, zrows, i, m, work);
3910                 }
3911                 else
3912                 {
3913                     reflections::applyreflectionfromtheleft<Precision>(z, tauq(i), v, i, m, 1, zcolumns, work);
3914                 }
3915                 i = i+istep;
3916             }
3917             while( i!=i2+istep );
3918         }
3919         else
3920         {
3921 
3922             //
3923             // setup
3924             //
3925             if( fromtheright )
3926             {
3927                 i1 = 1;
3928                 i2 = m-1;
3929                 istep = +1;
3930             }
3931             else
3932             {
3933                 i1 = m-1;
3934                 i2 = 1;
3935                 istep = -1;
3936             }
3937             if( dotranspose )
3938             {
3939                 i = i1;
3940                 i1 = i2;
3941                 i2 = i;
3942                 istep = -istep;
3943             }
3944 
3945             //
3946             // Process
3947             //
3948             if( m-1>0 )
3949             {
3950                 i = i1;
3951                 do
3952                 {
3953                     vm = m-i;
3954                     ip1 = i+1;
3955                     ap::vmove(v.getvector(1, vm), qp.getcolumn(i, ip1, m));
3956                     v(1) = 1;
3957                     if( fromtheright )
3958                     {
3959                         reflections::applyreflectionfromtheright<Precision>(z, tauq(i), v, 1, zrows, i+1, m, work);
3960                     }
3961                     else
3962                     {
3963                         reflections::applyreflectionfromtheleft<Precision>(z, tauq(i), v, i+1, m, 1, zcolumns, work);
3964                     }
3965                     i = i+istep;
3966                 }
3967                 while( i!=i2+istep );
3968             }
3969         }
3970     }
3971 
3972 
3973     /*************************************************************************
3974     Obsolete 1-based subroutine.
3975     See RMatrixBDUnpackPT for 0-based replacement.
3976     *************************************************************************/
3977     template<unsigned int Precision>
unpackptfrombidiagonal(const ap::template_2d_array<amp::ampf<Precision>> & qp,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & taup,int ptrows,ap::template_2d_array<amp::ampf<Precision>> & pt)3978     void unpackptfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& qp,
3979         int m,
3980         int n,
3981         const ap::template_1d_array< amp::ampf<Precision> >& taup,
3982         int ptrows,
3983         ap::template_2d_array< amp::ampf<Precision> >& pt)
3984     {
3985         int i;
3986         int j;
3987         int ip1;
3988         ap::template_1d_array< amp::ampf<Precision> > v;
3989         ap::template_1d_array< amp::ampf<Precision> > work;
3990         int vm;
3991 
3992 
3993         ap::ap_error::make_assertion(ptrows<=n);
3994         if( m==0 || n==0 || ptrows==0 )
3995         {
3996             return;
3997         }
3998 
3999         //
4000         // init
4001         //
4002         pt.setbounds(1, ptrows, 1, n);
4003         v.setbounds(1, n);
4004         work.setbounds(1, ptrows);
4005 
4006         //
4007         // prepare PT
4008         //
4009         for(i=1; i<=ptrows; i++)
4010         {
4011             for(j=1; j<=n; j++)
4012             {
4013                 if( i==j )
4014                 {
4015                     pt(i,j) = 1;
4016                 }
4017                 else
4018                 {
4019                     pt(i,j) = 0;
4020                 }
4021             }
4022         }
4023         if( m>=n )
4024         {
4025             for(i=ap::minint(n-1, ptrows-1); i>=1; i--)
4026             {
4027                 vm = n-i;
4028                 ip1 = i+1;
4029                 ap::vmove(v.getvector(1, vm), qp.getrow(i, ip1, n));
4030                 v(1) = 1;
4031                 reflections::applyreflectionfromtheright<Precision>(pt, taup(i), v, 1, ptrows, i+1, n, work);
4032             }
4033         }
4034         else
4035         {
4036             for(i=ap::minint(m, ptrows); i>=1; i--)
4037             {
4038                 vm = n-i+1;
4039                 ap::vmove(v.getvector(1, vm), qp.getrow(i, i, n));
4040                 v(1) = 1;
4041                 reflections::applyreflectionfromtheright<Precision>(pt, taup(i), v, 1, ptrows, i, n, work);
4042             }
4043         }
4044     }
4045 
4046 
4047     /*************************************************************************
4048     Obsolete 1-based subroutine.
4049     See RMatrixBDMultiplyByP for 0-based replacement.
4050     *************************************************************************/
4051     template<unsigned int Precision>
multiplybypfrombidiagonal(const ap::template_2d_array<amp::ampf<Precision>> & qp,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & taup,ap::template_2d_array<amp::ampf<Precision>> & z,int zrows,int zcolumns,bool fromtheright,bool dotranspose)4052     void multiplybypfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& qp,
4053         int m,
4054         int n,
4055         const ap::template_1d_array< amp::ampf<Precision> >& taup,
4056         ap::template_2d_array< amp::ampf<Precision> >& z,
4057         int zrows,
4058         int zcolumns,
4059         bool fromtheright,
4060         bool dotranspose)
4061     {
4062         int i;
4063         int ip1;
4064         ap::template_1d_array< amp::ampf<Precision> > v;
4065         ap::template_1d_array< amp::ampf<Precision> > work;
4066         int vm;
4067         int mx;
4068         int i1;
4069         int i2;
4070         int istep;
4071 
4072 
4073         if( m<=0 || n<=0 || zrows<=0 || zcolumns<=0 )
4074         {
4075             return;
4076         }
4077         ap::ap_error::make_assertion(fromtheright ? zcolumns==n : zrows==n);
4078 
4079         //
4080         // init
4081         //
4082         mx = ap::maxint(m, n);
4083         mx = ap::maxint(mx, zrows);
4084         mx = ap::maxint(mx, zcolumns);
4085         v.setbounds(1, mx);
4086         work.setbounds(1, mx);
4087         v.setbounds(1, mx);
4088         work.setbounds(1, mx);
4089         if( m>=n )
4090         {
4091 
4092             //
4093             // setup
4094             //
4095             if( fromtheright )
4096             {
4097                 i1 = n-1;
4098                 i2 = 1;
4099                 istep = -1;
4100             }
4101             else
4102             {
4103                 i1 = 1;
4104                 i2 = n-1;
4105                 istep = +1;
4106             }
4107             if( !dotranspose )
4108             {
4109                 i = i1;
4110                 i1 = i2;
4111                 i2 = i;
4112                 istep = -istep;
4113             }
4114 
4115             //
4116             // Process
4117             //
4118             if( n-1>0 )
4119             {
4120                 i = i1;
4121                 do
4122                 {
4123                     vm = n-i;
4124                     ip1 = i+1;
4125                     ap::vmove(v.getvector(1, vm), qp.getrow(i, ip1, n));
4126                     v(1) = 1;
4127                     if( fromtheright )
4128                     {
4129                         reflections::applyreflectionfromtheright<Precision>(z, taup(i), v, 1, zrows, i+1, n, work);
4130                     }
4131                     else
4132                     {
4133                         reflections::applyreflectionfromtheleft<Precision>(z, taup(i), v, i+1, n, 1, zcolumns, work);
4134                     }
4135                     i = i+istep;
4136                 }
4137                 while( i!=i2+istep );
4138             }
4139         }
4140         else
4141         {
4142 
4143             //
4144             // setup
4145             //
4146             if( fromtheright )
4147             {
4148                 i1 = m;
4149                 i2 = 1;
4150                 istep = -1;
4151             }
4152             else
4153             {
4154                 i1 = 1;
4155                 i2 = m;
4156                 istep = +1;
4157             }
4158             if( !dotranspose )
4159             {
4160                 i = i1;
4161                 i1 = i2;
4162                 i2 = i;
4163                 istep = -istep;
4164             }
4165 
4166             //
4167             // Process
4168             //
4169             i = i1;
4170             do
4171             {
4172                 vm = n-i+1;
4173                 ap::vmove(v.getvector(1, vm), qp.getrow(i, i, n));
4174                 v(1) = 1;
4175                 if( fromtheright )
4176                 {
4177                     reflections::applyreflectionfromtheright<Precision>(z, taup(i), v, 1, zrows, i, n, work);
4178                 }
4179                 else
4180                 {
4181                     reflections::applyreflectionfromtheleft<Precision>(z, taup(i), v, i, n, 1, zcolumns, work);
4182                 }
4183                 i = i+istep;
4184             }
4185             while( i!=i2+istep );
4186         }
4187     }
4188 
4189 
4190     /*************************************************************************
4191     Obsolete 1-based subroutine.
4192     See RMatrixBDUnpackDiagonals for 0-based replacement.
4193     *************************************************************************/
4194     template<unsigned int Precision>
unpackdiagonalsfrombidiagonal(const ap::template_2d_array<amp::ampf<Precision>> & b,int m,int n,bool & isupper,ap::template_1d_array<amp::ampf<Precision>> & d,ap::template_1d_array<amp::ampf<Precision>> & e)4195     void unpackdiagonalsfrombidiagonal(const ap::template_2d_array< amp::ampf<Precision> >& b,
4196         int m,
4197         int n,
4198         bool& isupper,
4199         ap::template_1d_array< amp::ampf<Precision> >& d,
4200         ap::template_1d_array< amp::ampf<Precision> >& e)
4201     {
4202         int i;
4203 
4204 
4205         isupper = m>=n;
4206         if( m==0 || n==0 )
4207         {
4208             return;
4209         }
4210         if( isupper )
4211         {
4212             d.setbounds(1, n);
4213             e.setbounds(1, n);
4214             for(i=1; i<=n-1; i++)
4215             {
4216                 d(i) = b(i,i);
4217                 e(i) = b(i,i+1);
4218             }
4219             d(n) = b(n,n);
4220         }
4221         else
4222         {
4223             d.setbounds(1, m);
4224             e.setbounds(1, m);
4225             for(i=1; i<=m-1; i++)
4226             {
4227                 d(i) = b(i,i);
4228                 e(i) = b(i+1,i);
4229             }
4230             d(m) = b(m,m);
4231         }
4232     }
4233 } // namespace
4234 
4235 /* stuff included from ./qr.h */
4236 
4237 /*************************************************************************
4238 Copyright (c) 1992-2007 The University of Tennessee.  All rights reserved.
4239 
4240 Contributors:
4241     * Sergey Bochkanov (ALGLIB project). Translation from FORTRAN to
4242       pseudocode.
4243 
4244 See subroutines comments for additional copyrights.
4245 
4246 Redistribution and use in source and binary forms, with or without
4247 modification, are permitted provided that the following conditions are
4248 met:
4249 
4250 - Redistributions of source code must retain the above copyright
4251   notice, this list of conditions and the following disclaimer.
4252 
4253 - Redistributions in binary form must reproduce the above copyright
4254   notice, this list of conditions and the following disclaimer listed
4255   in this license in the documentation and/or other materials
4256   provided with the distribution.
4257 
4258 - Neither the name of the copyright holders nor the names of its
4259   contributors may be used to endorse or promote products derived from
4260   this software without specific prior written permission.
4261 
4262 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
4263 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
4264 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
4265 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
4266 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
4267 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
4268 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
4269 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
4270 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
4271 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
4272 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4273 *************************************************************************/
4274 
4275 namespace qr
4276 {
4277     template<unsigned int Precision>
4278     void rmatrixqr(ap::template_2d_array< amp::ampf<Precision> >& a,
4279         int m,
4280         int n,
4281         ap::template_1d_array< amp::ampf<Precision> >& tau);
4282     template<unsigned int Precision>
4283     void rmatrixqrunpackq(const ap::template_2d_array< amp::ampf<Precision> >& a,
4284         int m,
4285         int n,
4286         const ap::template_1d_array< amp::ampf<Precision> >& tau,
4287         int qcolumns,
4288         ap::template_2d_array< amp::ampf<Precision> >& q);
4289     template<unsigned int Precision>
4290     void rmatrixqrunpackr(const ap::template_2d_array< amp::ampf<Precision> >& a,
4291         int m,
4292         int n,
4293         ap::template_2d_array< amp::ampf<Precision> >& r);
4294     template<unsigned int Precision>
4295     void qrdecomposition(ap::template_2d_array< amp::ampf<Precision> >& a,
4296         int m,
4297         int n,
4298         ap::template_1d_array< amp::ampf<Precision> >& tau);
4299     template<unsigned int Precision>
4300     void unpackqfromqr(const ap::template_2d_array< amp::ampf<Precision> >& a,
4301         int m,
4302         int n,
4303         const ap::template_1d_array< amp::ampf<Precision> >& tau,
4304         int qcolumns,
4305         ap::template_2d_array< amp::ampf<Precision> >& q);
4306     template<unsigned int Precision>
4307     void qrdecompositionunpacked(ap::template_2d_array< amp::ampf<Precision> > a,
4308         int m,
4309         int n,
4310         ap::template_2d_array< amp::ampf<Precision> >& q,
4311         ap::template_2d_array< amp::ampf<Precision> >& r);
4312 
4313 
4314     /*************************************************************************
4315     QR decomposition of a rectangular matrix of size MxN
4316 
4317     Input parameters:
4318         A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
4319         M   -   number of rows in matrix A.
4320         N   -   number of columns in matrix A.
4321 
4322     Output parameters:
4323         A   -   matrices Q and R in compact form (see below).
4324         Tau -   array of scalar factors which are used to form
4325                 matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)].
4326 
4327     Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
4328     MxM, R - upper triangular (or upper trapezoid) matrix of size M x N.
4329 
4330     The elements of matrix R are located on and above the main diagonal of
4331     matrix A. The elements which are located in Tau array and below the main
4332     diagonal of matrix A are used to form matrix Q as follows:
4333 
4334     Matrix Q is represented as a product of elementary reflections
4335 
4336     Q = H(0)*H(2)*...*H(k-1),
4337 
4338     where k = min(m,n), and each H(i) is in the form
4339 
4340     H(i) = 1 - tau * v * (v^T)
4341 
4342     where tau is a scalar stored in Tau[I]; v - real vector,
4343     so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).
4344 
4345       -- LAPACK routine (version 3.0) --
4346          Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
4347          Courant Institute, Argonne National Lab, and Rice University
4348          February 29, 1992.
4349          Translation from FORTRAN to pseudocode (AlgoPascal)
4350          by Sergey Bochkanov, ALGLIB project, 2005-2007.
4351     *************************************************************************/
4352     template<unsigned int Precision>
rmatrixqr(ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,ap::template_1d_array<amp::ampf<Precision>> & tau)4353     void rmatrixqr(ap::template_2d_array< amp::ampf<Precision> >& a,
4354         int m,
4355         int n,
4356         ap::template_1d_array< amp::ampf<Precision> >& tau)
4357     {
4358         ap::template_1d_array< amp::ampf<Precision> > work;
4359         ap::template_1d_array< amp::ampf<Precision> > t;
4360         int i;
4361         int k;
4362         int minmn;
4363         amp::ampf<Precision> tmp;
4364 
4365 
4366         if( m<=0 || n<=0 )
4367         {
4368             return;
4369         }
4370         minmn = ap::minint(m, n);
4371         work.setbounds(0, n-1);
4372         t.setbounds(1, m);
4373         tau.setbounds(0, minmn-1);
4374 
4375         //
4376         // Test the input arguments
4377         //
4378         k = minmn;
4379         for(i=0; i<=k-1; i++)
4380         {
4381 
4382             //
4383             // Generate elementary reflector H(i) to annihilate A(i+1:m,i)
4384             //
4385             ap::vmove(t.getvector(1, m-i), a.getcolumn(i, i, m-1));
4386             reflections::generatereflection<Precision>(t, m-i, tmp);
4387             tau(i) = tmp;
4388             ap::vmove(a.getcolumn(i, i, m-1), t.getvector(1, m-i));
4389             t(1) = 1;
4390             if( i<n )
4391             {
4392 
4393                 //
4394                 // Apply H(i) to A(i:m-1,i+1:n-1) from the left
4395                 //
4396                 reflections::applyreflectionfromtheleft<Precision>(a, tau(i), t, i, m-1, i+1, n-1, work);
4397             }
4398         }
4399     }
4400 
4401 
4402     /*************************************************************************
4403     Partial unpacking of matrix Q from the QR decomposition of a matrix A
4404 
4405     Input parameters:
4406         A       -   matrices Q and R in compact form.
4407                     Output of RMatrixQR subroutine.
4408         M       -   number of rows in given matrix A. M>=0.
4409         N       -   number of columns in given matrix A. N>=0.
4410         Tau     -   scalar factors which are used to form Q.
4411                     Output of the RMatrixQR subroutine.
4412         QColumns -  required number of columns of matrix Q. M>=QColumns>=0.
4413 
4414     Output parameters:
4415         Q       -   first QColumns columns of matrix Q.
4416                     Array whose indexes range within [0..M-1, 0..QColumns-1].
4417                     If QColumns=0, the array remains unchanged.
4418 
4419       -- ALGLIB --
4420          Copyright 2005 by Bochkanov Sergey
4421     *************************************************************************/
4422     template<unsigned int Precision>
rmatrixqrunpackq(const ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & tau,int qcolumns,ap::template_2d_array<amp::ampf<Precision>> & q)4423     void rmatrixqrunpackq(const ap::template_2d_array< amp::ampf<Precision> >& a,
4424         int m,
4425         int n,
4426         const ap::template_1d_array< amp::ampf<Precision> >& tau,
4427         int qcolumns,
4428         ap::template_2d_array< amp::ampf<Precision> >& q)
4429     {
4430         int i;
4431         int j;
4432         int k;
4433         int minmn;
4434         ap::template_1d_array< amp::ampf<Precision> > v;
4435         ap::template_1d_array< amp::ampf<Precision> > work;
4436 
4437 
4438         ap::ap_error::make_assertion(qcolumns<=m);
4439         if( m<=0 || n<=0 || qcolumns<=0 )
4440         {
4441             return;
4442         }
4443 
4444         //
4445         // init
4446         //
4447         minmn = ap::minint(m, n);
4448         k = ap::minint(minmn, qcolumns);
4449         q.setbounds(0, m-1, 0, qcolumns-1);
4450         v.setbounds(1, m);
4451         work.setbounds(0, qcolumns-1);
4452         for(i=0; i<=m-1; i++)
4453         {
4454             for(j=0; j<=qcolumns-1; j++)
4455             {
4456                 if( i==j )
4457                 {
4458                     q(i,j) = 1;
4459                 }
4460                 else
4461                 {
4462                     q(i,j) = 0;
4463                 }
4464             }
4465         }
4466 
4467         //
4468         // unpack Q
4469         //
4470         for(i=k-1; i>=0; i--)
4471         {
4472 
4473             //
4474             // Apply H(i)
4475             //
4476             ap::vmove(v.getvector(1, m-i), a.getcolumn(i, i, m-1));
4477             v(1) = 1;
4478             reflections::applyreflectionfromtheleft<Precision>(q, tau(i), v, i, m-1, 0, qcolumns-1, work);
4479         }
4480     }
4481 
4482 
4483     /*************************************************************************
4484     Unpacking of matrix R from the QR decomposition of a matrix A
4485 
4486     Input parameters:
4487         A       -   matrices Q and R in compact form.
4488                     Output of RMatrixQR subroutine.
4489         M       -   number of rows in given matrix A. M>=0.
4490         N       -   number of columns in given matrix A. N>=0.
4491 
4492     Output parameters:
4493         R       -   matrix R, array[0..M-1, 0..N-1].
4494 
4495       -- ALGLIB --
4496          Copyright 2005 by Bochkanov Sergey
4497     *************************************************************************/
4498     template<unsigned int Precision>
rmatrixqrunpackr(const ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,ap::template_2d_array<amp::ampf<Precision>> & r)4499     void rmatrixqrunpackr(const ap::template_2d_array< amp::ampf<Precision> >& a,
4500         int m,
4501         int n,
4502         ap::template_2d_array< amp::ampf<Precision> >& r)
4503     {
4504         int i;
4505         int k;
4506 
4507 
4508         if( m<=0 || n<=0 )
4509         {
4510             return;
4511         }
4512         k = ap::minint(m, n);
4513         r.setbounds(0, m-1, 0, n-1);
4514         for(i=0; i<=n-1; i++)
4515         {
4516             r(0,i) = 0;
4517         }
4518         for(i=1; i<=m-1; i++)
4519         {
4520             ap::vmove(r.getrow(i, 0, n-1), r.getrow(0, 0, n-1));
4521         }
4522         for(i=0; i<=k-1; i++)
4523         {
4524             ap::vmove(r.getrow(i, i, n-1), a.getrow(i, i, n-1));
4525         }
4526     }
4527 
4528 
4529     /*************************************************************************
4530     Obsolete 1-based subroutine. See RMatrixQR for 0-based replacement.
4531     *************************************************************************/
4532     template<unsigned int Precision>
qrdecomposition(ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,ap::template_1d_array<amp::ampf<Precision>> & tau)4533     void qrdecomposition(ap::template_2d_array< amp::ampf<Precision> >& a,
4534         int m,
4535         int n,
4536         ap::template_1d_array< amp::ampf<Precision> >& tau)
4537     {
4538         ap::template_1d_array< amp::ampf<Precision> > work;
4539         ap::template_1d_array< amp::ampf<Precision> > t;
4540         int i;
4541         int k;
4542         int mmip1;
4543         int minmn;
4544         amp::ampf<Precision> tmp;
4545 
4546 
4547         minmn = ap::minint(m, n);
4548         work.setbounds(1, n);
4549         t.setbounds(1, m);
4550         tau.setbounds(1, minmn);
4551 
4552         //
4553         // Test the input arguments
4554         //
4555         k = ap::minint(m, n);
4556         for(i=1; i<=k; i++)
4557         {
4558 
4559             //
4560             // Generate elementary reflector H(i) to annihilate A(i+1:m,i)
4561             //
4562             mmip1 = m-i+1;
4563             ap::vmove(t.getvector(1, mmip1), a.getcolumn(i, i, m));
4564             reflections::generatereflection<Precision>(t, mmip1, tmp);
4565             tau(i) = tmp;
4566             ap::vmove(a.getcolumn(i, i, m), t.getvector(1, mmip1));
4567             t(1) = 1;
4568             if( i<n )
4569             {
4570 
4571                 //
4572                 // Apply H(i) to A(i:m,i+1:n) from the left
4573                 //
4574                 reflections::applyreflectionfromtheleft<Precision>(a, tau(i), t, i, m, i+1, n, work);
4575             }
4576         }
4577     }
4578 
4579 
4580     /*************************************************************************
4581     Obsolete 1-based subroutine. See RMatrixQRUnpackQ for 0-based replacement.
4582     *************************************************************************/
4583     template<unsigned int Precision>
unpackqfromqr(const ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & tau,int qcolumns,ap::template_2d_array<amp::ampf<Precision>> & q)4584     void unpackqfromqr(const ap::template_2d_array< amp::ampf<Precision> >& a,
4585         int m,
4586         int n,
4587         const ap::template_1d_array< amp::ampf<Precision> >& tau,
4588         int qcolumns,
4589         ap::template_2d_array< amp::ampf<Precision> >& q)
4590     {
4591         int i;
4592         int j;
4593         int k;
4594         int minmn;
4595         ap::template_1d_array< amp::ampf<Precision> > v;
4596         ap::template_1d_array< amp::ampf<Precision> > work;
4597         int vm;
4598 
4599 
4600         ap::ap_error::make_assertion(qcolumns<=m);
4601         if( m==0 || n==0 || qcolumns==0 )
4602         {
4603             return;
4604         }
4605 
4606         //
4607         // init
4608         //
4609         minmn = ap::minint(m, n);
4610         k = ap::minint(minmn, qcolumns);
4611         q.setbounds(1, m, 1, qcolumns);
4612         v.setbounds(1, m);
4613         work.setbounds(1, qcolumns);
4614         for(i=1; i<=m; i++)
4615         {
4616             for(j=1; j<=qcolumns; j++)
4617             {
4618                 if( i==j )
4619                 {
4620                     q(i,j) = 1;
4621                 }
4622                 else
4623                 {
4624                     q(i,j) = 0;
4625                 }
4626             }
4627         }
4628 
4629         //
4630         // unpack Q
4631         //
4632         for(i=k; i>=1; i--)
4633         {
4634 
4635             //
4636             // Apply H(i)
4637             //
4638             vm = m-i+1;
4639             ap::vmove(v.getvector(1, vm), a.getcolumn(i, i, m));
4640             v(1) = 1;
4641             reflections::applyreflectionfromtheleft<Precision>(q, tau(i), v, i, m, 1, qcolumns, work);
4642         }
4643     }
4644 
4645 
4646     /*************************************************************************
4647     Obsolete 1-based subroutine. See RMatrixQR for 0-based replacement.
4648     *************************************************************************/
4649     template<unsigned int Precision>
qrdecompositionunpacked(ap::template_2d_array<amp::ampf<Precision>> a,int m,int n,ap::template_2d_array<amp::ampf<Precision>> & q,ap::template_2d_array<amp::ampf<Precision>> & r)4650     void qrdecompositionunpacked(ap::template_2d_array< amp::ampf<Precision> > a,
4651         int m,
4652         int n,
4653         ap::template_2d_array< amp::ampf<Precision> >& q,
4654         ap::template_2d_array< amp::ampf<Precision> >& r)
4655     {
4656         int i;
4657         int k;
4658         ap::template_1d_array< amp::ampf<Precision> > tau;
4659         ap::template_1d_array< amp::ampf<Precision> > work;
4660         ap::template_1d_array< amp::ampf<Precision> > v;
4661 
4662 
4663         k = ap::minint(m, n);
4664         if( n<=0 )
4665         {
4666             return;
4667         }
4668         work.setbounds(1, m);
4669         v.setbounds(1, m);
4670         q.setbounds(1, m, 1, m);
4671         r.setbounds(1, m, 1, n);
4672 
4673         //
4674         // QRDecomposition
4675         //
4676         qrdecomposition<Precision>(a, m, n, tau);
4677 
4678         //
4679         // R
4680         //
4681         for(i=1; i<=n; i++)
4682         {
4683             r(1,i) = 0;
4684         }
4685         for(i=2; i<=m; i++)
4686         {
4687             ap::vmove(r.getrow(i, 1, n), r.getrow(1, 1, n));
4688         }
4689         for(i=1; i<=k; i++)
4690         {
4691             ap::vmove(r.getrow(i, i, n), a.getrow(i, i, n));
4692         }
4693 
4694         //
4695         // Q
4696         //
4697         unpackqfromqr<Precision>(a, m, n, tau, m, q);
4698     }
4699 } // namespace
4700 
4701 /* stuff included from ./lq.h */
4702 
4703 /*************************************************************************
4704 Copyright (c) 2005-2007, Sergey Bochkanov (ALGLIB project).
4705 
4706 Redistribution and use in source and binary forms, with or without
4707 modification, are permitted provided that the following conditions are
4708 met:
4709 
4710 - Redistributions of source code must retain the above copyright
4711   notice, this list of conditions and the following disclaimer.
4712 
4713 - Redistributions in binary form must reproduce the above copyright
4714   notice, this list of conditions and the following disclaimer listed
4715   in this license in the documentation and/or other materials
4716   provided with the distribution.
4717 
4718 - Neither the name of the copyright holders nor the names of its
4719   contributors may be used to endorse or promote products derived from
4720   this software without specific prior written permission.
4721 
4722 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
4723 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
4724 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
4725 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
4726 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
4727 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
4728 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
4729 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
4730 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
4731 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
4732 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4733 *************************************************************************/
4734 
4735 namespace lq
4736 {
4737     template<unsigned int Precision>
4738     void rmatrixlq(ap::template_2d_array< amp::ampf<Precision> >& a,
4739         int m,
4740         int n,
4741         ap::template_1d_array< amp::ampf<Precision> >& tau);
4742     template<unsigned int Precision>
4743     void rmatrixlqunpackq(const ap::template_2d_array< amp::ampf<Precision> >& a,
4744         int m,
4745         int n,
4746         const ap::template_1d_array< amp::ampf<Precision> >& tau,
4747         int qrows,
4748         ap::template_2d_array< amp::ampf<Precision> >& q);
4749     template<unsigned int Precision>
4750     void rmatrixlqunpackl(const ap::template_2d_array< amp::ampf<Precision> >& a,
4751         int m,
4752         int n,
4753         ap::template_2d_array< amp::ampf<Precision> >& l);
4754     template<unsigned int Precision>
4755     void lqdecomposition(ap::template_2d_array< amp::ampf<Precision> >& a,
4756         int m,
4757         int n,
4758         ap::template_1d_array< amp::ampf<Precision> >& tau);
4759     template<unsigned int Precision>
4760     void unpackqfromlq(const ap::template_2d_array< amp::ampf<Precision> >& a,
4761         int m,
4762         int n,
4763         const ap::template_1d_array< amp::ampf<Precision> >& tau,
4764         int qrows,
4765         ap::template_2d_array< amp::ampf<Precision> >& q);
4766     template<unsigned int Precision>
4767     void lqdecompositionunpacked(ap::template_2d_array< amp::ampf<Precision> > a,
4768         int m,
4769         int n,
4770         ap::template_2d_array< amp::ampf<Precision> >& l,
4771         ap::template_2d_array< amp::ampf<Precision> >& q);
4772 
4773 
4774     /*************************************************************************
4775     LQ decomposition of a rectangular matrix of size MxN
4776 
4777     Input parameters:
4778         A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
4779         M   -   number of rows in matrix A.
4780         N   -   number of columns in matrix A.
4781 
4782     Output parameters:
4783         A   -   matrices L and Q in compact form (see below)
4784         Tau -   array of scalar factors which are used to form
4785                 matrix Q. Array whose index ranges within [0..Min(M,N)-1].
4786 
4787     Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
4788     MxM, L - lower triangular (or lower trapezoid) matrix of size M x N.
4789 
4790     The elements of matrix L are located on and below  the  main  diagonal  of
4791     matrix A. The elements which are located in Tau array and above  the  main
4792     diagonal of matrix A are used to form matrix Q as follows:
4793 
4794     Matrix Q is represented as a product of elementary reflections
4795 
4796     Q = H(k-1)*H(k-2)*...*H(1)*H(0),
4797 
4798     where k = min(m,n), and each H(i) is of the form
4799 
4800     H(i) = 1 - tau * v * (v^T)
4801 
4802     where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0,
4803     v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1).
4804 
4805       -- ALGLIB --
4806          Copyright 2005-2007 by Bochkanov Sergey
4807     *************************************************************************/
4808     template<unsigned int Precision>
rmatrixlq(ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,ap::template_1d_array<amp::ampf<Precision>> & tau)4809     void rmatrixlq(ap::template_2d_array< amp::ampf<Precision> >& a,
4810         int m,
4811         int n,
4812         ap::template_1d_array< amp::ampf<Precision> >& tau)
4813     {
4814         ap::template_1d_array< amp::ampf<Precision> > work;
4815         ap::template_1d_array< amp::ampf<Precision> > t;
4816         int i;
4817         int k;
4818         int minmn;
4819         int maxmn;
4820         amp::ampf<Precision> tmp;
4821 
4822 
4823         minmn = ap::minint(m, n);
4824         maxmn = ap::maxint(m, n);
4825         work.setbounds(0, m);
4826         t.setbounds(0, n);
4827         tau.setbounds(0, minmn-1);
4828         k = ap::minint(m, n);
4829         for(i=0; i<=k-1; i++)
4830         {
4831 
4832             //
4833             // Generate elementary reflector H(i) to annihilate A(i,i+1:n-1)
4834             //
4835             ap::vmove(t.getvector(1, n-i), a.getrow(i, i, n-1));
4836             reflections::generatereflection<Precision>(t, n-i, tmp);
4837             tau(i) = tmp;
4838             ap::vmove(a.getrow(i, i, n-1), t.getvector(1, n-i));
4839             t(1) = 1;
4840             if( i<n )
4841             {
4842 
4843                 //
4844                 // Apply H(i) to A(i+1:m,i:n) from the right
4845                 //
4846                 reflections::applyreflectionfromtheright<Precision>(a, tau(i), t, i+1, m-1, i, n-1, work);
4847             }
4848         }
4849     }
4850 
4851 
4852     /*************************************************************************
4853     Partial unpacking of matrix Q from the LQ decomposition of a matrix A
4854 
4855     Input parameters:
4856         A       -   matrices L and Q in compact form.
4857                     Output of RMatrixLQ subroutine.
4858         M       -   number of rows in given matrix A. M>=0.
4859         N       -   number of columns in given matrix A. N>=0.
4860         Tau     -   scalar factors which are used to form Q.
4861                     Output of the RMatrixLQ subroutine.
4862         QRows   -   required number of rows in matrix Q. N>=QRows>=0.
4863 
4864     Output parameters:
4865         Q       -   first QRows rows of matrix Q. Array whose indexes range
4866                     within [0..QRows-1, 0..N-1]. If QRows=0, the array remains
4867                     unchanged.
4868 
4869       -- ALGLIB --
4870          Copyright 2005 by Bochkanov Sergey
4871     *************************************************************************/
4872     template<unsigned int Precision>
rmatrixlqunpackq(const ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & tau,int qrows,ap::template_2d_array<amp::ampf<Precision>> & q)4873     void rmatrixlqunpackq(const ap::template_2d_array< amp::ampf<Precision> >& a,
4874         int m,
4875         int n,
4876         const ap::template_1d_array< amp::ampf<Precision> >& tau,
4877         int qrows,
4878         ap::template_2d_array< amp::ampf<Precision> >& q)
4879     {
4880         int i;
4881         int j;
4882         int k;
4883         int minmn;
4884         ap::template_1d_array< amp::ampf<Precision> > v;
4885         ap::template_1d_array< amp::ampf<Precision> > work;
4886 
4887 
4888         ap::ap_error::make_assertion(qrows<=n);
4889         if( m<=0 || n<=0 || qrows<=0 )
4890         {
4891             return;
4892         }
4893 
4894         //
4895         // init
4896         //
4897         minmn = ap::minint(m, n);
4898         k = ap::minint(minmn, qrows);
4899         q.setbounds(0, qrows-1, 0, n-1);
4900         v.setbounds(0, n);
4901         work.setbounds(0, qrows);
4902         for(i=0; i<=qrows-1; i++)
4903         {
4904             for(j=0; j<=n-1; j++)
4905             {
4906                 if( i==j )
4907                 {
4908                     q(i,j) = 1;
4909                 }
4910                 else
4911                 {
4912                     q(i,j) = 0;
4913                 }
4914             }
4915         }
4916 
4917         //
4918         // unpack Q
4919         //
4920         for(i=k-1; i>=0; i--)
4921         {
4922 
4923             //
4924             // Apply H(i)
4925             //
4926             ap::vmove(v.getvector(1, n-i), a.getrow(i, i, n-1));
4927             v(1) = 1;
4928             reflections::applyreflectionfromtheright<Precision>(q, tau(i), v, 0, qrows-1, i, n-1, work);
4929         }
4930     }
4931 
4932 
4933     /*************************************************************************
4934     Unpacking of matrix L from the LQ decomposition of a matrix A
4935 
4936     Input parameters:
4937         A       -   matrices Q and L in compact form.
4938                     Output of RMatrixLQ subroutine.
4939         M       -   number of rows in given matrix A. M>=0.
4940         N       -   number of columns in given matrix A. N>=0.
4941 
4942     Output parameters:
4943         L       -   matrix L, array[0..M-1, 0..N-1].
4944 
4945       -- ALGLIB --
4946          Copyright 2005 by Bochkanov Sergey
4947     *************************************************************************/
4948     template<unsigned int Precision>
rmatrixlqunpackl(const ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,ap::template_2d_array<amp::ampf<Precision>> & l)4949     void rmatrixlqunpackl(const ap::template_2d_array< amp::ampf<Precision> >& a,
4950         int m,
4951         int n,
4952         ap::template_2d_array< amp::ampf<Precision> >& l)
4953     {
4954         int i;
4955         int k;
4956 
4957 
4958         if( m<=0 || n<=0 )
4959         {
4960             return;
4961         }
4962         l.setbounds(0, m-1, 0, n-1);
4963         for(i=0; i<=n-1; i++)
4964         {
4965             l(0,i) = 0;
4966         }
4967         for(i=1; i<=m-1; i++)
4968         {
4969             ap::vmove(l.getrow(i, 0, n-1), l.getrow(0, 0, n-1));
4970         }
4971         for(i=0; i<=m-1; i++)
4972         {
4973             k = ap::minint(i, n-1);
4974             ap::vmove(l.getrow(i, 0, k), a.getrow(i, 0, k));
4975         }
4976     }
4977 
4978 
4979     /*************************************************************************
4980     Obsolete 1-based subroutine
4981     See RMatrixLQ for 0-based replacement.
4982     *************************************************************************/
4983     template<unsigned int Precision>
lqdecomposition(ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,ap::template_1d_array<amp::ampf<Precision>> & tau)4984     void lqdecomposition(ap::template_2d_array< amp::ampf<Precision> >& a,
4985         int m,
4986         int n,
4987         ap::template_1d_array< amp::ampf<Precision> >& tau)
4988     {
4989         ap::template_1d_array< amp::ampf<Precision> > work;
4990         ap::template_1d_array< amp::ampf<Precision> > t;
4991         int i;
4992         int k;
4993         int nmip1;
4994         int minmn;
4995         int maxmn;
4996         amp::ampf<Precision> tmp;
4997 
4998 
4999         minmn = ap::minint(m, n);
5000         maxmn = ap::maxint(m, n);
5001         work.setbounds(1, m);
5002         t.setbounds(1, n);
5003         tau.setbounds(1, minmn);
5004 
5005         //
5006         // Test the input arguments
5007         //
5008         k = ap::minint(m, n);
5009         for(i=1; i<=k; i++)
5010         {
5011 
5012             //
5013             // Generate elementary reflector H(i) to annihilate A(i,i+1:n)
5014             //
5015             nmip1 = n-i+1;
5016             ap::vmove(t.getvector(1, nmip1), a.getrow(i, i, n));
5017             reflections::generatereflection<Precision>(t, nmip1, tmp);
5018             tau(i) = tmp;
5019             ap::vmove(a.getrow(i, i, n), t.getvector(1, nmip1));
5020             t(1) = 1;
5021             if( i<n )
5022             {
5023 
5024                 //
5025                 // Apply H(i) to A(i+1:m,i:n) from the right
5026                 //
5027                 reflections::applyreflectionfromtheright<Precision>(a, tau(i), t, i+1, m, i, n, work);
5028             }
5029         }
5030     }
5031 
5032 
5033     /*************************************************************************
5034     Obsolete 1-based subroutine
5035     See RMatrixLQUnpackQ for 0-based replacement.
5036     *************************************************************************/
5037     template<unsigned int Precision>
unpackqfromlq(const ap::template_2d_array<amp::ampf<Precision>> & a,int m,int n,const ap::template_1d_array<amp::ampf<Precision>> & tau,int qrows,ap::template_2d_array<amp::ampf<Precision>> & q)5038     void unpackqfromlq(const ap::template_2d_array< amp::ampf<Precision> >& a,
5039         int m,
5040         int n,
5041         const ap::template_1d_array< amp::ampf<Precision> >& tau,
5042         int qrows,
5043         ap::template_2d_array< amp::ampf<Precision> >& q)
5044     {
5045         int i;
5046         int j;
5047         int k;
5048         int minmn;
5049         ap::template_1d_array< amp::ampf<Precision> > v;
5050         ap::template_1d_array< amp::ampf<Precision> > work;
5051         int vm;
5052 
5053 
5054         ap::ap_error::make_assertion(qrows<=n);
5055         if( m==0 || n==0 || qrows==0 )
5056         {
5057             return;
5058         }
5059 
5060         //
5061         // init
5062         //
5063         minmn = ap::minint(m, n);
5064         k = ap::minint(minmn, qrows);
5065         q.setbounds(1, qrows, 1, n);
5066         v.setbounds(1, n);
5067         work.setbounds(1, qrows);
5068         for(i=1; i<=qrows; i++)
5069         {
5070             for(j=1; j<=n; j++)
5071             {
5072                 if( i==j )
5073                 {
5074                     q(i,j) = 1;
5075                 }
5076                 else
5077                 {
5078                     q(i,j) = 0;
5079                 }
5080             }
5081         }
5082 
5083         //
5084         // unpack Q
5085         //
5086         for(i=k; i>=1; i--)
5087         {
5088 
5089             //
5090             // Apply H(i)
5091             //
5092             vm = n-i+1;
5093             ap::vmove(v.getvector(1, vm), a.getrow(i, i, n));
5094             v(1) = 1;
5095             reflections::applyreflectionfromtheright<Precision>(q, tau(i), v, 1, qrows, i, n, work);
5096         }
5097     }
5098 
5099 
5100     /*************************************************************************
5101     Obsolete 1-based subroutine
5102     *************************************************************************/
5103     template<unsigned int Precision>
lqdecompositionunpacked(ap::template_2d_array<amp::ampf<Precision>> a,int m,int n,ap::template_2d_array<amp::ampf<Precision>> & l,ap::template_2d_array<amp::ampf<Precision>> & q)5104     void lqdecompositionunpacked(ap::template_2d_array< amp::ampf<Precision> > a,
5105         int m,
5106         int n,
5107         ap::template_2d_array< amp::ampf<Precision> >& l,
5108         ap::template_2d_array< amp::ampf<Precision> >& q)
5109     {
5110         int i;
5111         int j;
5112         ap::template_1d_array< amp::ampf<Precision> > tau;
5113 
5114 
5115         if( n<=0 )
5116         {
5117             return;
5118         }
5119         q.setbounds(1, n, 1, n);
5120         l.setbounds(1, m, 1, n);
5121 
5122         //
5123         // LQDecomposition
5124         //
5125         lqdecomposition<Precision>(a, m, n, tau);
5126 
5127         //
5128         // L
5129         //
5130         for(i=1; i<=m; i++)
5131         {
5132             for(j=1; j<=n; j++)
5133             {
5134                 if( j>i )
5135                 {
5136                     l(i,j) = 0;
5137                 }
5138                 else
5139                 {
5140                     l(i,j) = a(i,j);
5141                 }
5142             }
5143         }
5144 
5145         //
5146         // Q
5147         //
5148         unpackqfromlq<Precision>(a, m, n, tau, n, q);
5149     }
5150 } // namespace
5151 
5152 /* stuff included from ./blas.h */
5153 
5154 /*************************************************************************
5155 Copyright (c) 2005-2007, Sergey Bochkanov (ALGLIB project).
5156 
5157 Redistribution and use in source and binary forms, with or without
5158 modification, are permitted provided that the following conditions are
5159 met:
5160 
5161 - Redistributions of source code must retain the above copyright
5162   notice, this list of conditions and the following disclaimer.
5163 
5164 - Redistributions in binary form must reproduce the above copyright
5165   notice, this list of conditions and the following disclaimer listed
5166   in this license in the documentation and/or other materials
5167   provided with the distribution.
5168 
5169 - Neither the name of the copyright holders nor the names of its
5170   contributors may be used to endorse or promote products derived from
5171   this software without specific prior written permission.
5172 
5173 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
5174 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
5175 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
5176 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
5177 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
5178 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
5179 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
5180 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
5181 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
5182 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
5183 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5184 *************************************************************************/
5185 
5186 namespace blas
5187 {
5188     template<unsigned int Precision>
5189     amp::ampf<Precision> vectornorm2(const ap::template_1d_array< amp::ampf<Precision> >& x,
5190         int i1,
5191         int i2);
5192     template<unsigned int Precision>
5193     int vectoridxabsmax(const ap::template_1d_array< amp::ampf<Precision> >& x,
5194         int i1,
5195         int i2);
5196     template<unsigned int Precision>
5197     int columnidxabsmax(const ap::template_2d_array< amp::ampf<Precision> >& x,
5198         int i1,
5199         int i2,
5200         int j);
5201     template<unsigned int Precision>
5202     int rowidxabsmax(const ap::template_2d_array< amp::ampf<Precision> >& x,
5203         int j1,
5204         int j2,
5205         int i);
5206     template<unsigned int Precision>
5207     amp::ampf<Precision> upperhessenberg1norm(const ap::template_2d_array< amp::ampf<Precision> >& a,
5208         int i1,
5209         int i2,
5210         int j1,
5211         int j2,
5212         ap::template_1d_array< amp::ampf<Precision> >& work);
5213     template<unsigned int Precision>
5214     void copymatrix(const ap::template_2d_array< amp::ampf<Precision> >& a,
5215         int is1,
5216         int is2,
5217         int js1,
5218         int js2,
5219         ap::template_2d_array< amp::ampf<Precision> >& b,
5220         int id1,
5221         int id2,
5222         int jd1,
5223         int jd2);
5224     template<unsigned int Precision>
5225     void inplacetranspose(ap::template_2d_array< amp::ampf<Precision> >& a,
5226         int i1,
5227         int i2,
5228         int j1,
5229         int j2,
5230         ap::template_1d_array< amp::ampf<Precision> >& work);
5231     template<unsigned int Precision>
5232     void copyandtranspose(const ap::template_2d_array< amp::ampf<Precision> >& a,
5233         int is1,
5234         int is2,
5235         int js1,
5236         int js2,
5237         ap::template_2d_array< amp::ampf<Precision> >& b,
5238         int id1,
5239         int id2,
5240         int jd1,
5241         int jd2);
5242     template<unsigned int Precision>
5243     void matrixvectormultiply(const ap::template_2d_array< amp::ampf<Precision> >& a,
5244         int i1,
5245         int i2,
5246         int j1,
5247         int j2,
5248         bool trans,
5249         const ap::template_1d_array< amp::ampf<Precision> >& x,
5250         int ix1,
5251         int ix2,
5252         amp::ampf<Precision> alpha,
5253         ap::template_1d_array< amp::ampf<Precision> >& y,
5254         int iy1,
5255         int iy2,
5256         amp::ampf<Precision> beta);
5257     template<unsigned int Precision>
5258     amp::ampf<Precision> pythag2(amp::ampf<Precision> x,
5259         amp::ampf<Precision> y);
5260     template<unsigned int Precision>
5261     void matrixmatrixmultiply(const ap::template_2d_array< amp::ampf<Precision> >& a,
5262         int ai1,
5263         int ai2,
5264         int aj1,
5265         int aj2,
5266         bool transa,
5267         const ap::template_2d_array< amp::ampf<Precision> >& b,
5268         int bi1,
5269         int bi2,
5270         int bj1,
5271         int bj2,
5272         bool transb,
5273         amp::ampf<Precision> alpha,
5274         ap::template_2d_array< amp::ampf<Precision> >& c,
5275         int ci1,
5276         int ci2,
5277         int cj1,
5278         int cj2,
5279         amp::ampf<Precision> beta,
5280         ap::template_1d_array< amp::ampf<Precision> >& work);
5281 
5282 
5283     template<unsigned int Precision>
vectornorm2(const ap::template_1d_array<amp::ampf<Precision>> & x,int i1,int i2)5284     amp::ampf<Precision> vectornorm2(const ap::template_1d_array< amp::ampf<Precision> >& x,
5285         int i1,
5286         int i2)
5287     {
5288         amp::ampf<Precision> result;
5289         int n;
5290         int ix;
5291         amp::ampf<Precision> absxi;
5292         amp::ampf<Precision> scl;
5293         amp::ampf<Precision> ssq;
5294 
5295 
5296         n = i2-i1+1;
5297         if( n<1 )
5298         {
5299             result = 0;
5300             return result;
5301         }
5302         if( n==1 )
5303         {
5304             result = amp::abs<Precision>(x(i1));
5305             return result;
5306         }
5307         scl = 0;
5308         ssq = 1;
5309         for(ix=i1; ix<=i2; ix++)
5310         {
5311             if( x(ix)!=0 )
5312             {
5313                 absxi = amp::abs<Precision>(x(ix));
5314                 if( scl<absxi )
5315                 {
5316                     ssq = 1+ssq*amp::sqr<Precision>(scl/absxi);
5317                     scl = absxi;
5318                 }
5319                 else
5320                 {
5321                     ssq = ssq+amp::sqr<Precision>(absxi/scl);
5322                 }
5323             }
5324         }
5325         result = scl*amp::sqrt<Precision>(ssq);
5326         return result;
5327     }
5328 
5329 
5330     template<unsigned int Precision>
vectoridxabsmax(const ap::template_1d_array<amp::ampf<Precision>> & x,int i1,int i2)5331     int vectoridxabsmax(const ap::template_1d_array< amp::ampf<Precision> >& x,
5332         int i1,
5333         int i2)
5334     {
5335         int result;
5336         int i;
5337         amp::ampf<Precision> a;
5338 
5339 
5340         result = i1;
5341         a = amp::abs<Precision>(x(result));
5342         for(i=i1+1; i<=i2; i++)
5343         {
5344             if( amp::abs<Precision>(x(i))>amp::abs<Precision>(x(result)) )
5345             {
5346                 result = i;
5347             }
5348         }
5349         return result;
5350     }
5351 
5352 
5353     template<unsigned int Precision>
columnidxabsmax(const ap::template_2d_array<amp::ampf<Precision>> & x,int i1,int i2,int j)5354     int columnidxabsmax(const ap::template_2d_array< amp::ampf<Precision> >& x,
5355         int i1,
5356         int i2,
5357         int j)
5358     {
5359         int result;
5360         int i;
5361         amp::ampf<Precision> a;
5362 
5363 
5364         result = i1;
5365         a = amp::abs<Precision>(x(result,j));
5366         for(i=i1+1; i<=i2; i++)
5367         {
5368             if( amp::abs<Precision>(x(i,j))>amp::abs<Precision>(x(result,j)) )
5369             {
5370                 result = i;
5371             }
5372         }
5373         return result;
5374     }
5375 
5376 
5377     template<unsigned int Precision>
rowidxabsmax(const ap::template_2d_array<amp::ampf<Precision>> & x,int j1,int j2,int i)5378     int rowidxabsmax(const ap::template_2d_array< amp::ampf<Precision> >& x,
5379         int j1,
5380         int j2,
5381         int i)
5382     {
5383         int result;
5384         int j;
5385         amp::ampf<Precision> a;
5386 
5387 
5388         result = j1;
5389         a = amp::abs<Precision>(x(i,result));
5390         for(j=j1+1; j<=j2; j++)
5391         {
5392             if( amp::abs<Precision>(x(i,j))>amp::abs<Precision>(x(i,result)) )
5393             {
5394                 result = j;
5395             }
5396         }
5397         return result;
5398     }
5399 
5400 
5401     template<unsigned int Precision>
upperhessenberg1norm(const ap::template_2d_array<amp::ampf<Precision>> & a,int i1,int i2,int j1,int j2,ap::template_1d_array<amp::ampf<Precision>> & work)5402     amp::ampf<Precision> upperhessenberg1norm(const ap::template_2d_array< amp::ampf<Precision> >& a,
5403         int i1,
5404         int i2,
5405         int j1,
5406         int j2,
5407         ap::template_1d_array< amp::ampf<Precision> >& work)
5408     {
5409         amp::ampf<Precision> result;
5410         int i;
5411         int j;
5412 
5413 
5414         ap::ap_error::make_assertion(i2-i1==j2-j1);
5415         for(j=j1; j<=j2; j++)
5416         {
5417             work(j) = 0;
5418         }
5419         for(i=i1; i<=i2; i++)
5420         {
5421             for(j=ap::maxint(j1, j1+i-i1-1); j<=j2; j++)
5422             {
5423                 work(j) = work(j)+amp::abs<Precision>(a(i,j));
5424             }
5425         }
5426         result = 0;
5427         for(j=j1; j<=j2; j++)
5428         {
5429             result = amp::maximum<Precision>(result, work(j));
5430         }
5431         return result;
5432     }
5433 
5434 
5435     template<unsigned int Precision>
copymatrix(const ap::template_2d_array<amp::ampf<Precision>> & a,int is1,int is2,int js1,int js2,ap::template_2d_array<amp::ampf<Precision>> & b,int id1,int id2,int jd1,int jd2)5436     void copymatrix(const ap::template_2d_array< amp::ampf<Precision> >& a,
5437         int is1,
5438         int is2,
5439         int js1,
5440         int js2,
5441         ap::template_2d_array< amp::ampf<Precision> >& b,
5442         int id1,
5443         int id2,
5444         int jd1,
5445         int jd2)
5446     {
5447         int isrc;
5448         int idst;
5449 
5450 
5451         if( is1>is2 || js1>js2 )
5452         {
5453             return;
5454         }
5455         ap::ap_error::make_assertion(is2-is1==id2-id1);
5456         ap::ap_error::make_assertion(js2-js1==jd2-jd1);
5457         for(isrc=is1; isrc<=is2; isrc++)
5458         {
5459             idst = isrc-is1+id1;
5460             ap::vmove(b.getrow(idst, jd1, jd2), a.getrow(isrc, js1, js2));
5461         }
5462     }
5463 
5464 
5465     template<unsigned int Precision>
inplacetranspose(ap::template_2d_array<amp::ampf<Precision>> & a,int i1,int i2,int j1,int j2,ap::template_1d_array<amp::ampf<Precision>> & work)5466     void inplacetranspose(ap::template_2d_array< amp::ampf<Precision> >& a,
5467         int i1,
5468         int i2,
5469         int j1,
5470         int j2,
5471         ap::template_1d_array< amp::ampf<Precision> >& work)
5472     {
5473         int i;
5474         int j;
5475         int ips;
5476         int jps;
5477         int l;
5478 
5479 
5480         if( i1>i2 || j1>j2 )
5481         {
5482             return;
5483         }
5484         ap::ap_error::make_assertion(i1-i2==j1-j2);
5485         for(i=i1; i<=i2-1; i++)
5486         {
5487             j = j1+i-i1;
5488             ips = i+1;
5489             jps = j1+ips-i1;
5490             l = i2-i;
5491             ap::vmove(work.getvector(1, l), a.getcolumn(j, ips, i2));
5492             ap::vmove(a.getcolumn(j, ips, i2), a.getrow(i, jps, j2));
5493             ap::vmove(a.getrow(i, jps, j2), work.getvector(1, l));
5494         }
5495     }
5496 
5497 
5498     template<unsigned int Precision>
copyandtranspose(const ap::template_2d_array<amp::ampf<Precision>> & a,int is1,int is2,int js1,int js2,ap::template_2d_array<amp::ampf<Precision>> & b,int id1,int id2,int jd1,int jd2)5499     void copyandtranspose(const ap::template_2d_array< amp::ampf<Precision> >& a,
5500         int is1,
5501         int is2,
5502         int js1,
5503         int js2,
5504         ap::template_2d_array< amp::ampf<Precision> >& b,
5505         int id1,
5506         int id2,
5507         int jd1,
5508         int jd2)
5509     {
5510         int isrc;
5511         int jdst;
5512 
5513 
5514         if( is1>is2 || js1>js2 )
5515         {
5516             return;
5517         }
5518         ap::ap_error::make_assertion(is2-is1==jd2-jd1);
5519         ap::ap_error::make_assertion(js2-js1==id2-id1);
5520         for(isrc=is1; isrc<=is2; isrc++)
5521         {
5522             jdst = isrc-is1+jd1;
5523             ap::vmove(b.getcolumn(jdst, id1, id2), a.getrow(isrc, js1, js2));
5524         }
5525     }
5526 
5527 
5528     template<unsigned int Precision>
matrixvectormultiply(const ap::template_2d_array<amp::ampf<Precision>> & a,int i1,int i2,int j1,int j2,bool trans,const ap::template_1d_array<amp::ampf<Precision>> & x,int ix1,int ix2,amp::ampf<Precision> alpha,ap::template_1d_array<amp::ampf<Precision>> & y,int iy1,int iy2,amp::ampf<Precision> beta)5529     void matrixvectormultiply(const ap::template_2d_array< amp::ampf<Precision> >& a,
5530         int i1,
5531         int i2,
5532         int j1,
5533         int j2,
5534         bool trans,
5535         const ap::template_1d_array< amp::ampf<Precision> >& x,
5536         int ix1,
5537         int ix2,
5538         amp::ampf<Precision> alpha,
5539         ap::template_1d_array< amp::ampf<Precision> >& y,
5540         int iy1,
5541         int iy2,
5542         amp::ampf<Precision> beta)
5543     {
5544         int i;
5545         amp::ampf<Precision> v;
5546 
5547 
5548         if( !trans )
5549         {
5550 
5551             //
5552             // y := alpha*A*x + beta*y;
5553             //
5554             if( i1>i2 || j1>j2 )
5555             {
5556                 return;
5557             }
5558             ap::ap_error::make_assertion(j2-j1==ix2-ix1);
5559             ap::ap_error::make_assertion(i2-i1==iy2-iy1);
5560 
5561             //
5562             // beta*y
5563             //
5564             if( beta==0 )
5565             {
5566                 for(i=iy1; i<=iy2; i++)
5567                 {
5568                     y(i) = 0;
5569                 }
5570             }
5571             else
5572             {
5573                 ap::vmul(y.getvector(iy1, iy2), beta);
5574             }
5575 
5576             //
5577             // alpha*A*x
5578             //
5579             for(i=i1; i<=i2; i++)
5580             {
5581                 v = ap::vdotproduct(a.getrow(i, j1, j2), x.getvector(ix1, ix2));
5582                 y(iy1+i-i1) = y(iy1+i-i1)+alpha*v;
5583             }
5584         }
5585         else
5586         {
5587 
5588             //
5589             // y := alpha*A'*x + beta*y;
5590             //
5591             if( i1>i2 || j1>j2 )
5592             {
5593                 return;
5594             }
5595             ap::ap_error::make_assertion(i2-i1==ix2-ix1);
5596             ap::ap_error::make_assertion(j2-j1==iy2-iy1);
5597 
5598             //
5599             // beta*y
5600             //
5601             if( beta==0 )
5602             {
5603                 for(i=iy1; i<=iy2; i++)
5604                 {
5605                     y(i) = 0;
5606                 }
5607             }
5608             else
5609             {
5610                 ap::vmul(y.getvector(iy1, iy2), beta);
5611             }
5612 
5613             //
5614             // alpha*A'*x
5615             //
5616             for(i=i1; i<=i2; i++)
5617             {
5618                 v = alpha*x(ix1+i-i1);
5619                 ap::vadd(y.getvector(iy1, iy2), a.getrow(i, j1, j2), v);
5620             }
5621         }
5622     }
5623 
5624 
5625     template<unsigned int Precision>
pythag2(amp::ampf<Precision> x,amp::ampf<Precision> y)5626     amp::ampf<Precision> pythag2(amp::ampf<Precision> x,
5627         amp::ampf<Precision> y)
5628     {
5629         amp::ampf<Precision> result;
5630         amp::ampf<Precision> w;
5631         amp::ampf<Precision> xabs;
5632         amp::ampf<Precision> yabs;
5633         amp::ampf<Precision> z;
5634 
5635 
5636         xabs = amp::abs<Precision>(x);
5637         yabs = amp::abs<Precision>(y);
5638         w = amp::maximum<Precision>(xabs, yabs);
5639         z = amp::minimum<Precision>(xabs, yabs);
5640         if( z==0 )
5641         {
5642             result = w;
5643         }
5644         else
5645         {
5646             result = w*amp::sqrt<Precision>(1+amp::sqr<Precision>(z/w));
5647         }
5648         return result;
5649     }
5650 
5651 
5652     template<unsigned int Precision>
matrixmatrixmultiply(const ap::template_2d_array<amp::ampf<Precision>> & a,int ai1,int ai2,int aj1,int aj2,bool transa,const ap::template_2d_array<amp::ampf<Precision>> & b,int bi1,int bi2,int bj1,int bj2,bool transb,amp::ampf<Precision> alpha,ap::template_2d_array<amp::ampf<Precision>> & c,int ci1,int ci2,int cj1,int cj2,amp::ampf<Precision> beta,ap::template_1d_array<amp::ampf<Precision>> & work)5653     void matrixmatrixmultiply(const ap::template_2d_array< amp::ampf<Precision> >& a,
5654         int ai1,
5655         int ai2,
5656         int aj1,
5657         int aj2,
5658         bool transa,
5659         const ap::template_2d_array< amp::ampf<Precision> >& b,
5660         int bi1,
5661         int bi2,
5662         int bj1,
5663         int bj2,
5664         bool transb,
5665         amp::ampf<Precision> alpha,
5666         ap::template_2d_array< amp::ampf<Precision> >& c,
5667         int ci1,
5668         int ci2,
5669         int cj1,
5670         int cj2,
5671         amp::ampf<Precision> beta,
5672         ap::template_1d_array< amp::ampf<Precision> >& work)
5673     {
5674         int arows;
5675         int acols;
5676         int brows;
5677         int bcols;
5678         int crows;
5679         int ccols;
5680         int i;
5681         int j;
5682         int k;
5683         int l;
5684         int r;
5685         amp::ampf<Precision> v;
5686 
5687 
5688 
5689         //
5690         // Setup
5691         //
5692         if( !transa )
5693         {
5694             arows = ai2-ai1+1;
5695             acols = aj2-aj1+1;
5696         }
5697         else
5698         {
5699             arows = aj2-aj1+1;
5700             acols = ai2-ai1+1;
5701         }
5702         if( !transb )
5703         {
5704             brows = bi2-bi1+1;
5705             bcols = bj2-bj1+1;
5706         }
5707         else
5708         {
5709             brows = bj2-bj1+1;
5710             bcols = bi2-bi1+1;
5711         }
5712         ap::ap_error::make_assertion(acols==brows);
5713         if( arows<=0 || acols<=0 || brows<=0 || bcols<=0 )
5714         {
5715             return;
5716         }
5717         crows = arows;
5718         ccols = bcols;
5719 
5720         //
5721         // Test WORK
5722         //
5723         i = ap::maxint(arows, acols);
5724         i = ap::maxint(brows, i);
5725         i = ap::maxint(i, bcols);
5726         work(1) = 0;
5727         work(i) = 0;
5728 
5729         //
5730         // Prepare C
5731         //
5732         if( beta==0 )
5733         {
5734             for(i=ci1; i<=ci2; i++)
5735             {
5736                 for(j=cj1; j<=cj2; j++)
5737                 {
5738                     c(i,j) = 0;
5739                 }
5740             }
5741         }
5742         else
5743         {
5744             for(i=ci1; i<=ci2; i++)
5745             {
5746                 ap::vmul(c.getrow(i, cj1, cj2), beta);
5747             }
5748         }
5749 
5750         //
5751         // A*B
5752         //
5753         if( !transa && !transb )
5754         {
5755             for(l=ai1; l<=ai2; l++)
5756             {
5757                 for(r=bi1; r<=bi2; r++)
5758                 {
5759                     v = alpha*a(l,aj1+r-bi1);
5760                     k = ci1+l-ai1;
5761                     ap::vadd(c.getrow(k, cj1, cj2), b.getrow(r, bj1, bj2), v);
5762                 }
5763             }
5764             return;
5765         }
5766 
5767         //
5768         // A*B'
5769         //
5770         if( !transa && transb )
5771         {
5772             if( arows*acols<brows*bcols )
5773             {
5774                 for(r=bi1; r<=bi2; r++)
5775                 {
5776                     for(l=ai1; l<=ai2; l++)
5777                     {
5778                         v = ap::vdotproduct(a.getrow(l, aj1, aj2), b.getrow(r, bj1, bj2));
5779                         c(ci1+l-ai1,cj1+r-bi1) = c(ci1+l-ai1,cj1+r-bi1)+alpha*v;
5780                     }
5781                 }
5782                 return;
5783             }
5784             else
5785             {
5786                 for(l=ai1; l<=ai2; l++)
5787                 {
5788                     for(r=bi1; r<=bi2; r++)
5789                     {
5790                         v = ap::vdotproduct(a.getrow(l, aj1, aj2), b.getrow(r, bj1, bj2));
5791                         c(ci1+l-ai1,cj1+r-bi1) = c(ci1+l-ai1,cj1+r-bi1)+alpha*v;
5792                     }
5793                 }
5794                 return;
5795             }
5796         }
5797 
5798         //
5799         // A'*B
5800         //
5801         if( transa && !transb )
5802         {
5803             for(l=aj1; l<=aj2; l++)
5804             {
5805                 for(r=bi1; r<=bi2; r++)
5806                 {
5807                     v = alpha*a(ai1+r-bi1,l);
5808                     k = ci1+l-aj1;
5809                     ap::vadd(c.getrow(k, cj1, cj2), b.getrow(r, bj1, bj2), v);
5810                 }
5811             }
5812             return;
5813         }
5814 
5815         //
5816         // A'*B'
5817         //
5818         if( transa && transb )
5819         {
5820             if( arows*acols<brows*bcols )
5821             {
5822                 for(r=bi1; r<=bi2; r++)
5823                 {
5824                     for(i=1; i<=crows; i++)
5825                     {
5826                         work(i) = amp::ampf<Precision>("0.0");
5827                     }
5828                     for(l=ai1; l<=ai2; l++)
5829                     {
5830                         v = alpha*b(r,bj1+l-ai1);
5831                         k = cj1+r-bi1;
5832                         ap::vadd(work.getvector(1, crows), a.getrow(l, aj1, aj2), v);
5833                     }
5834                     ap::vadd(c.getcolumn(k, ci1, ci2), work.getvector(1, crows));
5835                 }
5836                 return;
5837             }
5838             else
5839             {
5840                 for(l=aj1; l<=aj2; l++)
5841                 {
5842                     k = ai2-ai1+1;
5843                     ap::vmove(work.getvector(1, k), a.getcolumn(l, ai1, ai2));
5844                     for(r=bi1; r<=bi2; r++)
5845                     {
5846                         v = ap::vdotproduct(work.getvector(1, k), b.getrow(r, bj1, bj2));
5847                         c(ci1+l-aj1,cj1+r-bi1) = c(ci1+l-aj1,cj1+r-bi1)+alpha*v;
5848                     }
5849                 }
5850                 return;
5851             }
5852         }
5853     }
5854 } // namespace
5855 
5856 /* stuff included from ./rotations.h */
5857 
5858 /*************************************************************************
5859 Copyright (c) 1992-2007 The University of Tennessee.  All rights reserved.
5860 
5861 Contributors:
5862     * Sergey Bochkanov (ALGLIB project). Translation from FORTRAN to
5863       pseudocode.
5864 
5865 See subroutines comments for additional copyrights.
5866 
5867 Redistribution and use in source and binary forms, with or without
5868 modification, are permitted provided that the following conditions are
5869 met:
5870 
5871 - Redistributions of source code must retain the above copyright
5872   notice, this list of conditions and the following disclaimer.
5873 
5874 - Redistributions in binary form must reproduce the above copyright
5875   notice, this list of conditions and the following disclaimer listed
5876   in this license in the documentation and/or other materials
5877   provided with the distribution.
5878 
5879 - Neither the name of the copyright holders nor the names of its
5880   contributors may be used to endorse or promote products derived from
5881   this software without specific prior written permission.
5882 
5883 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
5884 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
5885 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
5886 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
5887 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
5888 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
5889 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
5890 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
5891 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
5892 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
5893 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5894 *************************************************************************/
5895 
5896 namespace rotations
5897 {
5898     template<unsigned int Precision>
5899     void applyrotationsfromtheleft(bool isforward,
5900         int m1,
5901         int m2,
5902         int n1,
5903         int n2,
5904         const ap::template_1d_array< amp::ampf<Precision> >& c,
5905         const ap::template_1d_array< amp::ampf<Precision> >& s,
5906         ap::template_2d_array< amp::ampf<Precision> >& a,
5907         ap::template_1d_array< amp::ampf<Precision> >& work);
5908     template<unsigned int Precision>
5909     void applyrotationsfromtheright(bool isforward,
5910         int m1,
5911         int m2,
5912         int n1,
5913         int n2,
5914         const ap::template_1d_array< amp::ampf<Precision> >& c,
5915         const ap::template_1d_array< amp::ampf<Precision> >& s,
5916         ap::template_2d_array< amp::ampf<Precision> >& a,
5917         ap::template_1d_array< amp::ampf<Precision> >& work);
5918     template<unsigned int Precision>
5919     void generaterotation(amp::ampf<Precision> f,
5920         amp::ampf<Precision> g,
5921         amp::ampf<Precision>& cs,
5922         amp::ampf<Precision>& sn,
5923         amp::ampf<Precision>& r);
5924 
5925 
5926     /*************************************************************************
5927     Application of a sequence of  elementary rotations to a matrix
5928 
5929     The algorithm pre-multiplies the matrix by a sequence of rotation
5930     transformations which is given by arrays C and S. Depending on the value
5931     of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true)
5932     rows are rotated, or the rows N and N-1, N-2 and N-3 and so on, are rotated.
5933 
5934     Not the whole matrix but only a part of it is transformed (rows from M1 to
5935     M2, columns from N1 to N2). Only the elements of this submatrix are changed.
5936 
5937     Input parameters:
5938         IsForward   -   the sequence of the rotation application.
5939         M1,M2       -   the range of rows to be transformed.
5940         N1, N2      -   the range of columns to be transformed.
5941         C,S         -   transformation coefficients.
5942                         Array whose index ranges within [1..M2-M1].
5943         A           -   processed matrix.
5944         WORK        -   working array whose index ranges within [N1..N2].
5945 
5946     Output parameters:
5947         A           -   transformed matrix.
5948 
5949     Utility subroutine.
5950     *************************************************************************/
5951     template<unsigned int Precision>
applyrotationsfromtheleft(bool isforward,int m1,int m2,int n1,int n2,const ap::template_1d_array<amp::ampf<Precision>> & c,const ap::template_1d_array<amp::ampf<Precision>> & s,ap::template_2d_array<amp::ampf<Precision>> & a,ap::template_1d_array<amp::ampf<Precision>> & work)5952     void applyrotationsfromtheleft(bool isforward,
5953         int m1,
5954         int m2,
5955         int n1,
5956         int n2,
5957         const ap::template_1d_array< amp::ampf<Precision> >& c,
5958         const ap::template_1d_array< amp::ampf<Precision> >& s,
5959         ap::template_2d_array< amp::ampf<Precision> >& a,
5960         ap::template_1d_array< amp::ampf<Precision> >& work)
5961     {
5962         int j;
5963         int jp1;
5964         amp::ampf<Precision> ctemp;
5965         amp::ampf<Precision> stemp;
5966         amp::ampf<Precision> temp;
5967 
5968 
5969         if( m1>m2 || n1>n2 )
5970         {
5971             return;
5972         }
5973 
5974         //
5975         // Form  P * A
5976         //
5977         if( isforward )
5978         {
5979             if( n1!=n2 )
5980             {
5981 
5982                 //
5983                 // Common case: N1<>N2
5984                 //
5985                 for(j=m1; j<=m2-1; j++)
5986                 {
5987                     ctemp = c(j-m1+1);
5988                     stemp = s(j-m1+1);
5989                     if( ctemp!=1 || stemp!=0 )
5990                     {
5991                         jp1 = j+1;
5992                         ap::vmove(work.getvector(n1, n2), a.getrow(jp1, n1, n2), ctemp);
5993                         ap::vsub(work.getvector(n1, n2), a.getrow(j, n1, n2), stemp);
5994                         ap::vmul(a.getrow(j, n1, n2), ctemp);
5995                         ap::vadd(a.getrow(j, n1, n2), a.getrow(jp1, n1, n2), stemp);
5996                         ap::vmove(a.getrow(jp1, n1, n2), work.getvector(n1, n2));
5997                     }
5998                 }
5999             }
6000             else
6001             {
6002 
6003                 //
6004                 // Special case: N1=N2
6005                 //
6006                 for(j=m1; j<=m2-1; j++)
6007                 {
6008                     ctemp = c(j-m1+1);
6009                     stemp = s(j-m1+1);
6010                     if( ctemp!=1 || stemp!=0 )
6011                     {
6012                         temp = a(j+1,n1);
6013                         a(j+1,n1) = ctemp*temp-stemp*a(j,n1);
6014                         a(j,n1) = stemp*temp+ctemp*a(j,n1);
6015                     }
6016                 }
6017             }
6018         }
6019         else
6020         {
6021             if( n1!=n2 )
6022             {
6023 
6024                 //
6025                 // Common case: N1<>N2
6026                 //
6027                 for(j=m2-1; j>=m1; j--)
6028                 {
6029                     ctemp = c(j-m1+1);
6030                     stemp = s(j-m1+1);
6031                     if( ctemp!=1 || stemp!=0 )
6032                     {
6033                         jp1 = j+1;
6034                         ap::vmove(work.getvector(n1, n2), a.getrow(jp1, n1, n2), ctemp);
6035                         ap::vsub(work.getvector(n1, n2), a.getrow(j, n1, n2), stemp);
6036                         ap::vmul(a.getrow(j, n1, n2), ctemp);
6037                         ap::vadd(a.getrow(j, n1, n2), a.getrow(jp1, n1, n2), stemp);
6038                         ap::vmove(a.getrow(jp1, n1, n2), work.getvector(n1, n2));
6039                     }
6040                 }
6041             }
6042             else
6043             {
6044 
6045                 //
6046                 // Special case: N1=N2
6047                 //
6048                 for(j=m2-1; j>=m1; j--)
6049                 {
6050                     ctemp = c(j-m1+1);
6051                     stemp = s(j-m1+1);
6052                     if( ctemp!=1 || stemp!=0 )
6053                     {
6054                         temp = a(j+1,n1);
6055                         a(j+1,n1) = ctemp*temp-stemp*a(j,n1);
6056                         a(j,n1) = stemp*temp+ctemp*a(j,n1);
6057                     }
6058                 }
6059             }
6060         }
6061     }
6062 
6063 
6064     /*************************************************************************
6065     Application of a sequence of  elementary rotations to a matrix
6066 
6067     The algorithm post-multiplies the matrix by a sequence of rotation
6068     transformations which is given by arrays C and S. Depending on the value
6069     of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true)
6070     rows are rotated, or the rows N and N-1, N-2 and N-3 and so on are rotated.
6071 
6072     Not the whole matrix but only a part of it is transformed (rows from M1
6073     to M2, columns from N1 to N2). Only the elements of this submatrix are changed.
6074 
6075     Input parameters:
6076         IsForward   -   the sequence of the rotation application.
6077         M1,M2       -   the range of rows to be transformed.
6078         N1, N2      -   the range of columns to be transformed.
6079         C,S         -   transformation coefficients.
6080                         Array whose index ranges within [1..N2-N1].
6081         A           -   processed matrix.
6082         WORK        -   working array whose index ranges within [M1..M2].
6083 
6084     Output parameters:
6085         A           -   transformed matrix.
6086 
6087     Utility subroutine.
6088     *************************************************************************/
6089     template<unsigned int Precision>
applyrotationsfromtheright(bool isforward,int m1,int m2,int n1,int n2,const ap::template_1d_array<amp::ampf<Precision>> & c,const ap::template_1d_array<amp::ampf<Precision>> & s,ap::template_2d_array<amp::ampf<Precision>> & a,ap::template_1d_array<amp::ampf<Precision>> & work)6090     void applyrotationsfromtheright(bool isforward,
6091         int m1,
6092         int m2,
6093         int n1,
6094         int n2,
6095         const ap::template_1d_array< amp::ampf<Precision> >& c,
6096         const ap::template_1d_array< amp::ampf<Precision> >& s,
6097         ap::template_2d_array< amp::ampf<Precision> >& a,
6098         ap::template_1d_array< amp::ampf<Precision> >& work)
6099     {
6100         int j;
6101         int jp1;
6102         amp::ampf<Precision> ctemp;
6103         amp::ampf<Precision> stemp;
6104         amp::ampf<Precision> temp;
6105 
6106 
6107 
6108         //
6109         // Form A * P'
6110         //
6111         if( isforward )
6112         {
6113             if( m1!=m2 )
6114             {
6115 
6116                 //
6117                 // Common case: M1<>M2
6118                 //
6119                 for(j=n1; j<=n2-1; j++)
6120                 {
6121                     ctemp = c(j-n1+1);
6122                     stemp = s(j-n1+1);
6123                     if( ctemp!=1 || stemp!=0 )
6124                     {
6125                         jp1 = j+1;
6126                         ap::vmove(work.getvector(m1, m2), a.getcolumn(jp1, m1, m2), ctemp);
6127                         ap::vsub(work.getvector(m1, m2), a.getcolumn(j, m1, m2), stemp);
6128                         ap::vmul(a.getcolumn(j, m1, m2), ctemp);
6129                         ap::vadd(a.getcolumn(j, m1, m2), a.getcolumn(jp1, m1, m2), stemp);
6130                         ap::vmove(a.getcolumn(jp1, m1, m2), work.getvector(m1, m2));
6131                     }
6132                 }
6133             }
6134             else
6135             {
6136 
6137                 //
6138                 // Special case: M1=M2
6139                 //
6140                 for(j=n1; j<=n2-1; j++)
6141                 {
6142                     ctemp = c(j-n1+1);
6143                     stemp = s(j-n1+1);
6144                     if( ctemp!=1 || stemp!=0 )
6145                     {
6146                         temp = a(m1,j+1);
6147                         a(m1,j+1) = ctemp*temp-stemp*a(m1,j);
6148                         a(m1,j) = stemp*temp+ctemp*a(m1,j);
6149                     }
6150                 }
6151             }
6152         }
6153         else
6154         {
6155             if( m1!=m2 )
6156             {
6157 
6158                 //
6159                 // Common case: M1<>M2
6160                 //
6161                 for(j=n2-1; j>=n1; j--)
6162                 {
6163                     ctemp = c(j-n1+1);
6164                     stemp = s(j-n1+1);
6165                     if( ctemp!=1 || stemp!=0 )
6166                     {
6167                         jp1 = j+1;
6168                         ap::vmove(work.getvector(m1, m2), a.getcolumn(jp1, m1, m2), ctemp);
6169                         ap::vsub(work.getvector(m1, m2), a.getcolumn(j, m1, m2), stemp);
6170                         ap::vmul(a.getcolumn(j, m1, m2), ctemp);
6171                         ap::vadd(a.getcolumn(j, m1, m2), a.getcolumn(jp1, m1, m2), stemp);
6172                         ap::vmove(a.getcolumn(jp1, m1, m2), work.getvector(m1, m2));
6173                     }
6174                 }
6175             }
6176             else
6177             {
6178 
6179                 //
6180                 // Special case: M1=M2
6181                 //
6182                 for(j=n2-1; j>=n1; j--)
6183                 {
6184                     ctemp = c(j-n1+1);
6185                     stemp = s(j-n1+1);
6186                     if( ctemp!=1 || stemp!=0 )
6187                     {
6188                         temp = a(m1,j+1);
6189                         a(m1,j+1) = ctemp*temp-stemp*a(m1,j);
6190                         a(m1,j) = stemp*temp+ctemp*a(m1,j);
6191                     }
6192                 }
6193             }
6194         }
6195     }
6196 
6197 
6198     /*************************************************************************
6199     The subroutine generates the elementary rotation, so that:
6200 
6201     [  CS  SN  ]  .  [ F ]  =  [ R ]
6202     [ -SN  CS  ]     [ G ]     [ 0 ]
6203 
6204     CS**2 + SN**2 = 1
6205     *************************************************************************/
6206     template<unsigned int Precision>
generaterotation(amp::ampf<Precision> f,amp::ampf<Precision> g,amp::ampf<Precision> & cs,amp::ampf<Precision> & sn,amp::ampf<Precision> & r)6207     void generaterotation(amp::ampf<Precision> f,
6208         amp::ampf<Precision> g,
6209         amp::ampf<Precision>& cs,
6210         amp::ampf<Precision>& sn,
6211         amp::ampf<Precision>& r)
6212     {
6213         amp::ampf<Precision> f1;
6214         amp::ampf<Precision> g1;
6215 
6216 
6217         if( g==0 )
6218         {
6219             cs = 1;
6220             sn = 0;
6221             r = f;
6222         }
6223         else
6224         {
6225             if( f==0 )
6226             {
6227                 cs = 0;
6228                 sn = 1;
6229                 r = g;
6230             }
6231             else
6232             {
6233                 f1 = f;
6234                 g1 = g;
6235                 r = amp::sqrt<Precision>(amp::sqr<Precision>(f1)+amp::sqr<Precision>(g1));
6236                 cs = f1/r;
6237                 sn = g1/r;
6238                 if( amp::abs<Precision>(f)>amp::abs<Precision>(g) && cs<0 )
6239                 {
6240                     cs = -cs;
6241                     sn = -sn;
6242                     r = -r;
6243                 }
6244             }
6245         }
6246     }
6247 } // namespace
6248 
6249 /* stuff included from ./bdsvd.h */
6250 
6251 /*************************************************************************
6252 Copyright (c) 1992-2007 The University of Tennessee.  All rights reserved.
6253 
6254 Contributors:
6255     * Sergey Bochkanov (ALGLIB project). Translation from FORTRAN to
6256       pseudocode.
6257 
6258 See subroutines comments for additional copyrights.
6259 
6260 Redistribution and use in source and binary forms, with or without
6261 modification, are permitted provided that the following conditions are
6262 met:
6263 
6264 - Redistributions of source code must retain the above copyright
6265   notice, this list of conditions and the following disclaimer.
6266 
6267 - Redistributions in binary form must reproduce the above copyright
6268   notice, this list of conditions and the following disclaimer listed
6269   in this license in the documentation and/or other materials
6270   provided with the distribution.
6271 
6272 - Neither the name of the copyright holders nor the names of its
6273   contributors may be used to endorse or promote products derived from
6274   this software without specific prior written permission.
6275 
6276 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
6277 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
6278 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
6279 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
6280 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
6281 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
6282 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
6283 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
6284 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
6285 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
6286 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6287 *************************************************************************/
6288 
6289 namespace bdsvd
6290 {
6291     template<unsigned int Precision>
6292     bool rmatrixbdsvd(ap::template_1d_array< amp::ampf<Precision> >& d,
6293         ap::template_1d_array< amp::ampf<Precision> > e,
6294         int n,
6295         bool isupper,
6296         bool isfractionalaccuracyrequired,
6297         ap::template_2d_array< amp::ampf<Precision> >& u,
6298         int nru,
6299         ap::template_2d_array< amp::ampf<Precision> >& c,
6300         int ncc,
6301         ap::template_2d_array< amp::ampf<Precision> >& vt,
6302         int ncvt);
6303     template<unsigned int Precision>
6304     bool bidiagonalsvddecomposition(ap::template_1d_array< amp::ampf<Precision> >& d,
6305         ap::template_1d_array< amp::ampf<Precision> > e,
6306         int n,
6307         bool isupper,
6308         bool isfractionalaccuracyrequired,
6309         ap::template_2d_array< amp::ampf<Precision> >& u,
6310         int nru,
6311         ap::template_2d_array< amp::ampf<Precision> >& c,
6312         int ncc,
6313         ap::template_2d_array< amp::ampf<Precision> >& vt,
6314         int ncvt);
6315     template<unsigned int Precision>
6316     bool bidiagonalsvddecompositioninternal(ap::template_1d_array< amp::ampf<Precision> >& d,
6317         ap::template_1d_array< amp::ampf<Precision> > e,
6318         int n,
6319         bool isupper,
6320         bool isfractionalaccuracyrequired,
6321         ap::template_2d_array< amp::ampf<Precision> >& u,
6322         int ustart,
6323         int nru,
6324         ap::template_2d_array< amp::ampf<Precision> >& c,
6325         int cstart,
6326         int ncc,
6327         ap::template_2d_array< amp::ampf<Precision> >& vt,
6328         int vstart,
6329         int ncvt);
6330     template<unsigned int Precision>
6331     amp::ampf<Precision> extsignbdsqr(amp::ampf<Precision> a,
6332         amp::ampf<Precision> b);
6333     template<unsigned int Precision>
6334     void svd2x2(amp::ampf<Precision> f,
6335         amp::ampf<Precision> g,
6336         amp::ampf<Precision> h,
6337         amp::ampf<Precision>& ssmin,
6338         amp::ampf<Precision>& ssmax);
6339     template<unsigned int Precision>
6340     void svdv2x2(amp::ampf<Precision> f,
6341         amp::ampf<Precision> g,
6342         amp::ampf<Precision> h,
6343         amp::ampf<Precision>& ssmin,
6344         amp::ampf<Precision>& ssmax,
6345         amp::ampf<Precision>& snr,
6346         amp::ampf<Precision>& csr,
6347         amp::ampf<Precision>& snl,
6348         amp::ampf<Precision>& csl);
6349 
6350 
6351     /*************************************************************************
6352     Singular value decomposition of a bidiagonal matrix (extended algorithm)
6353 
6354     The algorithm performs the singular value decomposition  of  a  bidiagonal
6355     matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and  P -
6356     orthogonal matrices, S - diagonal matrix with non-negative elements on the
6357     main diagonal, in descending order.
6358 
6359     The  algorithm  finds  singular  values.  In  addition,  the algorithm can
6360     calculate  matrices  Q  and P (more precisely, not the matrices, but their
6361     product  with  given  matrices U and VT - U*Q and (P^T)*VT)).  Of  course,
6362     matrices U and VT can be of any type, including identity. Furthermore, the
6363     algorithm can calculate Q'*C (this product is calculated more  effectively
6364     than U*Q,  because  this calculation operates with rows instead  of matrix
6365     columns).
6366 
6367     The feature of the algorithm is its ability to find  all  singular  values
6368     including those which are arbitrarily close to 0  with  relative  accuracy
6369     close to  machine precision. If the parameter IsFractionalAccuracyRequired
6370     is set to True, all singular values will have high relative accuracy close
6371     to machine precision. If the parameter is set to False, only  the  biggest
6372     singular value will have relative accuracy  close  to  machine  precision.
6373     The absolute error of other singular values is equal to the absolute error
6374     of the biggest singular value.
6375 
6376     Input parameters:
6377         D       -   main diagonal of matrix B.
6378                     Array whose index ranges within [0..N-1].
6379         E       -   superdiagonal (or subdiagonal) of matrix B.
6380                     Array whose index ranges within [0..N-2].
6381         N       -   size of matrix B.
6382         IsUpper -   True, if the matrix is upper bidiagonal.
6383         IsFractionalAccuracyRequired -
6384                     accuracy to search singular values with.
6385         U       -   matrix to be multiplied by Q.
6386                     Array whose indexes range within [0..NRU-1, 0..N-1].
6387                     The matrix can be bigger, in that case only the  submatrix
6388                     [0..NRU-1, 0..N-1] will be multiplied by Q.
6389         NRU     -   number of rows in matrix U.
6390         C       -   matrix to be multiplied by Q'.
6391                     Array whose indexes range within [0..N-1, 0..NCC-1].
6392                     The matrix can be bigger, in that case only the  submatrix
6393                     [0..N-1, 0..NCC-1] will be multiplied by Q'.
6394         NCC     -   number of columns in matrix C.
6395         VT      -   matrix to be multiplied by P^T.
6396                     Array whose indexes range within [0..N-1, 0..NCVT-1].
6397                     The matrix can be bigger, in that case only the  submatrix
6398                     [0..N-1, 0..NCVT-1] will be multiplied by P^T.
6399         NCVT    -   number of columns in matrix VT.
6400 
6401     Output parameters:
6402         D       -   singular values of matrix B in descending order.
6403         U       -   if NRU>0, contains matrix U*Q.
6404         VT      -   if NCVT>0, contains matrix (P^T)*VT.
6405         C       -   if NCC>0, contains matrix Q'*C.
6406 
6407     Result:
6408         True, if the algorithm has converged.
6409         False, if the algorithm hasn't converged (rare case).
6410 
6411     Additional information:
6412         The type of convergence is controlled by the internal  parameter  TOL.
6413         If the parameter is greater than 0, the singular values will have
6414         relative accuracy TOL. If TOL<0, the singular values will have
6415         absolute accuracy ABS(TOL)*norm(B).
6416         By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon,
6417         where Epsilon is the machine precision. It is not  recommended  to  use
6418         TOL less than 10*Epsilon since this will  considerably  slow  down  the
6419         algorithm and may not lead to error decreasing.
6420     History:
6421         * 31 March, 2007.
6422             changed MAXITR from 6 to 12.
6423 
6424       -- LAPACK routine (version 3.0) --
6425          Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
6426          Courant Institute, Argonne National Lab, and Rice University
6427          October 31, 1999.
6428     *************************************************************************/
6429     template<unsigned int Precision>
rmatrixbdsvd(ap::template_1d_array<amp::ampf<Precision>> & d,ap::template_1d_array<amp::ampf<Precision>> e,int n,bool isupper,bool isfractionalaccuracyrequired,ap::template_2d_array<amp::ampf<Precision>> & u,int nru,ap::template_2d_array<amp::ampf<Precision>> & c,int ncc,ap::template_2d_array<amp::ampf<Precision>> & vt,int ncvt)6430     bool rmatrixbdsvd(ap::template_1d_array< amp::ampf<Precision> >& d,
6431         ap::template_1d_array< amp::ampf<Precision> > e,
6432         int n,
6433         bool isupper,
6434         bool isfractionalaccuracyrequired,
6435         ap::template_2d_array< amp::ampf<Precision> >& u,
6436         int nru,
6437         ap::template_2d_array< amp::ampf<Precision> >& c,
6438         int ncc,
6439         ap::template_2d_array< amp::ampf<Precision> >& vt,
6440         int ncvt)
6441     {
6442         bool result;
6443         ap::template_1d_array< amp::ampf<Precision> > d1;
6444         ap::template_1d_array< amp::ampf<Precision> > e1;
6445 
6446 
6447         d1.setbounds(1, n);
6448         ap::vmove(d1.getvector(1, n), d.getvector(0, n-1));
6449         if( n>1 )
6450         {
6451             e1.setbounds(1, n-1);
6452             ap::vmove(e1.getvector(1, n-1), e.getvector(0, n-2));
6453         }
6454         result = bidiagonalsvddecompositioninternal<Precision>(d1, e1, n, isupper, isfractionalaccuracyrequired, u, 0, nru, c, 0, ncc, vt, 0, ncvt);
6455         ap::vmove(d.getvector(0, n-1), d1.getvector(1, n));
6456         return result;
6457     }
6458 
6459 
6460     /*************************************************************************
6461     Obsolete 1-based subroutine. See RMatrixBDSVD for 0-based replacement.
6462 
6463     History:
6464         * 31 March, 2007.
6465             changed MAXITR from 6 to 12.
6466 
6467       -- LAPACK routine (version 3.0) --
6468          Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
6469          Courant Institute, Argonne National Lab, and Rice University
6470          October 31, 1999.
6471     *************************************************************************/
6472     template<unsigned int Precision>
bidiagonalsvddecomposition(ap::template_1d_array<amp::ampf<Precision>> & d,ap::template_1d_array<amp::ampf<Precision>> e,int n,bool isupper,bool isfractionalaccuracyrequired,ap::template_2d_array<amp::ampf<Precision>> & u,int nru,ap::template_2d_array<amp::ampf<Precision>> & c,int ncc,ap::template_2d_array<amp::ampf<Precision>> & vt,int ncvt)6473     bool bidiagonalsvddecomposition(ap::template_1d_array< amp::ampf<Precision> >& d,
6474         ap::template_1d_array< amp::ampf<Precision> > e,
6475         int n,
6476         bool isupper,
6477         bool isfractionalaccuracyrequired,
6478         ap::template_2d_array< amp::ampf<Precision> >& u,
6479         int nru,
6480         ap::template_2d_array< amp::ampf<Precision> >& c,
6481         int ncc,
6482         ap::template_2d_array< amp::ampf<Precision> >& vt,
6483         int ncvt)
6484     {
6485         bool result;
6486 
6487 
6488         result = bidiagonalsvddecompositioninternal<Precision>(d, e, n, isupper, isfractionalaccuracyrequired, u, 1, nru, c, 1, ncc, vt, 1, ncvt);
6489         return result;
6490     }
6491 
6492 
6493     /*************************************************************************
6494     Internal working subroutine for bidiagonal decomposition
6495     *************************************************************************/
6496     template<unsigned int Precision>
bidiagonalsvddecompositioninternal(ap::template_1d_array<amp::ampf<Precision>> & d,ap::template_1d_array<amp::ampf<Precision>> e,int n,bool isupper,bool isfractionalaccuracyrequired,ap::template_2d_array<amp::ampf<Precision>> & u,int ustart,int nru,ap::template_2d_array<amp::ampf<Precision>> & c,int cstart,int ncc,ap::template_2d_array<amp::ampf<Precision>> & vt,int vstart,int ncvt)6497     bool bidiagonalsvddecompositioninternal(ap::template_1d_array< amp::ampf<Precision> >& d,
6498         ap::template_1d_array< amp::ampf<Precision> > e,
6499         int n,
6500         bool isupper,
6501         bool isfractionalaccuracyrequired,
6502         ap::template_2d_array< amp::ampf<Precision> >& u,
6503         int ustart,
6504         int nru,
6505         ap::template_2d_array< amp::ampf<Precision> >& c,
6506         int cstart,
6507         int ncc,
6508         ap::template_2d_array< amp::ampf<Precision> >& vt,
6509         int vstart,
6510         int ncvt)
6511     {
6512         bool result;
6513         int i;
6514         int idir;
6515         int isub;
6516         int iter;
6517         int j;
6518         int ll;
6519         int lll;
6520         int m;
6521         int maxit;
6522         int oldll;
6523         int oldm;
6524         amp::ampf<Precision> abse;
6525         amp::ampf<Precision> abss;
6526         amp::ampf<Precision> cosl;
6527         amp::ampf<Precision> cosr;
6528         amp::ampf<Precision> cs;
6529         amp::ampf<Precision> eps;
6530         amp::ampf<Precision> f;
6531         amp::ampf<Precision> g;
6532         amp::ampf<Precision> h;
6533         amp::ampf<Precision> mu;
6534         amp::ampf<Precision> oldcs;
6535         amp::ampf<Precision> oldsn;
6536         amp::ampf<Precision> r;
6537         amp::ampf<Precision> shift;
6538         amp::ampf<Precision> sigmn;
6539         amp::ampf<Precision> sigmx;
6540         amp::ampf<Precision> sinl;
6541         amp::ampf<Precision> sinr;
6542         amp::ampf<Precision> sll;
6543         amp::ampf<Precision> smax;
6544         amp::ampf<Precision> smin;
6545         amp::ampf<Precision> sminl;
6546         amp::ampf<Precision> sminlo;
6547         amp::ampf<Precision> sminoa;
6548         amp::ampf<Precision> sn;
6549         amp::ampf<Precision> thresh;
6550         amp::ampf<Precision> tol;
6551         amp::ampf<Precision> tolmul;
6552         amp::ampf<Precision> unfl;
6553         ap::template_1d_array< amp::ampf<Precision> > work0;
6554         ap::template_1d_array< amp::ampf<Precision> > work1;
6555         ap::template_1d_array< amp::ampf<Precision> > work2;
6556         ap::template_1d_array< amp::ampf<Precision> > work3;
6557         int maxitr;
6558         bool matrixsplitflag;
6559         bool iterflag;
6560         ap::template_1d_array< amp::ampf<Precision> > utemp;
6561         ap::template_1d_array< amp::ampf<Precision> > vttemp;
6562         ap::template_1d_array< amp::ampf<Precision> > ctemp;
6563         ap::template_1d_array< amp::ampf<Precision> > etemp;
6564         bool fwddir;
6565         amp::ampf<Precision> tmp;
6566         int mm1;
6567         int mm0;
6568         bool bchangedir;
6569         int uend;
6570         int cend;
6571         int vend;
6572 
6573 
6574         result = true;
6575         if( n==0 )
6576         {
6577             return result;
6578         }
6579         if( n==1 )
6580         {
6581             if( d(1)<0 )
6582             {
6583                 d(1) = -d(1);
6584                 if( ncvt>0 )
6585                 {
6586                     ap::vmul(vt.getrow(vstart, vstart, vstart+ncvt-1), -1);
6587                 }
6588             }
6589             return result;
6590         }
6591 
6592         //
6593         // init
6594         //
6595         work0.setbounds(1, n-1);
6596         work1.setbounds(1, n-1);
6597         work2.setbounds(1, n-1);
6598         work3.setbounds(1, n-1);
6599         uend = ustart+ap::maxint(nru-1, 0);
6600         vend = vstart+ap::maxint(ncvt-1, 0);
6601         cend = cstart+ap::maxint(ncc-1, 0);
6602         utemp.setbounds(ustart, uend);
6603         vttemp.setbounds(vstart, vend);
6604         ctemp.setbounds(cstart, cend);
6605         maxitr = 12;
6606         fwddir = true;
6607 
6608         //
6609         // resize E from N-1 to N
6610         //
6611         etemp.setbounds(1, n);
6612         for(i=1; i<=n-1; i++)
6613         {
6614             etemp(i) = e(i);
6615         }
6616         e.setbounds(1, n);
6617         for(i=1; i<=n-1; i++)
6618         {
6619             e(i) = etemp(i);
6620         }
6621         e(n) = 0;
6622         idir = 0;
6623 
6624         //
6625         // Get machine constants
6626         //
6627         eps = amp::ampf<Precision>::getAlgoPascalEpsilon();
6628         unfl = amp::ampf<Precision>::getAlgoPascalMinNumber();
6629 
6630         //
6631         // If matrix lower bidiagonal, rotate to be upper bidiagonal
6632         // by applying Givens rotations on the left
6633         //
6634         if( !isupper )
6635         {
6636             for(i=1; i<=n-1; i++)
6637             {
6638                 rotations::generaterotation<Precision>(d(i), e(i), cs, sn, r);
6639                 d(i) = r;
6640                 e(i) = sn*d(i+1);
6641                 d(i+1) = cs*d(i+1);
6642                 work0(i) = cs;
6643                 work1(i) = sn;
6644             }
6645 
6646             //
6647             // Update singular vectors if desired
6648             //
6649             if( nru>0 )
6650             {
6651                 rotations::applyrotationsfromtheright<Precision>(fwddir, ustart, uend, 1+ustart-1, n+ustart-1, work0, work1, u, utemp);
6652             }
6653             if( ncc>0 )
6654             {
6655                 rotations::applyrotationsfromtheleft<Precision>(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, work0, work1, c, ctemp);
6656             }
6657         }
6658 
6659         //
6660         // Compute singular values to relative accuracy TOL
6661         // (By setting TOL to be negative, algorithm will compute
6662         // singular values to absolute accuracy ABS(TOL)*norm(input matrix))
6663         //
6664         tolmul = amp::maximum<Precision>(10, amp::minimum<Precision>(100, amp::pow<Precision>(eps, -amp::ampf<Precision>("0.125"))));
6665         tol = tolmul*eps;
6666         if( !isfractionalaccuracyrequired )
6667         {
6668             tol = -tol;
6669         }
6670 
6671         //
6672         // Compute approximate maximum, minimum singular values
6673         //
6674         smax = 0;
6675         for(i=1; i<=n; i++)
6676         {
6677             smax = amp::maximum<Precision>(smax, amp::abs<Precision>(d(i)));
6678         }
6679         for(i=1; i<=n-1; i++)
6680         {
6681             smax = amp::maximum<Precision>(smax, amp::abs<Precision>(e(i)));
6682         }
6683         sminl = 0;
6684         if( tol>=0 )
6685         {
6686 
6687             //
6688             // Relative accuracy desired
6689             //
6690             sminoa = amp::abs<Precision>(d(1));
6691             if( sminoa!=0 )
6692             {
6693                 mu = sminoa;
6694                 for(i=2; i<=n; i++)
6695                 {
6696                     mu = amp::abs<Precision>(d(i))*(mu/(mu+amp::abs<Precision>(e(i-1))));
6697                     sminoa = amp::minimum<Precision>(sminoa, mu);
6698                     if( sminoa==0 )
6699                     {
6700                         break;
6701                     }
6702                 }
6703             }
6704             sminoa = sminoa/amp::sqrt<Precision>(n);
6705             thresh = amp::maximum<Precision>(tol*sminoa, maxitr*n*n*unfl);
6706         }
6707         else
6708         {
6709 
6710             //
6711             // Absolute accuracy desired
6712             //
6713             thresh = amp::maximum<Precision>(amp::abs<Precision>(tol)*smax, maxitr*n*n*unfl);
6714         }
6715 
6716         //
6717         // Prepare for main iteration loop for the singular values
6718         // (MAXIT is the maximum number of passes through the inner
6719         // loop permitted before nonconvergence signalled.)
6720         //
6721         maxit = maxitr*n*n;
6722         iter = 0;
6723         oldll = -1;
6724         oldm = -1;
6725 
6726         //
6727         // M points to last element of unconverged part of matrix
6728         //
6729         m = n;
6730 
6731         //
6732         // Begin main iteration loop
6733         //
6734         while( true )
6735         {
6736 
6737             //
6738             // Check for convergence or exceeding iteration count
6739             //
6740             if( m<=1 )
6741             {
6742                 break;
6743             }
6744             if( iter>maxit )
6745             {
6746                 result = false;
6747                 return result;
6748             }
6749 
6750             //
6751             // Find diagonal block of matrix to work on
6752             //
6753             if( tol<0 && amp::abs<Precision>(d(m))<=thresh )
6754             {
6755                 d(m) = 0;
6756             }
6757             smax = amp::abs<Precision>(d(m));
6758             smin = smax;
6759             matrixsplitflag = false;
6760             for(lll=1; lll<=m-1; lll++)
6761             {
6762                 ll = m-lll;
6763                 abss = amp::abs<Precision>(d(ll));
6764                 abse = amp::abs<Precision>(e(ll));
6765                 if( tol<0 && abss<=thresh )
6766                 {
6767                     d(ll) = 0;
6768                 }
6769                 if( abse<=thresh )
6770                 {
6771                     matrixsplitflag = true;
6772                     break;
6773                 }
6774                 smin = amp::minimum<Precision>(smin, abss);
6775                 smax = amp::maximum<Precision>(smax, amp::maximum<Precision>(abss, abse));
6776             }
6777             if( !matrixsplitflag )
6778             {
6779                 ll = 0;
6780             }
6781             else
6782             {
6783 
6784                 //
6785                 // Matrix splits since E(LL) = 0
6786                 //
6787                 e(ll) = 0;
6788                 if( ll==m-1 )
6789                 {
6790 
6791                     //
6792                     // Convergence of bottom singular value, return to top of loop
6793                     //
6794                     m = m-1;
6795                     continue;
6796                 }
6797             }
6798             ll = ll+1;
6799 
6800             //
6801             // E(LL) through E(M-1) are nonzero, E(LL-1) is zero
6802             //
6803             if( ll==m-1 )
6804             {
6805 
6806                 //
6807                 // 2 by 2 block, handle separately
6808                 //
6809                 svdv2x2<Precision>(d(m-1), e(m-1), d(m), sigmn, sigmx, sinr, cosr, sinl, cosl);
6810                 d(m-1) = sigmx;
6811                 e(m-1) = 0;
6812                 d(m) = sigmn;
6813 
6814                 //
6815                 // Compute singular vectors, if desired
6816                 //
6817                 if( ncvt>0 )
6818                 {
6819                     mm0 = m+(vstart-1);
6820                     mm1 = m-1+(vstart-1);
6821                     ap::vmove(vttemp.getvector(vstart, vend), vt.getrow(mm1, vstart, vend), cosr);
6822                     ap::vadd(vttemp.getvector(vstart, vend), vt.getrow(mm0, vstart, vend), sinr);
6823                     ap::vmul(vt.getrow(mm0, vstart, vend), cosr);
6824                     ap::vsub(vt.getrow(mm0, vstart, vend), vt.getrow(mm1, vstart, vend), sinr);
6825                     ap::vmove(vt.getrow(mm1, vstart, vend), vttemp.getvector(vstart, vend));
6826                 }
6827                 if( nru>0 )
6828                 {
6829                     mm0 = m+ustart-1;
6830                     mm1 = m-1+ustart-1;
6831                     ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(mm1, ustart, uend), cosl);
6832                     ap::vadd(utemp.getvector(ustart, uend), u.getcolumn(mm0, ustart, uend), sinl);
6833                     ap::vmul(u.getcolumn(mm0, ustart, uend), cosl);
6834                     ap::vsub(u.getcolumn(mm0, ustart, uend), u.getcolumn(mm1, ustart, uend), sinl);
6835                     ap::vmove(u.getcolumn(mm1, ustart, uend), utemp.getvector(ustart, uend));
6836                 }
6837                 if( ncc>0 )
6838                 {
6839                     mm0 = m+cstart-1;
6840                     mm1 = m-1+cstart-1;
6841                     ap::vmove(ctemp.getvector(cstart, cend), c.getrow(mm1, cstart, cend), cosl);
6842                     ap::vadd(ctemp.getvector(cstart, cend), c.getrow(mm0, cstart, cend), sinl);
6843                     ap::vmul(c.getrow(mm0, cstart, cend), cosl);
6844                     ap::vsub(c.getrow(mm0, cstart, cend), c.getrow(mm1, cstart, cend), sinl);
6845                     ap::vmove(c.getrow(mm1, cstart, cend), ctemp.getvector(cstart, cend));
6846                 }
6847                 m = m-2;
6848                 continue;
6849             }
6850 
6851             //
6852             // If working on new submatrix, choose shift direction
6853             // (from larger end diagonal element towards smaller)
6854             //
6855             // Previously was
6856             //     "if (LL>OLDM) or (M<OLDLL) then"
6857             // fixed thanks to Michael Rolle < m@rolle.name >
6858             // Very strange that LAPACK still contains it.
6859             //
6860             bchangedir = false;
6861             if( idir==1 && amp::abs<Precision>(d(ll))<amp::ampf<Precision>("1.0E-3")*amp::abs<Precision>(d(m)) )
6862             {
6863                 bchangedir = true;
6864             }
6865             if( idir==2 && amp::abs<Precision>(d(m))<amp::ampf<Precision>("1.0E-3")*amp::abs<Precision>(d(ll)) )
6866             {
6867                 bchangedir = true;
6868             }
6869             if( ll!=oldll || m!=oldm || bchangedir )
6870             {
6871                 if( amp::abs<Precision>(d(ll))>=amp::abs<Precision>(d(m)) )
6872                 {
6873 
6874                     //
6875                     // Chase bulge from top (big end) to bottom (small end)
6876                     //
6877                     idir = 1;
6878                 }
6879                 else
6880                 {
6881 
6882                     //
6883                     // Chase bulge from bottom (big end) to top (small end)
6884                     //
6885                     idir = 2;
6886                 }
6887             }
6888 
6889             //
6890             // Apply convergence tests
6891             //
6892             if( idir==1 )
6893             {
6894 
6895                 //
6896                 // Run convergence test in forward direction
6897                 // First apply standard test to bottom of matrix
6898                 //
6899                 if( amp::abs<Precision>(e(m-1))<=amp::abs<Precision>(tol)*amp::abs<Precision>(d(m)) || (tol<0 && amp::abs<Precision>(e(m-1))<=thresh) )
6900                 {
6901                     e(m-1) = 0;
6902                     continue;
6903                 }
6904                 if( tol>=0 )
6905                 {
6906 
6907                     //
6908                     // If relative accuracy desired,
6909                     // apply convergence criterion forward
6910                     //
6911                     mu = amp::abs<Precision>(d(ll));
6912                     sminl = mu;
6913                     iterflag = false;
6914                     for(lll=ll; lll<=m-1; lll++)
6915                     {
6916                         if( amp::abs<Precision>(e(lll))<=tol*mu )
6917                         {
6918                             e(lll) = 0;
6919                             iterflag = true;
6920                             break;
6921                         }
6922                         sminlo = sminl;
6923                         mu = amp::abs<Precision>(d(lll+1))*(mu/(mu+amp::abs<Precision>(e(lll))));
6924                         sminl = amp::minimum<Precision>(sminl, mu);
6925                     }
6926                     if( iterflag )
6927                     {
6928                         continue;
6929                     }
6930                 }
6931             }
6932             else
6933             {
6934 
6935                 //
6936                 // Run convergence test in backward direction
6937                 // First apply standard test to top of matrix
6938                 //
6939                 if( amp::abs<Precision>(e(ll))<=amp::abs<Precision>(tol)*amp::abs<Precision>(d(ll)) || (tol<0 && amp::abs<Precision>(e(ll))<=thresh) )
6940                 {
6941                     e(ll) = 0;
6942                     continue;
6943                 }
6944                 if( tol>=0 )
6945                 {
6946 
6947                     //
6948                     // If relative accuracy desired,
6949                     // apply convergence criterion backward
6950                     //
6951                     mu = amp::abs<Precision>(d(m));
6952                     sminl = mu;
6953                     iterflag = false;
6954                     for(lll=m-1; lll>=ll; lll--)
6955                     {
6956                         if( amp::abs<Precision>(e(lll))<=tol*mu )
6957                         {
6958                             e(lll) = 0;
6959                             iterflag = true;
6960                             break;
6961                         }
6962                         sminlo = sminl;
6963                         mu = amp::abs<Precision>(d(lll))*(mu/(mu+amp::abs<Precision>(e(lll))));
6964                         sminl = amp::minimum<Precision>(sminl, mu);
6965                     }
6966                     if( iterflag )
6967                     {
6968                         continue;
6969                     }
6970                 }
6971             }
6972             oldll = ll;
6973             oldm = m;
6974 
6975             //
6976             // Compute shift.  First, test if shifting would ruin relative
6977             // accuracy, and if so set the shift to zero.
6978             //
6979             if( tol>=0 && n*tol*(sminl/smax)<=amp::maximum<Precision>(eps, amp::ampf<Precision>("0.01")*tol) )
6980             {
6981 
6982                 //
6983                 // Use a zero shift to avoid loss of relative accuracy
6984                 //
6985                 shift = 0;
6986             }
6987             else
6988             {
6989 
6990                 //
6991                 // Compute the shift from 2-by-2 block at end of matrix
6992                 //
6993                 if( idir==1 )
6994                 {
6995                     sll = amp::abs<Precision>(d(ll));
6996                     svd2x2<Precision>(d(m-1), e(m-1), d(m), shift, r);
6997                 }
6998                 else
6999                 {
7000                     sll = amp::abs<Precision>(d(m));
7001                     svd2x2<Precision>(d(ll), e(ll), d(ll+1), shift, r);
7002                 }
7003 
7004                 //
7005                 // Test if shift negligible, and if so set to zero
7006                 //
7007                 if( sll>0 )
7008                 {
7009                     if( amp::sqr<Precision>(shift/sll)<eps )
7010                     {
7011                         shift = 0;
7012                     }
7013                 }
7014             }
7015 
7016             //
7017             // Increment iteration count
7018             //
7019             iter = iter+m-ll;
7020 
7021             //
7022             // If SHIFT = 0, do simplified QR iteration
7023             //
7024             if( shift==0 )
7025             {
7026                 if( idir==1 )
7027                 {
7028 
7029                     //
7030                     // Chase bulge from top to bottom
7031                     // Save cosines and sines for later singular vector updates
7032                     //
7033                     cs = 1;
7034                     oldcs = 1;
7035                     for(i=ll; i<=m-1; i++)
7036                     {
7037                         rotations::generaterotation<Precision>(d(i)*cs, e(i), cs, sn, r);
7038                         if( i>ll )
7039                         {
7040                             e(i-1) = oldsn*r;
7041                         }
7042                         rotations::generaterotation<Precision>(oldcs*r, d(i+1)*sn, oldcs, oldsn, tmp);
7043                         d(i) = tmp;
7044                         work0(i-ll+1) = cs;
7045                         work1(i-ll+1) = sn;
7046                         work2(i-ll+1) = oldcs;
7047                         work3(i-ll+1) = oldsn;
7048                     }
7049                     h = d(m)*cs;
7050                     d(m) = h*oldcs;
7051                     e(m-1) = h*oldsn;
7052 
7053                     //
7054                     // Update singular vectors
7055                     //
7056                     if( ncvt>0 )
7057                     {
7058                         rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp);
7059                     }
7060                     if( nru>0 )
7061                     {
7062                         rotations::applyrotationsfromtheright<Precision>(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp);
7063                     }
7064                     if( ncc>0 )
7065                     {
7066                         rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp);
7067                     }
7068 
7069                     //
7070                     // Test convergence
7071                     //
7072                     if( amp::abs<Precision>(e(m-1))<=thresh )
7073                     {
7074                         e(m-1) = 0;
7075                     }
7076                 }
7077                 else
7078                 {
7079 
7080                     //
7081                     // Chase bulge from bottom to top
7082                     // Save cosines and sines for later singular vector updates
7083                     //
7084                     cs = 1;
7085                     oldcs = 1;
7086                     for(i=m; i>=ll+1; i--)
7087                     {
7088                         rotations::generaterotation<Precision>(d(i)*cs, e(i-1), cs, sn, r);
7089                         if( i<m )
7090                         {
7091                             e(i) = oldsn*r;
7092                         }
7093                         rotations::generaterotation<Precision>(oldcs*r, d(i-1)*sn, oldcs, oldsn, tmp);
7094                         d(i) = tmp;
7095                         work0(i-ll) = cs;
7096                         work1(i-ll) = -sn;
7097                         work2(i-ll) = oldcs;
7098                         work3(i-ll) = -oldsn;
7099                     }
7100                     h = d(ll)*cs;
7101                     d(ll) = h*oldcs;
7102                     e(ll) = h*oldsn;
7103 
7104                     //
7105                     // Update singular vectors
7106                     //
7107                     if( ncvt>0 )
7108                     {
7109                         rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp);
7110                     }
7111                     if( nru>0 )
7112                     {
7113                         rotations::applyrotationsfromtheright<Precision>(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp);
7114                     }
7115                     if( ncc>0 )
7116                     {
7117                         rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp);
7118                     }
7119 
7120                     //
7121                     // Test convergence
7122                     //
7123                     if( amp::abs<Precision>(e(ll))<=thresh )
7124                     {
7125                         e(ll) = 0;
7126                     }
7127                 }
7128             }
7129             else
7130             {
7131 
7132                 //
7133                 // Use nonzero shift
7134                 //
7135                 if( idir==1 )
7136                 {
7137 
7138                     //
7139                     // Chase bulge from top to bottom
7140                     // Save cosines and sines for later singular vector updates
7141                     //
7142                     f = (amp::abs<Precision>(d(ll))-shift)*(extsignbdsqr<Precision>(1, d(ll))+shift/d(ll));
7143                     g = e(ll);
7144                     for(i=ll; i<=m-1; i++)
7145                     {
7146                         rotations::generaterotation<Precision>(f, g, cosr, sinr, r);
7147                         if( i>ll )
7148                         {
7149                             e(i-1) = r;
7150                         }
7151                         f = cosr*d(i)+sinr*e(i);
7152                         e(i) = cosr*e(i)-sinr*d(i);
7153                         g = sinr*d(i+1);
7154                         d(i+1) = cosr*d(i+1);
7155                         rotations::generaterotation<Precision>(f, g, cosl, sinl, r);
7156                         d(i) = r;
7157                         f = cosl*e(i)+sinl*d(i+1);
7158                         d(i+1) = cosl*d(i+1)-sinl*e(i);
7159                         if( i<m-1 )
7160                         {
7161                             g = sinl*e(i+1);
7162                             e(i+1) = cosl*e(i+1);
7163                         }
7164                         work0(i-ll+1) = cosr;
7165                         work1(i-ll+1) = sinr;
7166                         work2(i-ll+1) = cosl;
7167                         work3(i-ll+1) = sinl;
7168                     }
7169                     e(m-1) = f;
7170 
7171                     //
7172                     // Update singular vectors
7173                     //
7174                     if( ncvt>0 )
7175                     {
7176                         rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp);
7177                     }
7178                     if( nru>0 )
7179                     {
7180                         rotations::applyrotationsfromtheright<Precision>(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp);
7181                     }
7182                     if( ncc>0 )
7183                     {
7184                         rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp);
7185                     }
7186 
7187                     //
7188                     // Test convergence
7189                     //
7190                     if( amp::abs<Precision>(e(m-1))<=thresh )
7191                     {
7192                         e(m-1) = 0;
7193                     }
7194                 }
7195                 else
7196                 {
7197 
7198                     //
7199                     // Chase bulge from bottom to top
7200                     // Save cosines and sines for later singular vector updates
7201                     //
7202                     f = (amp::abs<Precision>(d(m))-shift)*(extsignbdsqr<Precision>(1, d(m))+shift/d(m));
7203                     g = e(m-1);
7204                     for(i=m; i>=ll+1; i--)
7205                     {
7206                         rotations::generaterotation<Precision>(f, g, cosr, sinr, r);
7207                         if( i<m )
7208                         {
7209                             e(i) = r;
7210                         }
7211                         f = cosr*d(i)+sinr*e(i-1);
7212                         e(i-1) = cosr*e(i-1)-sinr*d(i);
7213                         g = sinr*d(i-1);
7214                         d(i-1) = cosr*d(i-1);
7215                         rotations::generaterotation<Precision>(f, g, cosl, sinl, r);
7216                         d(i) = r;
7217                         f = cosl*e(i-1)+sinl*d(i-1);
7218                         d(i-1) = cosl*d(i-1)-sinl*e(i-1);
7219                         if( i>ll+1 )
7220                         {
7221                             g = sinl*e(i-2);
7222                             e(i-2) = cosl*e(i-2);
7223                         }
7224                         work0(i-ll) = cosr;
7225                         work1(i-ll) = -sinr;
7226                         work2(i-ll) = cosl;
7227                         work3(i-ll) = -sinl;
7228                     }
7229                     e(ll) = f;
7230 
7231                     //
7232                     // Test convergence
7233                     //
7234                     if( amp::abs<Precision>(e(ll))<=thresh )
7235                     {
7236                         e(ll) = 0;
7237                     }
7238 
7239                     //
7240                     // Update singular vectors if desired
7241                     //
7242                     if( ncvt>0 )
7243                     {
7244                         rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp);
7245                     }
7246                     if( nru>0 )
7247                     {
7248                         rotations::applyrotationsfromtheright<Precision>(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp);
7249                     }
7250                     if( ncc>0 )
7251                     {
7252                         rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp);
7253                     }
7254                 }
7255             }
7256 
7257             //
7258             // QR iteration finished, go back and check convergence
7259             //
7260             continue;
7261         }
7262 
7263         //
7264         // All singular values converged, so make them positive
7265         //
7266         for(i=1; i<=n; i++)
7267         {
7268             if( d(i)<0 )
7269             {
7270                 d(i) = -d(i);
7271 
7272                 //
7273                 // Change sign of singular vectors, if desired
7274                 //
7275                 if( ncvt>0 )
7276                 {
7277                     ap::vmul(vt.getrow(i+vstart-1, vstart, vend), -1);
7278                 }
7279             }
7280         }
7281 
7282         //
7283         // Sort the singular values into decreasing order (insertion sort on
7284         // singular values, but only one transposition per singular vector)
7285         //
7286         for(i=1; i<=n-1; i++)
7287         {
7288 
7289             //
7290             // Scan for smallest D(I)
7291             //
7292             isub = 1;
7293             smin = d(1);
7294             for(j=2; j<=n+1-i; j++)
7295             {
7296                 if( d(j)<=smin )
7297                 {
7298                     isub = j;
7299                     smin = d(j);
7300                 }
7301             }
7302             if( isub!=n+1-i )
7303             {
7304 
7305                 //
7306                 // Swap singular values and vectors
7307                 //
7308                 d(isub) = d(n+1-i);
7309                 d(n+1-i) = smin;
7310                 if( ncvt>0 )
7311                 {
7312                     j = n+1-i;
7313                     ap::vmove(vttemp.getvector(vstart, vend), vt.getrow(isub+vstart-1, vstart, vend));
7314                     ap::vmove(vt.getrow(isub+vstart-1, vstart, vend), vt.getrow(j+vstart-1, vstart, vend));
7315                     ap::vmove(vt.getrow(j+vstart-1, vstart, vend), vttemp.getvector(vstart, vend));
7316                 }
7317                 if( nru>0 )
7318                 {
7319                     j = n+1-i;
7320                     ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(isub+ustart-1, ustart, uend));
7321                     ap::vmove(u.getcolumn(isub+ustart-1, ustart, uend), u.getcolumn(j+ustart-1, ustart, uend));
7322                     ap::vmove(u.getcolumn(j+ustart-1, ustart, uend), utemp.getvector(ustart, uend));
7323                 }
7324                 if( ncc>0 )
7325                 {
7326                     j = n+1-i;
7327                     ap::vmove(ctemp.getvector(cstart, cend), c.getrow(isub+cstart-1, cstart, cend));
7328                     ap::vmove(c.getrow(isub+cstart-1, cstart, cend), c.getrow(j+cstart-1, cstart, cend));
7329                     ap::vmove(c.getrow(j+cstart-1, cstart, cend), ctemp.getvector(cstart, cend));
7330                 }
7331             }
7332         }
7333         return result;
7334     }
7335 
7336 
7337     template<unsigned int Precision>
extsignbdsqr(amp::ampf<Precision> a,amp::ampf<Precision> b)7338     amp::ampf<Precision> extsignbdsqr(amp::ampf<Precision> a,
7339         amp::ampf<Precision> b)
7340     {
7341         amp::ampf<Precision> result;
7342 
7343 
7344         if( b>=0 )
7345         {
7346             result = amp::abs<Precision>(a);
7347         }
7348         else
7349         {
7350             result = -amp::abs<Precision>(a);
7351         }
7352         return result;
7353     }
7354 
7355 
7356     template<unsigned int Precision>
svd2x2(amp::ampf<Precision> f,amp::ampf<Precision> g,amp::ampf<Precision> h,amp::ampf<Precision> & ssmin,amp::ampf<Precision> & ssmax)7357     void svd2x2(amp::ampf<Precision> f,
7358         amp::ampf<Precision> g,
7359         amp::ampf<Precision> h,
7360         amp::ampf<Precision>& ssmin,
7361         amp::ampf<Precision>& ssmax)
7362     {
7363         amp::ampf<Precision> aas;
7364         amp::ampf<Precision> at;
7365         amp::ampf<Precision> au;
7366         amp::ampf<Precision> c;
7367         amp::ampf<Precision> fa;
7368         amp::ampf<Precision> fhmn;
7369         amp::ampf<Precision> fhmx;
7370         amp::ampf<Precision> ga;
7371         amp::ampf<Precision> ha;
7372 
7373 
7374         fa = amp::abs<Precision>(f);
7375         ga = amp::abs<Precision>(g);
7376         ha = amp::abs<Precision>(h);
7377         fhmn = amp::minimum<Precision>(fa, ha);
7378         fhmx = amp::maximum<Precision>(fa, ha);
7379         if( fhmn==0 )
7380         {
7381             ssmin = 0;
7382             if( fhmx==0 )
7383             {
7384                 ssmax = ga;
7385             }
7386             else
7387             {
7388                 ssmax = amp::maximum<Precision>(fhmx, ga)*amp::sqrt<Precision>(1+amp::sqr<Precision>(amp::minimum<Precision>(fhmx, ga)/amp::maximum<Precision>(fhmx, ga)));
7389             }
7390         }
7391         else
7392         {
7393             if( ga<fhmx )
7394             {
7395                 aas = 1+fhmn/fhmx;
7396                 at = (fhmx-fhmn)/fhmx;
7397                 au = amp::sqr<Precision>(ga/fhmx);
7398                 c = 2/(amp::sqrt<Precision>(aas*aas+au)+amp::sqrt<Precision>(at*at+au));
7399                 ssmin = fhmn*c;
7400                 ssmax = fhmx/c;
7401             }
7402             else
7403             {
7404                 au = fhmx/ga;
7405                 if( au==0 )
7406                 {
7407 
7408                     //
7409                     // Avoid possible harmful underflow if exponent range
7410                     // asymmetric (true SSMIN may not underflow even if
7411                     // AU underflows)
7412                     //
7413                     ssmin = fhmn*fhmx/ga;
7414                     ssmax = ga;
7415                 }
7416                 else
7417                 {
7418                     aas = 1+fhmn/fhmx;
7419                     at = (fhmx-fhmn)/fhmx;
7420                     c = 1/(amp::sqrt<Precision>(1+amp::sqr<Precision>(aas*au))+amp::sqrt<Precision>(1+amp::sqr<Precision>(at*au)));
7421                     ssmin = fhmn*c*au;
7422                     ssmin = ssmin+ssmin;
7423                     ssmax = ga/(c+c);
7424                 }
7425             }
7426         }
7427     }
7428 
7429 
7430     template<unsigned int Precision>
svdv2x2(amp::ampf<Precision> f,amp::ampf<Precision> g,amp::ampf<Precision> h,amp::ampf<Precision> & ssmin,amp::ampf<Precision> & ssmax,amp::ampf<Precision> & snr,amp::ampf<Precision> & csr,amp::ampf<Precision> & snl,amp::ampf<Precision> & csl)7431     void svdv2x2(amp::ampf<Precision> f,
7432         amp::ampf<Precision> g,
7433         amp::ampf<Precision> h,
7434         amp::ampf<Precision>& ssmin,
7435         amp::ampf<Precision>& ssmax,
7436         amp::ampf<Precision>& snr,
7437         amp::ampf<Precision>& csr,
7438         amp::ampf<Precision>& snl,
7439         amp::ampf<Precision>& csl)
7440     {
7441         bool gasmal;
7442         bool swp;
7443         int pmax;
7444         amp::ampf<Precision> a;
7445         amp::ampf<Precision> clt;
7446         amp::ampf<Precision> crt;
7447         amp::ampf<Precision> d;
7448         amp::ampf<Precision> fa;
7449         amp::ampf<Precision> ft;
7450         amp::ampf<Precision> ga;
7451         amp::ampf<Precision> gt;
7452         amp::ampf<Precision> ha;
7453         amp::ampf<Precision> ht;
7454         amp::ampf<Precision> l;
7455         amp::ampf<Precision> m;
7456         amp::ampf<Precision> mm;
7457         amp::ampf<Precision> r;
7458         amp::ampf<Precision> s;
7459         amp::ampf<Precision> slt;
7460         amp::ampf<Precision> srt;
7461         amp::ampf<Precision> t;
7462         amp::ampf<Precision> temp;
7463         amp::ampf<Precision> tsign;
7464         amp::ampf<Precision> tt;
7465         amp::ampf<Precision> v;
7466 
7467 
7468         ft = f;
7469         fa = amp::abs<Precision>(ft);
7470         ht = h;
7471         ha = amp::abs<Precision>(h);
7472 
7473         //
7474         // PMAX points to the maximum absolute element of matrix
7475         //  PMAX = 1 if F largest in absolute values
7476         //  PMAX = 2 if G largest in absolute values
7477         //  PMAX = 3 if H largest in absolute values
7478         //
7479         pmax = 1;
7480         swp = ha>fa;
7481         if( swp )
7482         {
7483 
7484             //
7485             // Now FA .ge. HA
7486             //
7487             pmax = 3;
7488             temp = ft;
7489             ft = ht;
7490             ht = temp;
7491             temp = fa;
7492             fa = ha;
7493             ha = temp;
7494         }
7495         gt = g;
7496         ga = amp::abs<Precision>(gt);
7497         if( ga==0 )
7498         {
7499 
7500             //
7501             // Diagonal matrix
7502             //
7503             ssmin = ha;
7504             ssmax = fa;
7505             clt = 1;
7506             crt = 1;
7507             slt = 0;
7508             srt = 0;
7509         }
7510         else
7511         {
7512             gasmal = true;
7513             if( ga>fa )
7514             {
7515                 pmax = 2;
7516                 if( fa/ga<amp::ampf<Precision>::getAlgoPascalEpsilon() )
7517                 {
7518 
7519                     //
7520                     // Case of very large GA
7521                     //
7522                     gasmal = false;
7523                     ssmax = ga;
7524                     if( ha>1 )
7525                     {
7526                         v = ga/ha;
7527                         ssmin = fa/v;
7528                     }
7529                     else
7530                     {
7531                         v = fa/ga;
7532                         ssmin = v*ha;
7533                     }
7534                     clt = 1;
7535                     slt = ht/gt;
7536                     srt = 1;
7537                     crt = ft/gt;
7538                 }
7539             }
7540             if( gasmal )
7541             {
7542 
7543                 //
7544                 // Normal case
7545                 //
7546                 d = fa-ha;
7547                 if( d==fa )
7548                 {
7549                     l = 1;
7550                 }
7551                 else
7552                 {
7553                     l = d/fa;
7554                 }
7555                 m = gt/ft;
7556                 t = 2-l;
7557                 mm = m*m;
7558                 tt = t*t;
7559                 s = amp::sqrt<Precision>(tt+mm);
7560                 if( l==0 )
7561                 {
7562                     r = amp::abs<Precision>(m);
7563                 }
7564                 else
7565                 {
7566                     r = amp::sqrt<Precision>(l*l+mm);
7567                 }
7568                 a = amp::ampf<Precision>("0.5")*(s+r);
7569                 ssmin = ha/a;
7570                 ssmax = fa*a;
7571                 if( mm==0 )
7572                 {
7573 
7574                     //
7575                     // Note that M is very tiny
7576                     //
7577                     if( l==0 )
7578                     {
7579                         t = extsignbdsqr<Precision>(2, ft)*extsignbdsqr<Precision>(1, gt);
7580                     }
7581                     else
7582                     {
7583                         t = gt/extsignbdsqr<Precision>(d, ft)+m/t;
7584                     }
7585                 }
7586                 else
7587                 {
7588                     t = (m/(s+t)+m/(r+l))*(1+a);
7589                 }
7590                 l = amp::sqrt<Precision>(t*t+4);
7591                 crt = 2/l;
7592                 srt = t/l;
7593                 clt = (crt+srt*m)/a;
7594                 v = ht/ft;
7595                 slt = v*srt/a;
7596             }
7597         }
7598         if( swp )
7599         {
7600             csl = srt;
7601             snl = crt;
7602             csr = slt;
7603             snr = clt;
7604         }
7605         else
7606         {
7607             csl = clt;
7608             snl = slt;
7609             csr = crt;
7610             snr = srt;
7611         }
7612 
7613         //
7614         // Correct signs of SSMAX and SSMIN
7615         //
7616         if( pmax==1 )
7617         {
7618             tsign = extsignbdsqr<Precision>(1, csr)*extsignbdsqr<Precision>(1, csl)*extsignbdsqr<Precision>(1, f);
7619         }
7620         if( pmax==2 )
7621         {
7622             tsign = extsignbdsqr<Precision>(1, snr)*extsignbdsqr<Precision>(1, csl)*extsignbdsqr<Precision>(1, g);
7623         }
7624         if( pmax==3 )
7625         {
7626             tsign = extsignbdsqr<Precision>(1, snr)*extsignbdsqr<Precision>(1, snl)*extsignbdsqr<Precision>(1, h);
7627         }
7628         ssmax = extsignbdsqr<Precision>(ssmax, tsign);
7629         ssmin = extsignbdsqr<Precision>(ssmin, tsign*extsignbdsqr<Precision>(1, f)*extsignbdsqr<Precision>(1, h));
7630     }
7631 } // namespace
7632 
7633 /* stuff included from ./svd.h */
7634 
7635 /*************************************************************************
7636 Copyright (c) 2005-2007, Sergey Bochkanov (ALGLIB project).
7637 
7638 Redistribution and use in source and binary forms, with or without
7639 modification, are permitted provided that the following conditions are
7640 met:
7641 
7642 - Redistributions of source code must retain the above copyright
7643   notice, this list of conditions and the following disclaimer.
7644 
7645 - Redistributions in binary form must reproduce the above copyright
7646   notice, this list of conditions and the following disclaimer listed
7647   in this license in the documentation and/or other materials
7648   provided with the distribution.
7649 
7650 - Neither the name of the copyright holders nor the names of its
7651   contributors may be used to endorse or promote products derived from
7652   this software without specific prior written permission.
7653 
7654 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
7655 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
7656 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
7657 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
7658 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
7659 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
7660 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
7661 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
7662 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
7663 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
7664 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7665 *************************************************************************/
7666 
7667 /*MAKEHEADER*/
7668 
7669 /*MAKEHEADER*/
7670 
7671 /*MAKEHEADER*/
7672 
7673 /*MAKEHEADER*/
7674 
7675 /*MAKEHEADER*/
7676 
7677 /*MAKEHEADER*/
7678 
7679 /*MAKEHEADER*/
7680 
7681 /*MAKEHEADER*/
7682 
7683 /*MAKEHEADER*/
7684 
7685 namespace svd
7686 {
7687     template<unsigned int Precision>
7688     bool rmatrixsvd(ap::template_2d_array< amp::ampf<Precision> > a,
7689         int m,
7690         int n,
7691         int uneeded,
7692         int vtneeded,
7693         int additionalmemory,
7694         ap::template_1d_array< amp::ampf<Precision> >& w,
7695         ap::template_2d_array< amp::ampf<Precision> >& u,
7696         ap::template_2d_array< amp::ampf<Precision> >& vt);
7697     template<unsigned int Precision>
7698     bool svddecomposition(ap::template_2d_array< amp::ampf<Precision> > a,
7699         int m,
7700         int n,
7701         int uneeded,
7702         int vtneeded,
7703         int additionalmemory,
7704         ap::template_1d_array< amp::ampf<Precision> >& w,
7705         ap::template_2d_array< amp::ampf<Precision> >& u,
7706         ap::template_2d_array< amp::ampf<Precision> >& vt);
7707 
7708 
7709     /*************************************************************************
7710     Singular value decomposition of a rectangular matrix.
7711 
7712     The algorithm calculates the singular value decomposition of a matrix of
7713     size MxN: A = U * S * V^T
7714 
7715     The algorithm finds the singular values and, optionally, matrices U and V^T.
7716     The algorithm can find both first min(M,N) columns of matrix U and rows of
7717     matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM
7718     and NxN respectively).
7719 
7720     Take into account that the subroutine does not return matrix V but V^T.
7721 
7722     Input parameters:
7723         A           -   matrix to be decomposed.
7724                         Array whose indexes range within [0..M-1, 0..N-1].
7725         M           -   number of rows in matrix A.
7726         N           -   number of columns in matrix A.
7727         UNeeded     -   0, 1 or 2. See the description of the parameter U.
7728         VTNeeded    -   0, 1 or 2. See the description of the parameter VT.
7729         AdditionalMemory -
7730                         If the parameter:
7731                            memory (lower requirements, lower performance).
7732                          * equals 1, the algorithm uses additional
7733                            memory of size min(M,N)*min(M,N) of real numbers.
7734                            It often speeds up the algorithm.
7735                          * equals 2, the algorithm uses additional
7736                            memory of size M*min(M,N) of real numbers.
7737                            It allows to get a maximum performance.
7738                         The recommended value of the parameter is 2.
7739 
7740     Output parameters:
7741         W           -   contains singular values in descending order.
7742         U           -   if UNeeded=0, U isn't changed, the left singular vectors
7743                         are not calculated.
7744                         if Uneeded=1, U contains left singular vectors (first
7745                         min(M,N) columns of matrix U). Array whose indexes range
7746                         within [0..M-1, 0..Min(M,N)-1].
7747                         if UNeeded=2, U contains matrix U wholly. Array whose
7748                         indexes range within [0..M-1, 0..M-1].
7749                         are not calculated.
7750                         if VTNeeded=1, VT contains right singular vectors (first
7751                         min(M,N) rows of matrix V^T). Array whose indexes range
7752                         within [0..min(M,N)-1, 0..N-1].
7753                         if VTNeeded=2, VT contains matrix V^T wholly. Array whose
7754                         indexes range within [0..N-1, 0..N-1].
7755 
7756       -- ALGLIB --
7757          Copyright 2005 by Bochkanov Sergey
7758     *************************************************************************/
7759     template<unsigned int Precision>
rmatrixsvd(ap::template_2d_array<amp::ampf<Precision>> a,int m,int n,int uneeded,int vtneeded,int additionalmemory,ap::template_1d_array<amp::ampf<Precision>> & w,ap::template_2d_array<amp::ampf<Precision>> & u,ap::template_2d_array<amp::ampf<Precision>> & vt)7760     bool rmatrixsvd(ap::template_2d_array< amp::ampf<Precision> > a,
7761         int m,
7762         int n,
7763         int uneeded,
7764         int vtneeded,
7765         int additionalmemory,
7766         ap::template_1d_array< amp::ampf<Precision> >& w,
7767         ap::template_2d_array< amp::ampf<Precision> >& u,
7768         ap::template_2d_array< amp::ampf<Precision> >& vt)
7769     {
7770         bool result;
7771         ap::template_1d_array< amp::ampf<Precision> > tauq;
7772         ap::template_1d_array< amp::ampf<Precision> > taup;
7773         ap::template_1d_array< amp::ampf<Precision> > tau;
7774         ap::template_1d_array< amp::ampf<Precision> > e;
7775         ap::template_1d_array< amp::ampf<Precision> > work;
7776         ap::template_2d_array< amp::ampf<Precision> > t2;
7777         bool isupper;
7778         int minmn;
7779         int ncu;
7780         int nrvt;
7781         int nru;
7782         int ncvt;
7783         int i;
7784         int j;
7785         int im1;
7786         amp::ampf<Precision> sm;
7787 
7788 
7789         result = true;
7790         if( m==0 || n==0 )
7791         {
7792             return result;
7793         }
7794         ap::ap_error::make_assertion(uneeded>=0 && uneeded<=2);
7795         ap::ap_error::make_assertion(vtneeded>=0 && vtneeded<=2);
7796         ap::ap_error::make_assertion(additionalmemory>=0 && additionalmemory<=2);
7797 
7798         //
7799         // initialize
7800         //
7801         minmn = ap::minint(m, n);
7802         w.setbounds(1, minmn);
7803         ncu = 0;
7804         nru = 0;
7805         if( uneeded==1 )
7806         {
7807             nru = m;
7808             ncu = minmn;
7809             u.setbounds(0, nru-1, 0, ncu-1);
7810         }
7811         if( uneeded==2 )
7812         {
7813             nru = m;
7814             ncu = m;
7815             u.setbounds(0, nru-1, 0, ncu-1);
7816         }
7817         nrvt = 0;
7818         ncvt = 0;
7819         if( vtneeded==1 )
7820         {
7821             nrvt = minmn;
7822             ncvt = n;
7823             vt.setbounds(0, nrvt-1, 0, ncvt-1);
7824         }
7825         if( vtneeded==2 )
7826         {
7827             nrvt = n;
7828             ncvt = n;
7829             vt.setbounds(0, nrvt-1, 0, ncvt-1);
7830         }
7831 
7832         //
7833         // M much larger than N
7834         // Use bidiagonal reduction with QR-decomposition
7835         //
7836         if( m>amp::ampf<Precision>("1.6")*n )
7837         {
7838             if( uneeded==0 )
7839             {
7840 
7841                 //
7842                 // No left singular vectors to be computed
7843                 //
7844                 qr::rmatrixqr<Precision>(a, m, n, tau);
7845                 for(i=0; i<=n-1; i++)
7846                 {
7847                     for(j=0; j<=i-1; j++)
7848                     {
7849                         a(i,j) = 0;
7850                     }
7851                 }
7852                 bidiagonal::rmatrixbd<Precision>(a, n, n, tauq, taup);
7853                 bidiagonal::rmatrixbdunpackpt<Precision>(a, n, n, taup, nrvt, vt);
7854                 bidiagonal::rmatrixbdunpackdiagonals<Precision>(a, n, n, isupper, w, e);
7855                 result = bdsvd::rmatrixbdsvd<Precision>(w, e, n, isupper, false, u, 0, a, 0, vt, ncvt);
7856                 return result;
7857             }
7858             else
7859             {
7860 
7861                 //
7862                 // Left singular vectors (may be full matrix U) to be computed
7863                 //
7864                 qr::rmatrixqr<Precision>(a, m, n, tau);
7865                 qr::rmatrixqrunpackq<Precision>(a, m, n, tau, ncu, u);
7866                 for(i=0; i<=n-1; i++)
7867                 {
7868                     for(j=0; j<=i-1; j++)
7869                     {
7870                         a(i,j) = 0;
7871                     }
7872                 }
7873                 bidiagonal::rmatrixbd<Precision>(a, n, n, tauq, taup);
7874                 bidiagonal::rmatrixbdunpackpt<Precision>(a, n, n, taup, nrvt, vt);
7875                 bidiagonal::rmatrixbdunpackdiagonals<Precision>(a, n, n, isupper, w, e);
7876                 if( additionalmemory<1 )
7877                 {
7878 
7879                     //
7880                     // No additional memory can be used
7881                     //
7882                     bidiagonal::rmatrixbdmultiplybyq<Precision>(a, n, n, tauq, u, m, n, true, false);
7883                     result = bdsvd::rmatrixbdsvd<Precision>(w, e, n, isupper, false, u, m, a, 0, vt, ncvt);
7884                 }
7885                 else
7886                 {
7887 
7888                     //
7889                     // Large U. Transforming intermediate matrix T2
7890                     //
7891                     work.setbounds(1, ap::maxint(m, n));
7892                     bidiagonal::rmatrixbdunpackq<Precision>(a, n, n, tauq, n, t2);
7893                     blas::copymatrix<Precision>(u, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1);
7894                     blas::inplacetranspose<Precision>(t2, 0, n-1, 0, n-1, work);
7895                     result = bdsvd::rmatrixbdsvd<Precision>(w, e, n, isupper, false, u, 0, t2, n, vt, ncvt);
7896                     blas::matrixmatrixmultiply<Precision>(a, 0, m-1, 0, n-1, false, t2, 0, n-1, 0, n-1, true, amp::ampf<Precision>("1.0"), u, 0, m-1, 0, n-1, amp::ampf<Precision>("0.0"), work);
7897                 }
7898                 return result;
7899             }
7900         }
7901 
7902         //
7903         // N much larger than M
7904         // Use bidiagonal reduction with LQ-decomposition
7905         //
7906         if( n>amp::ampf<Precision>("1.6")*m )
7907         {
7908             if( vtneeded==0 )
7909             {
7910 
7911                 //
7912                 // No right singular vectors to be computed
7913                 //
7914                 lq::rmatrixlq<Precision>(a, m, n, tau);
7915                 for(i=0; i<=m-1; i++)
7916                 {
7917                     for(j=i+1; j<=m-1; j++)
7918                     {
7919                         a(i,j) = 0;
7920                     }
7921                 }
7922                 bidiagonal::rmatrixbd<Precision>(a, m, m, tauq, taup);
7923                 bidiagonal::rmatrixbdunpackq<Precision>(a, m, m, tauq, ncu, u);
7924                 bidiagonal::rmatrixbdunpackdiagonals<Precision>(a, m, m, isupper, w, e);
7925                 work.setbounds(1, m);
7926                 blas::inplacetranspose<Precision>(u, 0, nru-1, 0, ncu-1, work);
7927                 result = bdsvd::rmatrixbdsvd<Precision>(w, e, m, isupper, false, a, 0, u, nru, vt, 0);
7928                 blas::inplacetranspose<Precision>(u, 0, nru-1, 0, ncu-1, work);
7929                 return result;
7930             }
7931             else
7932             {
7933 
7934                 //
7935                 // Right singular vectors (may be full matrix VT) to be computed
7936                 //
7937                 lq::rmatrixlq<Precision>(a, m, n, tau);
7938                 lq::rmatrixlqunpackq<Precision>(a, m, n, tau, nrvt, vt);
7939                 for(i=0; i<=m-1; i++)
7940                 {
7941                     for(j=i+1; j<=m-1; j++)
7942                     {
7943                         a(i,j) = 0;
7944                     }
7945                 }
7946                 bidiagonal::rmatrixbd<Precision>(a, m, m, tauq, taup);
7947                 bidiagonal::rmatrixbdunpackq<Precision>(a, m, m, tauq, ncu, u);
7948                 bidiagonal::rmatrixbdunpackdiagonals<Precision>(a, m, m, isupper, w, e);
7949                 work.setbounds(1, ap::maxint(m, n));
7950                 blas::inplacetranspose<Precision>(u, 0, nru-1, 0, ncu-1, work);
7951                 if( additionalmemory<1 )
7952                 {
7953 
7954                     //
7955                     // No additional memory available
7956                     //
7957                     bidiagonal::rmatrixbdmultiplybyp<Precision>(a, m, m, taup, vt, m, n, false, true);
7958                     result = bdsvd::rmatrixbdsvd<Precision>(w, e, m, isupper, false, a, 0, u, nru, vt, n);
7959                 }
7960                 else
7961                 {
7962 
7963                     //
7964                     // Large VT. Transforming intermediate matrix T2
7965                     //
7966                     bidiagonal::rmatrixbdunpackpt<Precision>(a, m, m, taup, m, t2);
7967                     result = bdsvd::rmatrixbdsvd<Precision>(w, e, m, isupper, false, a, 0, u, nru, t2, m);
7968                     blas::copymatrix<Precision>(vt, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1);
7969                     blas::matrixmatrixmultiply<Precision>(t2, 0, m-1, 0, m-1, false, a, 0, m-1, 0, n-1, false, amp::ampf<Precision>("1.0"), vt, 0, m-1, 0, n-1, amp::ampf<Precision>("0.0"), work);
7970                 }
7971                 blas::inplacetranspose<Precision>(u, 0, nru-1, 0, ncu-1, work);
7972                 return result;
7973             }
7974         }
7975 
7976         //
7977         // M<=N
7978         // We can use inplace transposition of U to get rid of columnwise operations
7979         //
7980         if( m<=n )
7981         {
7982             bidiagonal::rmatrixbd<Precision>(a, m, n, tauq, taup);
7983             bidiagonal::rmatrixbdunpackq<Precision>(a, m, n, tauq, ncu, u);
7984             bidiagonal::rmatrixbdunpackpt<Precision>(a, m, n, taup, nrvt, vt);
7985             bidiagonal::rmatrixbdunpackdiagonals<Precision>(a, m, n, isupper, w, e);
7986             work.setbounds(1, m);
7987             blas::inplacetranspose<Precision>(u, 0, nru-1, 0, ncu-1, work);
7988             result = bdsvd::rmatrixbdsvd<Precision>(w, e, minmn, isupper, false, a, 0, u, nru, vt, ncvt);
7989             blas::inplacetranspose<Precision>(u, 0, nru-1, 0, ncu-1, work);
7990             return result;
7991         }
7992 
7993         //
7994         // Simple bidiagonal reduction
7995         //
7996         bidiagonal::rmatrixbd<Precision>(a, m, n, tauq, taup);
7997         bidiagonal::rmatrixbdunpackq<Precision>(a, m, n, tauq, ncu, u);
7998         bidiagonal::rmatrixbdunpackpt<Precision>(a, m, n, taup, nrvt, vt);
7999         bidiagonal::rmatrixbdunpackdiagonals<Precision>(a, m, n, isupper, w, e);
8000         if( additionalmemory<2 || uneeded==0 )
8001         {
8002 
8003             //
8004             // We cant use additional memory or there is no need in such operations
8005             //
8006             result = bdsvd::rmatrixbdsvd<Precision>(w, e, minmn, isupper, false, u, nru, a, 0, vt, ncvt);
8007         }
8008         else
8009         {
8010 
8011             //
8012             // We can use additional memory
8013             //
8014             t2.setbounds(0, minmn-1, 0, m-1);
8015             blas::copyandtranspose<Precision>(u, 0, m-1, 0, minmn-1, t2, 0, minmn-1, 0, m-1);
8016             result = bdsvd::rmatrixbdsvd<Precision>(w, e, minmn, isupper, false, u, 0, t2, m, vt, ncvt);
8017             blas::copyandtranspose<Precision>(t2, 0, minmn-1, 0, m-1, u, 0, m-1, 0, minmn-1);
8018         }
8019         return result;
8020     }
8021 
8022 
8023     /*************************************************************************
8024     Obsolete 1-based subroutine.
8025     See RMatrixSVD for 0-based replacement.
8026     *************************************************************************/
8027     template<unsigned int Precision>
svddecomposition(ap::template_2d_array<amp::ampf<Precision>> a,int m,int n,int uneeded,int vtneeded,int additionalmemory,ap::template_1d_array<amp::ampf<Precision>> & w,ap::template_2d_array<amp::ampf<Precision>> & u,ap::template_2d_array<amp::ampf<Precision>> & vt)8028     bool svddecomposition(ap::template_2d_array< amp::ampf<Precision> > a,
8029         int m,
8030         int n,
8031         int uneeded,
8032         int vtneeded,
8033         int additionalmemory,
8034         ap::template_1d_array< amp::ampf<Precision> >& w,
8035         ap::template_2d_array< amp::ampf<Precision> >& u,
8036         ap::template_2d_array< amp::ampf<Precision> >& vt)
8037     {
8038         bool result;
8039         ap::template_1d_array< amp::ampf<Precision> > tauq;
8040         ap::template_1d_array< amp::ampf<Precision> > taup;
8041         ap::template_1d_array< amp::ampf<Precision> > tau;
8042         ap::template_1d_array< amp::ampf<Precision> > e;
8043         ap::template_1d_array< amp::ampf<Precision> > work;
8044         ap::template_2d_array< amp::ampf<Precision> > t2;
8045         bool isupper;
8046         int minmn;
8047         int ncu;
8048         int nrvt;
8049         int nru;
8050         int ncvt;
8051         int i;
8052         int j;
8053         int im1;
8054         amp::ampf<Precision> sm;
8055 
8056 
8057         result = true;
8058         if( m==0 || n==0 )
8059         {
8060             return result;
8061         }
8062         ap::ap_error::make_assertion(uneeded>=0 && uneeded<=2);
8063         ap::ap_error::make_assertion(vtneeded>=0 && vtneeded<=2);
8064         ap::ap_error::make_assertion(additionalmemory>=0 && additionalmemory<=2);
8065 
8066         //
8067         // initialize
8068         //
8069         minmn = ap::minint(m, n);
8070         w.setbounds(1, minmn);
8071         ncu = 0;
8072         nru = 0;
8073         if( uneeded==1 )
8074         {
8075             nru = m;
8076             ncu = minmn;
8077             u.setbounds(1, nru, 1, ncu);
8078         }
8079         if( uneeded==2 )
8080         {
8081             nru = m;
8082             ncu = m;
8083             u.setbounds(1, nru, 1, ncu);
8084         }
8085         nrvt = 0;
8086         ncvt = 0;
8087         if( vtneeded==1 )
8088         {
8089             nrvt = minmn;
8090             ncvt = n;
8091             vt.setbounds(1, nrvt, 1, ncvt);
8092         }
8093         if( vtneeded==2 )
8094         {
8095             nrvt = n;
8096             ncvt = n;
8097             vt.setbounds(1, nrvt, 1, ncvt);
8098         }
8099 
8100         //
8101         // M much larger than N
8102         // Use bidiagonal reduction with QR-decomposition
8103         //
8104         if( m>amp::ampf<Precision>("1.6")*n )
8105         {
8106             if( uneeded==0 )
8107             {
8108 
8109                 //
8110                 // No left singular vectors to be computed
8111                 //
8112                 qr::qrdecomposition<Precision>(a, m, n, tau);
8113                 for(i=2; i<=n; i++)
8114                 {
8115                     for(j=1; j<=i-1; j++)
8116                     {
8117                         a(i,j) = 0;
8118                     }
8119                 }
8120                 bidiagonal::tobidiagonal<Precision>(a, n, n, tauq, taup);
8121                 bidiagonal::unpackptfrombidiagonal<Precision>(a, n, n, taup, nrvt, vt);
8122                 bidiagonal::unpackdiagonalsfrombidiagonal<Precision>(a, n, n, isupper, w, e);
8123                 result = bdsvd::bidiagonalsvddecomposition<Precision>(w, e, n, isupper, false, u, 0, a, 0, vt, ncvt);
8124                 return result;
8125             }
8126             else
8127             {
8128 
8129                 //
8130                 // Left singular vectors (may be full matrix U) to be computed
8131                 //
8132                 qr::qrdecomposition<Precision>(a, m, n, tau);
8133                 qr::unpackqfromqr<Precision>(a, m, n, tau, ncu, u);
8134                 for(i=2; i<=n; i++)
8135                 {
8136                     for(j=1; j<=i-1; j++)
8137                     {
8138                         a(i,j) = 0;
8139                     }
8140                 }
8141                 bidiagonal::tobidiagonal<Precision>(a, n, n, tauq, taup);
8142                 bidiagonal::unpackptfrombidiagonal<Precision>(a, n, n, taup, nrvt, vt);
8143                 bidiagonal::unpackdiagonalsfrombidiagonal<Precision>(a, n, n, isupper, w, e);
8144                 if( additionalmemory<1 )
8145                 {
8146 
8147                     //
8148                     // No additional memory can be used
8149                     //
8150                     bidiagonal::multiplybyqfrombidiagonal<Precision>(a, n, n, tauq, u, m, n, true, false);
8151                     result = bdsvd::bidiagonalsvddecomposition<Precision>(w, e, n, isupper, false, u, m, a, 0, vt, ncvt);
8152                 }
8153                 else
8154                 {
8155 
8156                     //
8157                     // Large U. Transforming intermediate matrix T2
8158                     //
8159                     work.setbounds(1, ap::maxint(m, n));
8160                     bidiagonal::unpackqfrombidiagonal<Precision>(a, n, n, tauq, n, t2);
8161                     blas::copymatrix<Precision>(u, 1, m, 1, n, a, 1, m, 1, n);
8162                     blas::inplacetranspose<Precision>(t2, 1, n, 1, n, work);
8163                     result = bdsvd::bidiagonalsvddecomposition<Precision>(w, e, n, isupper, false, u, 0, t2, n, vt, ncvt);
8164                     blas::matrixmatrixmultiply<Precision>(a, 1, m, 1, n, false, t2, 1, n, 1, n, true, amp::ampf<Precision>("1.0"), u, 1, m, 1, n, amp::ampf<Precision>("0.0"), work);
8165                 }
8166                 return result;
8167             }
8168         }
8169 
8170         //
8171         // N much larger than M
8172         // Use bidiagonal reduction with LQ-decomposition
8173         //
8174         if( n>amp::ampf<Precision>("1.6")*m )
8175         {
8176             if( vtneeded==0 )
8177             {
8178 
8179                 //
8180                 // No right singular vectors to be computed
8181                 //
8182                 lq::lqdecomposition<Precision>(a, m, n, tau);
8183                 for(i=1; i<=m-1; i++)
8184                 {
8185                     for(j=i+1; j<=m; j++)
8186                     {
8187                         a(i,j) = 0;
8188                     }
8189                 }
8190                 bidiagonal::tobidiagonal<Precision>(a, m, m, tauq, taup);
8191                 bidiagonal::unpackqfrombidiagonal<Precision>(a, m, m, tauq, ncu, u);
8192                 bidiagonal::unpackdiagonalsfrombidiagonal<Precision>(a, m, m, isupper, w, e);
8193                 work.setbounds(1, m);
8194                 blas::inplacetranspose<Precision>(u, 1, nru, 1, ncu, work);
8195                 result = bdsvd::bidiagonalsvddecomposition<Precision>(w, e, m, isupper, false, a, 0, u, nru, vt, 0);
8196                 blas::inplacetranspose<Precision>(u, 1, nru, 1, ncu, work);
8197                 return result;
8198             }
8199             else
8200             {
8201 
8202                 //
8203                 // Right singular vectors (may be full matrix VT) to be computed
8204                 //
8205                 lq::lqdecomposition<Precision>(a, m, n, tau);
8206                 lq::unpackqfromlq<Precision>(a, m, n, tau, nrvt, vt);
8207                 for(i=1; i<=m-1; i++)
8208                 {
8209                     for(j=i+1; j<=m; j++)
8210                     {
8211                         a(i,j) = 0;
8212                     }
8213                 }
8214                 bidiagonal::tobidiagonal<Precision>(a, m, m, tauq, taup);
8215                 bidiagonal::unpackqfrombidiagonal<Precision>(a, m, m, tauq, ncu, u);
8216                 bidiagonal::unpackdiagonalsfrombidiagonal<Precision>(a, m, m, isupper, w, e);
8217                 work.setbounds(1, ap::maxint(m, n));
8218                 blas::inplacetranspose<Precision>(u, 1, nru, 1, ncu, work);
8219                 if( additionalmemory<1 )
8220                 {
8221 
8222                     //
8223                     // No additional memory available
8224                     //
8225                     bidiagonal::multiplybypfrombidiagonal<Precision>(a, m, m, taup, vt, m, n, false, true);
8226                     result = bdsvd::bidiagonalsvddecomposition<Precision>(w, e, m, isupper, false, a, 0, u, nru, vt, n);
8227                 }
8228                 else
8229                 {
8230 
8231                     //
8232                     // Large VT. Transforming intermediate matrix T2
8233                     //
8234                     bidiagonal::unpackptfrombidiagonal<Precision>(a, m, m, taup, m, t2);
8235                     result = bdsvd::bidiagonalsvddecomposition<Precision>(w, e, m, isupper, false, a, 0, u, nru, t2, m);
8236                     blas::copymatrix<Precision>(vt, 1, m, 1, n, a, 1, m, 1, n);
8237                     blas::matrixmatrixmultiply<Precision>(t2, 1, m, 1, m, false, a, 1, m, 1, n, false, amp::ampf<Precision>("1.0"), vt, 1, m, 1, n, amp::ampf<Precision>("0.0"), work);
8238                 }
8239                 blas::inplacetranspose<Precision>(u, 1, nru, 1, ncu, work);
8240                 return result;
8241             }
8242         }
8243 
8244         //
8245         // M<=N
8246         // We can use inplace transposition of U to get rid of columnwise operations
8247         //
8248         if( m<=n )
8249         {
8250             bidiagonal::tobidiagonal<Precision>(a, m, n, tauq, taup);
8251             bidiagonal::unpackqfrombidiagonal<Precision>(a, m, n, tauq, ncu, u);
8252             bidiagonal::unpackptfrombidiagonal<Precision>(a, m, n, taup, nrvt, vt);
8253             bidiagonal::unpackdiagonalsfrombidiagonal<Precision>(a, m, n, isupper, w, e);
8254             work.setbounds(1, m);
8255             blas::inplacetranspose<Precision>(u, 1, nru, 1, ncu, work);
8256             result = bdsvd::bidiagonalsvddecomposition<Precision>(w, e, minmn, isupper, false, a, 0, u, nru, vt, ncvt);
8257             blas::inplacetranspose<Precision>(u, 1, nru, 1, ncu, work);
8258             return result;
8259         }
8260 
8261         //
8262         // Simple bidiagonal reduction
8263         //
8264         bidiagonal::tobidiagonal<Precision>(a, m, n, tauq, taup);
8265         bidiagonal::unpackqfrombidiagonal<Precision>(a, m, n, tauq, ncu, u);
8266         bidiagonal::unpackptfrombidiagonal<Precision>(a, m, n, taup, nrvt, vt);
8267         bidiagonal::unpackdiagonalsfrombidiagonal<Precision>(a, m, n, isupper, w, e);
8268         if( additionalmemory<2 || uneeded==0 )
8269         {
8270 
8271             //
8272             // We cant use additional memory or there is no need in such operations
8273             //
8274             result = bdsvd::bidiagonalsvddecomposition<Precision>(w, e, minmn, isupper, false, u, nru, a, 0, vt, ncvt);
8275         }
8276         else
8277         {
8278 
8279             //
8280             // We can use additional memory
8281             //
8282             t2.setbounds(1, minmn, 1, m);
8283             blas::copyandtranspose<Precision>(u, 1, m, 1, minmn, t2, 1, minmn, 1, m);
8284             result = bdsvd::bidiagonalsvddecomposition<Precision>(w, e, minmn, isupper, false, u, 0, t2, m, vt, ncvt);
8285             blas::copyandtranspose<Precision>(t2, 1, minmn, 1, m, u, 1, m, 1, minmn);
8286         }
8287         return result;
8288     }
8289 } // namespace
8290 
8291 #endif
8292 #endif
8293 
8294