1 // SPDX-License-Identifier: Apache-2.0
2 //
3 // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
4 // Copyright 2008-2016 National ICT Australia (NICTA)
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 // ------------------------------------------------------------------------
17 
18 
19 namespace newarp
20 {
21 
22 
23 template<typename eT, int SelectionRule, typename OpType>
24 inline
25 void
factorise_from(uword from_k,uword to_m,const Col<eT> & fk)26 GenEigsSolver<eT, SelectionRule, OpType>::factorise_from(uword from_k, uword to_m, const Col<eT>& fk)
27   {
28   arma_extra_debug_sigprint();
29 
30   if(to_m <= from_k) { return; }
31 
32   fac_f = fk;
33 
34   Col<eT> w(dim_n, arma_zeros_indicator());
35   eT beta = norm(fac_f);
36   // Keep the upperleft k x k submatrix of H and set other elements to 0
37   fac_H.tail_cols(ncv - from_k).zeros();
38   fac_H.submat(span(from_k, ncv - 1), span(0, from_k - 1)).zeros();
39   for(uword i = from_k; i <= to_m - 1; i++)
40     {
41     bool restart = false;
42     // If beta = 0, then the next V is not full rank
43     // We need to generate a new residual vector that is orthogonal
44     // to the current V, which we call a restart
45     if(beta < eps)
46       {
47       // Generate new random vector for fac_f
48       blas_int idist = 2;
49       blas_int iseed[4] = {1, 3, 5, 7};
50       iseed[0] = (i + 100) % 4095;
51       blas_int n = dim_n;
52       lapack::larnv(&idist, &iseed[0], &n, fac_f.memptr());
53       // f <- f - V * V' * f, so that f is orthogonal to V
54       Mat<eT> Vs(fac_V.memptr(), dim_n, i, false); // First i columns
55       Col<eT> Vf = Vs.t() * fac_f;
56       fac_f -= Vs * Vf;
57       // beta <- ||f||
58       beta = norm(fac_f);
59 
60       restart = true;
61       }
62 
63     // v <- f / ||f||
64     fac_V.col(i) = fac_f / beta; // The (i+1)-th column
65 
66     // Note that H[i+1, i] equals to the unrestarted beta
67     if(restart) { fac_H(i, i - 1) = 0.0; } else { fac_H(i, i - 1) = beta; }
68 
69     // w <- A * v, v = fac_V.col(i)
70     op.perform_op(fac_V.colptr(i), w.memptr());
71     nmatop++;
72 
73     // First i+1 columns of V
74     Mat<eT> Vs(fac_V.memptr(), dim_n, i + 1, false);
75     // h = fac_H(0:i, i)
76     Col<eT> h(fac_H.colptr(i), i + 1, false);
77     // h <- V' * w
78     h = Vs.t() * w;
79 
80     // f <- w - V * h
81     fac_f = w - Vs * h;
82     beta = norm(fac_f);
83 
84     if(beta > 0.717 * norm(h)) { continue; }
85 
86     // f/||f|| is going to be the next column of V, so we need to test
87     // whether V' * (f/||f||) ~= 0
88     Col<eT> Vf = Vs.t() * fac_f;
89     // If not, iteratively correct the residual
90     uword count = 0;
91     while(count < 5 && abs(Vf).max() > approx0 * beta)
92       {
93       // f <- f - V * Vf
94       fac_f -= Vs * Vf;
95       // h <- h + Vf
96       h += Vf;
97       // beta <- ||f||
98       beta = norm(fac_f);
99 
100       Vf = Vs.t() * fac_f;
101       count++;
102       }
103     }
104   }
105 
106 
107 
108 template<typename eT, int SelectionRule, typename OpType>
109 inline
110 void
restart(uword k)111 GenEigsSolver<eT, SelectionRule, OpType>::restart(uword k)
112   {
113   arma_extra_debug_sigprint();
114 
115   if(k >= ncv) { return; }
116 
117   DoubleShiftQR<eT>     decomp_ds(ncv);
118   UpperHessenbergQR<eT> decomp;
119 
120   Mat<eT> Q(ncv, ncv, fill::eye);
121 
122   for(uword i = k; i < ncv; i++)
123     {
124     if(cx_attrib::is_complex(ritz_val(i), eT(0)) && (i < (ncv - 1)) && cx_attrib::is_conj(ritz_val(i), ritz_val(i + 1), eT(0)))
125       {
126       // H - mu * I = Q1 * R1
127       // H <- R1 * Q1 + mu * I = Q1' * H * Q1
128       // H - conj(mu) * I = Q2 * R2
129       // H <- R2 * Q2 + conj(mu) * I = Q2' * H * Q2
130       //
131       // (H - mu * I) * (H - conj(mu) * I) = Q1 * Q2 * R2 * R1 = Q * R
132       eT s = 2 * ritz_val(i).real();
133       eT t = std::norm(ritz_val(i));
134       decomp_ds.compute(fac_H, s, t);
135 
136       // Q -> Q * Qi
137       decomp_ds.apply_YQ(Q);
138       // H -> Q'HQ
139       fac_H = decomp_ds.matrix_QtHQ();
140 
141       i++;
142       }
143     else
144       {
145       // QR decomposition of H - mu * I, mu is real
146       fac_H.diag() -= ritz_val(i).real();
147       decomp.compute(fac_H);
148 
149       // Q -> Q * Qi
150       decomp.apply_YQ(Q);
151       // H -> Q'HQ = RQ + mu * I
152       fac_H = decomp.matrix_RQ();
153       fac_H.diag() += ritz_val(i).real();
154       }
155     }
156 
157   // V -> VQ
158   // Q has some elements being zero
159   // The first (ncv - k + i) elements of the i-th column of Q are non-zero
160   Mat<eT> Vs(dim_n, k + 1, arma_nozeros_indicator());
161   uword nnz;
162   for(uword i = 0; i < k; i++)
163     {
164     nnz = ncv - k + i + 1;
165     Mat<eT> V(fac_V.memptr(), dim_n, nnz, false);
166     Col<eT> q(Q.colptr(i), nnz, false);
167     Col<eT> v(Vs.colptr(i), dim_n, false);
168     v = V * q;
169     }
170 
171   Vs.col(k) = fac_V * Q.col(k);
172   fac_V.head_cols(k + 1) = Vs;
173 
174   Col<eT> fk = fac_f * Q(ncv - 1, k - 1) + fac_V.col(k) * fac_H(k, k - 1);
175   factorise_from(k, ncv, fk);
176   retrieve_ritzpair();
177   }
178 
179 
180 
181 template<typename eT, int SelectionRule, typename OpType>
182 inline
183 uword
num_converged(eT tol)184 GenEigsSolver<eT, SelectionRule, OpType>::num_converged(eT tol)
185   {
186   arma_extra_debug_sigprint();
187 
188   // thresh = tol * max(prec, abs(theta)), theta for ritz value
189   const eT f_norm = arma::norm(fac_f);
190   for(uword i = 0; i < nev; i++)
191     {
192     eT thresh = tol * (std::max)(approx0, std::abs(ritz_val(i)));
193     eT resid = std::abs(ritz_est(i)) * f_norm;
194     ritz_conv[i] = (resid < thresh);
195     }
196 
197   return std::count(ritz_conv.begin(), ritz_conv.end(), true);
198   }
199 
200 
201 
202 template<typename eT, int SelectionRule, typename OpType>
203 inline
204 uword
nev_adjusted(uword nconv)205 GenEigsSolver<eT, SelectionRule, OpType>::nev_adjusted(uword nconv)
206   {
207   arma_extra_debug_sigprint();
208 
209   uword nev_new = nev;
210 
211   for(uword i = nev; i < ncv; i++)
212     {
213     if(std::abs(ritz_est(i)) < eps) { nev_new++; }
214     }
215   // Adjust nev_new again, according to dnaup2.f line 660~674 in ARPACK
216   nev_new += (std::min)(nconv, (ncv - nev_new) / 2);
217   if(nev_new == 1 && ncv >= 6)
218     {
219     nev_new = ncv / 2;
220     }
221   else
222   if(nev_new == 1 && ncv > 3)
223     {
224     nev_new = 2;
225     }
226 
227   if(nev_new > ncv - 2) { nev_new = ncv - 2; }
228 
229   // Increase nev by one if ritz_val[nev - 1] and
230   // ritz_val[nev] are conjugate pairs
231   if(cx_attrib::is_complex(ritz_val(nev_new - 1), eps) && cx_attrib::is_conj(ritz_val(nev_new - 1), ritz_val(nev_new), eps))
232     {
233     nev_new++;
234     }
235 
236   return nev_new;
237   }
238 
239 
240 
241 template<typename eT, int SelectionRule, typename OpType>
242 inline
243 void
retrieve_ritzpair()244 GenEigsSolver<eT, SelectionRule, OpType>::retrieve_ritzpair()
245   {
246   arma_extra_debug_sigprint();
247 
248   UpperHessenbergEigen<eT> decomp(fac_H);
249 
250   Col< std::complex<eT> > evals = decomp.eigenvalues();
251   Mat< std::complex<eT> > evecs = decomp.eigenvectors();
252 
253   SortEigenvalue< std::complex<eT>, SelectionRule > sorting(evals.memptr(), evals.n_elem);
254   std::vector<uword> ind = sorting.index();
255 
256   // Copy the ritz values and vectors to ritz_val and ritz_vec, respectively
257   for(uword i = 0; i < ncv; i++)
258     {
259     ritz_val(i) = evals(ind[i]);
260     ritz_est(i) = evecs(ncv - 1, ind[i]);
261     }
262   for(uword i = 0; i < nev; i++)
263     {
264     ritz_vec.col(i) = evecs.col(ind[i]);
265     }
266   }
267 
268 
269 
270 template<typename eT, int SelectionRule, typename OpType>
271 inline
272 void
sort_ritzpair()273 GenEigsSolver<eT, SelectionRule, OpType>::sort_ritzpair()
274   {
275   arma_extra_debug_sigprint();
276 
277   // SortEigenvalue< std::complex<eT>, EigsSelect::LARGEST_MAGN > sorting(ritz_val.memptr(), nev);
278 
279   // sort Ritz values according to SelectionRule, to be consistent with ARPACK
280   SortEigenvalue< std::complex<eT>, SelectionRule > sorting(ritz_val.memptr(), nev);
281 
282   std::vector<uword> ind = sorting.index();
283 
284   Col< std::complex<eT> > new_ritz_val(ncv,      arma_zeros_indicator()  );
285   Mat< std::complex<eT> > new_ritz_vec(ncv, nev, arma_nozeros_indicator());
286   std::vector<bool>       new_ritz_conv(nev);
287 
288   for(uword i = 0; i < nev; i++)
289     {
290     new_ritz_val(i) = ritz_val(ind[i]);
291     new_ritz_vec.col(i) = ritz_vec.col(ind[i]);
292     new_ritz_conv[i] = ritz_conv[ind[i]];
293     }
294 
295   ritz_val.swap(new_ritz_val);
296   ritz_vec.swap(new_ritz_vec);
297   ritz_conv.swap(new_ritz_conv);
298   }
299 
300 
301 
302 template<typename eT, int SelectionRule, typename OpType>
303 inline
GenEigsSolver(const OpType & op_,uword nev_,uword ncv_)304 GenEigsSolver<eT, SelectionRule, OpType>::GenEigsSolver(const OpType& op_, uword nev_, uword ncv_)
305   : op(op_)
306   , nev(nev_)
307   , dim_n(op.n_rows)
308   , ncv(ncv_ > dim_n ? dim_n : ncv_)
309   , nmatop(0)
310   , niter(0)
311   , eps(std::numeric_limits<eT>::epsilon())
312   , approx0(std::pow(eps, eT(2.0) / 3))
313   {
314   arma_extra_debug_sigprint();
315 
316   arma_debug_check( (nev_ < 1 || nev_ > dim_n - 2),    "newarp::GenEigsSolver: nev must satisfy 1 <= nev <= n - 2, n is the size of matrix" );
317   arma_debug_check( (ncv_ < nev_ + 2 || ncv_ > dim_n), "newarp::GenEigsSolver: ncv must satisfy nev + 2 <= ncv <= n, n is the size of matrix" );
318   }
319 
320 
321 
322 template<typename eT, int SelectionRule, typename OpType>
323 inline
324 void
init(eT * init_resid)325 GenEigsSolver<eT, SelectionRule, OpType>::init(eT* init_resid)
326   {
327   arma_extra_debug_sigprint();
328 
329   // Reset all matrices/vectors to zero
330   fac_V.zeros(dim_n, ncv);
331   fac_H.zeros(ncv, ncv);
332   fac_f.zeros(dim_n);
333   ritz_val.zeros(ncv);
334   ritz_vec.zeros(ncv, nev);
335   ritz_est.zeros(ncv);
336   ritz_conv.assign(nev, false);
337 
338   nmatop = 0;
339   niter = 0;
340 
341   Col<eT> r(init_resid, dim_n, false);
342   // The first column of fac_V
343   Col<eT> v(fac_V.colptr(0), dim_n, false);
344   eT rnorm = norm(r);
345   arma_check( (rnorm < eps), "newarp::GenEigsSolver::init(): initial residual vector cannot be zero" );
346   v = r / rnorm;
347 
348   Col<eT> w(dim_n, arma_zeros_indicator());
349   op.perform_op(v.memptr(), w.memptr());
350   nmatop++;
351 
352   fac_H(0, 0) = dot(v, w);
353   fac_f = w - v * fac_H(0, 0);
354   }
355 
356 
357 
358 template<typename eT, int SelectionRule, typename OpType>
359 inline
360 void
init()361 GenEigsSolver<eT, SelectionRule, OpType>::init()
362   {
363   arma_extra_debug_sigprint();
364 
365   podarray<eT> init_resid(dim_n);
366   blas_int idist = 2;                // Uniform(-1, 1)
367   blas_int iseed[4] = {1, 3, 5, 7};  // Fixed random seed
368   blas_int n = dim_n;
369   lapack::larnv(&idist, &iseed[0], &n, init_resid.memptr());
370   init(init_resid.memptr());
371   }
372 
373 
374 
375 template<typename eT, int SelectionRule, typename OpType>
376 inline
377 uword
compute(uword maxit,eT tol)378 GenEigsSolver<eT, SelectionRule, OpType>::compute(uword maxit, eT tol)
379   {
380   arma_extra_debug_sigprint();
381 
382   // The m-step Arnoldi factorisation
383   factorise_from(1, ncv, fac_f);
384   retrieve_ritzpair();
385   // Restarting
386   uword i, nconv = 0, nev_adj;
387   for(i = 0; i < maxit; i++)
388     {
389     nconv = num_converged(tol);
390     if(nconv >= nev) { break; }
391 
392     nev_adj = nev_adjusted(nconv);
393     restart(nev_adj);
394     }
395   // Sorting results
396   sort_ritzpair();
397 
398   niter = i + 1;
399 
400   return (std::min)(nev, nconv);
401   }
402 
403 
404 
405 template<typename eT, int SelectionRule, typename OpType>
406 inline
407 Col< std::complex<eT> >
eigenvalues()408 GenEigsSolver<eT, SelectionRule, OpType>::eigenvalues()
409   {
410   arma_extra_debug_sigprint();
411 
412   uword nconv = std::count(ritz_conv.begin(), ritz_conv.end(), true);
413   Col< std::complex<eT> > res(nconv, arma_zeros_indicator());
414 
415   if(nconv > 0)
416     {
417     uword j = 0;
418     for(uword i = 0; i < nev; i++)
419       {
420       if(ritz_conv[i])
421         {
422         res(j) = ritz_val(i);
423         j++;
424         }
425       }
426     }
427 
428   return res;
429   }
430 
431 
432 
433 template<typename eT, int SelectionRule, typename OpType>
434 inline
435 Mat< std::complex<eT> >
eigenvectors(uword nvec)436 GenEigsSolver<eT, SelectionRule, OpType>::eigenvectors(uword nvec)
437   {
438   arma_extra_debug_sigprint();
439 
440   uword nconv = std::count(ritz_conv.begin(), ritz_conv.end(), true);
441   nvec = (std::min)(nvec, nconv);
442   Mat< std::complex<eT> > res(dim_n, nvec);
443 
444   if(nvec > 0)
445     {
446     Mat< std::complex<eT> > ritz_vec_conv(ncv, nvec, arma_zeros_indicator());
447     uword j = 0;
448     for(uword i = 0; (i < nev) && (j < nvec); i++)
449       {
450       if(ritz_conv[i])
451         {
452         ritz_vec_conv.col(j) = ritz_vec.col(i);
453         j++;
454         }
455       }
456 
457     res = fac_V * ritz_vec_conv;
458     }
459 
460   return res;
461   }
462 
463 
464 }  // namespace newarp
465