1 /*******************************************************************************
2  * Copyright (c) 2018, College of William & Mary
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the College of William & Mary nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COLLEGE OF WILLIAM & MARY BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * PRIMME: https://github.com/primme/primme
28  * Contact: Andreas Stathopoulos, a n d r e a s _at_ c s . w m . e d u
29  *******************************************************************************
30  * File: correction.c
31  *
32  * Purpose - Computes the correction corresponding to each block Ritz vectors.
33  *
34  ******************************************************************************/
35 
36 #ifndef THIS_FILE
37 #define THIS_FILE "../eigs/correction.c"
38 #endif
39 
40 #include "numerical.h"
41 #include "template_normal.h"
42 #include "common_eigs.h"
43 /* Keep automatically generated headers under this section  */
44 #ifndef CHECK_TEMPLATE
45 #include "correction.h"
46 #include "inner_solve.h"
47 #include "auxiliary_eigs.h"
48 #endif
49 
50 #ifdef SUPPORTED_TYPE
51 
52 /*******************************************************************************
53  * Subroutine solve_correction - This routine solves the correction equation
54  *    for each Ritz vector and residual in the block.
55  *
56  * INPUT ARRAYS AND PARAMETERS
57  * ---------------------------
58  * W              The last blockSize vectors of W contain the residuals
59  *
60  * evecs          The converged Ritz vectors.  Array is of dimension numLocked.
61  *
62  * evecsHat       K^{-1}evecs given a preconditioner K.
63  *                (accessed only if skew projector is requested)
64  *
65  * Mfact          The factorization of the matrix evecs'*evecsHat
66  * ipivot         The pivots for the Mfact factorization
67  *
68  * lockedEvals    The locked eigenvalues
69  *
70  * numLocked      The number of locked eigenvalues (zero if not locking)
71  *
72  * numConvergedStored  Number of converged eigenvectors in V that have been
73  *                copied in evecs when no locking is employed, to accommodate
74  *                for a requested skew projection with evecs.
75  *
76  * ritzVals       Array of size basisSize. The Ritz values corresponding to
77  *                the current basis V
78  *
79  * flags          Array indicating which eigenvectors have converged
80  *
81  * basisSize      The size of the basis V
82  *
83  * iev            Array of size blockSize.  The index of each block Ritz value
84  *                in ritzVals (and each Ritz vector in V)
85  *
86  * blockSize      The current block size
87  *
88  * machEps        machine precision
89  *
90  * rwork          Real workspace of size
91  *                3*maxEvecsSize + 2*primme->maxBlockSize
92  *                + (primme->numEvals+primme->maxBasisSize)
93  *                        *----------------------------------------------------*
94  *                        | The following are optional and mutually exclusive: |
95  *                        *------------------------------+                     |
96  *                + 4*primme->nLocal + primme->nLocal    | For QMR work and sol|
97  *                + primme->nLocal*primme->maxBlockSize  | OLSEN for KinvBx    |
98  *                                                       *---------------------*
99  *
100  * rworkSize      the size of rwork. If less than needed, func returns needed.
101  *
102  * iwork          Integer workspace of size maxBlockSize
103  *
104  * primme         Structure containing various solver parameters
105  *
106  *
107  * INPUT/OUTPUT ARRAYS AND PARAMETERS
108  * ----------------------------------
109  * V              The orthonormal basis.  The last blockSize vectors of V
110  *                contain the Ritz vectors
111  *
112  * blockNorms     On input, residual norms of the Ritz vectors computed
113  *                during the current outer iteration.
114  *                On output, the approximate residual norms of the corrected
115  *                Ritz vectors (through JDQMR only).
116  *
117  *
118  * prevRitzVals   Array of size numPrevRitzVals.  The Ritz values from the
119  *                previous iteration and converged Ritz values
120  *
121  * numPrevRitzVals  The of size prevRitzVals updated every outer step
122  *
123  * touch            Parameter used in inner solve stopping criteria
124  *
125  * Return Value
126  * ------------
127  * int  Error code: 0 upon success, nonzero otherwise
128  *                 -1 innner solver failure
129  *                 >0 the needed rworkSize, if the given was not enough
130  *
131  ******************************************************************************/
132 
133 TEMPLATE_PLEASE
134 int solve_correction_Sprimme(SCALAR *V, PRIMME_INT ldV, SCALAR *W,
135       PRIMME_INT ldW, SCALAR *BV, PRIMME_INT ldBV, SCALAR *evecs,
136       PRIMME_INT ldevecs, SCALAR *Bevecs, PRIMME_INT ldBevecs, SCALAR *evecsHat,
137       PRIMME_INT ldevecsHat, HSCALAR *Mfact, int *ipivot, HEVAL *lockedEvals,
138       int numLocked, int numConvergedStored, HEVAL *ritzVals,
139       HEVAL *prevRitzVals, int *numPrevRitzVals, int *flags, int basisSize,
140       HREAL *blockNorms, int *iev, int blockSize, int *touch, double startTime,
141       primme_context ctx) {
142 
143    KIND(, (void)lockedEvals);
144    KIND(, (void)flags);
145    KIND(, (void)evecs);
146    KIND(, (void)ldevecs);
147    KIND(, (void)Bevecs);
148    KIND(, (void)ldBevecs);
149    KIND(, (void)evecsHat);
150    KIND(, (void)ldevecsHat);
151    KIND(, (void)Mfact);
152    KIND(, (void)ipivot);
153    KIND(, (void)numConvergedStored);
154    KIND(, (void)touch);
155    KIND(, (void)startTime);
156 
157    primme_params *primme = ctx.primme;
158    int blockIndex; /* Loop index.  Ranges from 0..blockSize-1.       */
159    int *ilev;      /* Array of size blockSize.  Maps the target Ritz */
160                    /* values to their positions in the sortedEvals   */
161                    /* array.                                         */
162 
163    HEVAL *sortedRitzVals; /* Sorted array of current and converged Ritz     */
164                           /* values.  Size of array is numLocked+basisSize. */
165    KIND(double, PRIMME_COMPLEX_DOUBLE) *
166          blockOfShifts;   /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/
167    HREAL *approxOlsenEps; /* Shifts for approximate Olsen implementation    */
168 
169    CHKERR(KIND(Num_malloc_dprimme, Num_malloc_zprimme)(
170          blockSize, &blockOfShifts, ctx));
171    CHKERR(Num_malloc_RHprimme(blockSize, &approxOlsenEps, ctx));
172 
173    /*------------------------------------------------------------*/
174    /*  Figuring out preconditioning shifts  (robust, Olsen, etc) */
175    /*------------------------------------------------------------*/
176    /* blockOfShifts will contain the preconditioning shifts:               */
177    /* either Ritz values or robustShifts computed below. These shifts      */
178    /* will be used in the correction equations or in inverting (K-sigma I) */
179    /* approxOlsenEps will contain error approximations for eigenavalues    */
180    /* to be used for Olsen's method (when innerIterations =0).             */
181 
182 #ifdef USE_HERMITIAN
183    if (primme->locking && (primme->target == primme_smallest ||
184                                 primme->target == primme_largest)) {
185       /* Combine the sorted list of locked Ritz values with the sorted  */
186       /* list of current Ritz values, ritzVals.  The merging of the two */
187       /* lists lockedEvals and ritzVals is stored in sortedRitzVals.    */
188 
189       CHKERR(Num_malloc_RHprimme(numLocked + basisSize, &sortedRitzVals, ctx));
190       CHKERR(Num_malloc_iprimme(blockSize, &ilev, ctx));
191       mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize,
192             sortedRitzVals, ilev, blockSize, primme);
193    } else
194 #endif /* USE_HERMITIAN */
195    {
196       /* In the case of soft-locking or when we look for interior ones  */
197       /* the sorted evals are simply the ritzVals, targeted as iev      */
198 
199       sortedRitzVals = ritzVals;
200       ilev = iev;
201    }
202 
203    // For interior pairs we an approach based on the target shift given by the
204    // user. For Rayleigh-Ritz, the Ritz value does not get monotonically closer
205    // to the exact value, and it is not trusted as a good shift until its
206    // residual vector norm is small. We capture this hint in the following
207    // heuristic: take the closest point in the interval Ritz value +- residual
208    // norm to the target as the shift. For refined extraction,
209    //     |Ritz value - target| <= the Ritz singular value.
210    // So we use the Ritz value as the shift as soon as the Ritz value is closer
211    // to the Ritz singular value than to the target.
212 
213    if (primme->target != primme_smallest && primme->target != primme_largest) {
214 
215       for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
216 
217          double targetShift =
218                primme->numTargetShifts > 0
219                      ? primme->targetShifts[min(
220                              primme->numTargetShifts - 1, numLocked)]
221                      : 0.0;
222          int sortedIndex = ilev[blockIndex];
223          if (EVAL_ABS(sortedRitzVals[sortedIndex] - (HEVAL)targetShift) <
224                blockNorms[blockIndex] * sqrt(primme->stats.estimateInvBNorm)) {
225             blockOfShifts[blockIndex] = targetShift;
226          } else if (primme->projectionParams.projection ==
227                     primme_proj_refined) {
228             blockOfShifts[blockIndex] = sortedRitzVals[sortedIndex];
229          } else {
230             blockOfShifts[blockIndex] =
231                   sortedRitzVals[sortedIndex] +
232                   ((HEVAL)(blockNorms[blockIndex] *
233                            sqrt(primme->stats.estimateInvBNorm))) *
234                         ((HEVAL)targetShift - sortedRitzVals[sortedIndex]) /
235                         EVAL_ABS(
236                               (HEVAL)targetShift - sortedRitzVals[sortedIndex]);
237          }
238 
239          if (sortedIndex < *numPrevRitzVals) {
240             approxOlsenEps[blockIndex] = EVAL_ABS(
241                   prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]);
242          }
243          else {
244             approxOlsenEps[blockIndex] =
245                   blockNorms[blockIndex] * sqrt(primme->stats.estimateInvBNorm);
246          }
247       } /* for loop */
248 
249       /* Remember the previous ritz values*/
250       *numPrevRitzVals = basisSize;
251       CHKERR(KIND(Num_copy_RHprimme, Num_copy_SHprimme)(
252             *numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1, ctx));
253 
254    } /* user provided shifts */
255 #ifdef USE_HERMITIAN
256    else {
257       /*-----------------------------------------------------------------*/
258       /* else it is primme_smallest or primme_largest                    */
259       /*-----------------------------------------------------------------*/
260 
261       if (primme->correctionParams.robustShifts) {
262          /*----------------------------------------------------*/
263          /* Subtract/add a robust shift from/to the Ritz value */
264          /*----------------------------------------------------*/
265 
266          /* Find the robust shift for each block vector */
267          for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
268 
269             int sortedIndex = ilev[blockIndex];
270             HREAL eval = sortedRitzVals[sortedIndex];
271 
272             HREAL robustShift = computeRobustShift(blockIndex,
273                   blockNorms[blockIndex], prevRitzVals, *numPrevRitzVals,
274                   sortedRitzVals, &approxOlsenEps[blockIndex],
275                   numLocked + basisSize, ilev, primme);
276 
277             /* Subtract/add the shift if looking for the smallest/largest  */
278             /* eigenvalues, Do not go beyond the previous computed eigval  */
279 
280             if (primme->target == primme_smallest) {
281                blockOfShifts[blockIndex] = eval - robustShift;
282                if (sortedIndex > 0) blockOfShifts[blockIndex] =
283                   max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]);
284             }
285             else {
286                blockOfShifts[blockIndex] = eval + robustShift;
287                if (sortedIndex > 0) blockOfShifts[blockIndex] =
288                   min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]);
289             } /* robust shifting */
290 
291          }  /* for loop */
292 
293       }  /* endif robust shifts */
294       else {
295          /*--------------------------------------------------------------*/
296          /* Otherwise, the shifts for both preconditioner and correction */
297          /* equation should be just the Ritz values. For Olsen's method, */
298          /* the shifts for r-eps*x, are chosen as the difference in Ritz */
299          /* value between successive iterations.                         */
300          /*--------------------------------------------------------------*/
301 
302          for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
303             int ritzIndex   =  iev[blockIndex];
304             int sortedIndex = ilev[blockIndex];
305             blockOfShifts[blockIndex] = ritzVals[ritzIndex];
306             if (sortedIndex < *numPrevRitzVals) {
307                approxOlsenEps[blockIndex] = fabs(
308                      prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]);
309             }
310             else {
311                approxOlsenEps[blockIndex] =
312                      blockNorms[blockIndex] *
313                      sqrt(primme->stats.estimateInvBNorm);
314             }
315          } /* for loop */
316       } /* else no robust shifts */
317 
318       /* Remember the previous ritz values*/
319       *numPrevRitzVals = numLocked+basisSize;
320       Num_copy_RHprimme(
321             *numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1, ctx);
322 
323    } /* else primme_smallest or primme_largest */
324 #endif /* USE_HERMITIAN */
325 
326    /* Equip the primme struct with the blockOfShifts, in case the user */
327    /* wants to precondition (K-sigma_i I)^{-1} with a different shift  */
328    /* for each vector                                                  */
329 
330    primme->ShiftsForPreconditioner = (double *)blockOfShifts;
331 
332    /*------------------------------------------------------------ */
333    /*  Generalized Davidson variants -- No inner iterations       */
334    /*------------------------------------------------------------ */
335    if (primme->correctionParams.maxInnerIterations == 0) {
336       /* This is Generalized Davidson or approximate Olsen's method. */
337       /* Perform block preconditioning (with or without projections) */
338 
339       SCALAR *r = &W[ldW*basisSize];    /* All the block residuals    */
340       SCALAR *x = &V[ldV*basisSize];    /* All the block Ritz vectors */
341       SCALAR *Bx = &BV[ldBV*basisSize]; /* B*x                        */
342 
343       if ( primme->correctionParams.projectors.RightX &&
344            primme->correctionParams.projectors.SkewX    ) {
345            /* Compute exact Olsen's projected preconditioner. This is */
346           /* expensive and rarely improves anything! Included for completeness*/
347 
348           Olsen_preconditioner_block(r, ldW, x, ldV, Bx, ldBV, blockSize, ctx);
349       }
350       else {
351          // In general the Olsen's approximation is not useful without
352          // preconditioning. However expanding the basis with r - approxOlsen*x
353          // helps in checking the practical convergence. Check main_iter for
354          // more details.
355 
356          if (primme->correctionParams.projectors.RightX &&
357                ((primme->correctionParams.precondition &&
358                       primme->applyPreconditioner) ||
359                      Bx != x ||
360                      (primme->locking &&
361                            primme->orth == primme_orth_implicit_I))) {
362 #ifdef USE_HERMITIAN
363             /*Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx */
364             /*is approximated by e: Kinvr-e*KinvBx=Kinv(r-e*x)=Kinv(I-ct*x*x')r*/
365 
366             for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
367                /* Compute r_i = r_i - err_i * Bx_i */
368                Num_axpy_Sprimme(primme->nLocal, -approxOlsenEps[blockIndex],
369                      &Bx[ldBV * blockIndex], 1, &r[ldW * blockIndex], 1, ctx);
370             }
371 #else
372             CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
373 #endif
374          }
375 
376          /* GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-eBx) */
377 
378          CHKERR(applyPreconditioner_Sprimme(r, primme->nLocal, ldW,
379                   x, ldV, blockSize, ctx));
380       }
381    }
382    /* ------------------------------------------------------------ */
383    /*  JDQMR --- JD inner-outer variants                           */
384    /* ------------------------------------------------------------ */
385    else {  /* maxInnerIterations > 0  We perform inner-outer JDQMR */
386 #ifdef USE_HERMITIAN
387       int touch0 = *touch;
388 
389       SCALAR *sol, *KinvBx;
390       PRIMME_INT ldsol = primme->ldOPs, ldKinvBx = primme->ldOPs;
391       CHKERR(Num_malloc_Sprimme(ldsol * blockSize, &sol, ctx));
392       CHKERR(Num_malloc_Sprimme(ldKinvBx * blockSize, &KinvBx, ctx));
393 
394 
395       HSCALAR *xKinvBx;  /* Stores x'*K^{-1}Bx if needed    */
396       CHKERR(Num_malloc_SHprimme(blockSize, &xKinvBx, ctx));
397 
398       SCALAR *r = &W[ldW * basisSize];
399       SCALAR *x = &V[ldV * basisSize];
400       SCALAR *Bx = &BV[ldBV * basisSize];
401 
402       /* Set up the left/right/skew projectors for JDQMR.        */
403       /* The pointers Lprojector, Rprojector(Q/X) point to the   */
404       /* appropriate arrays for use in the projection step       */
405 
406       int sizeLprojectorQ; /* Sizes of the various left/right projectors     */
407       int sizeLprojectorX; /* These will be 0/1/or numOrthConstr+numLocked   */
408       int sizeRprojectorQ; /* or numOrthConstr+numConvergedStored w/o locking*/
409       int sizeRprojectorX;
410       SCALAR *LprojectorQ;      /* Q pointer for (I-Q*Q'). Usually points to evecs*/
411       SCALAR *LprojectorX;      /* Usually points to x */
412       SCALAR *LprojectorBQ;     /* BQ pointer for (I-BQ*Q'). Usually points to Bevecs*/
413       SCALAR *LprojectorBX;     /* Usually points to Bx */
414       SCALAR *RprojectorQ;      /* May point to evecs/evecsHat depending on skewQ */
415       SCALAR *RprojectorX;      /* May point to x/Kinvx depending on skewX        */
416       PRIMME_INT ldLprojectorQ; /* The leading dimension of LprojectorQ    */
417       PRIMME_INT ldLprojectorX; /* The leading dimension of LprojectorX    */
418       PRIMME_INT ldLprojectorBQ;/* The leading dimension of LprojectorBQ   */
419       PRIMME_INT ldLprojectorBX;/* The leading dimension of LprojectorBX   */
420       PRIMME_INT ldRprojectorQ; /* The leading dimension of RprojectorQ    */
421       PRIMME_INT ldRprojectorX; /* The leading dimension of RprojectorX    */
422 
423       CHKERR(setup_JD_projectors(x, ldV, Bx, ldBV, evecs, ldevecs, Bevecs,
424             ldBevecs, evecsHat, ldevecsHat, KinvBx, ldKinvBx, xKinvBx,
425             &LprojectorQ, &ldLprojectorQ, &LprojectorX, &ldLprojectorX,
426             &LprojectorBQ, &ldLprojectorBQ, &LprojectorBX, &ldLprojectorBX,
427             &RprojectorQ, &ldRprojectorQ, &RprojectorX, &ldRprojectorX,
428             &sizeLprojectorQ, &sizeLprojectorX, &sizeRprojectorQ,
429             &sizeRprojectorX, numLocked, numConvergedStored, blockSize, ctx));
430 
431       /* Map the index of the block vector to its corresponding eigenvalue */
432       /* index, and the shift for the correction equation. Also make the   */
433       /* shift available to PRIMME, in case (K-shift B)^-1 is needed       */
434 
435       HEVAL *blockRitzVals;
436       CHKERR(KIND(Num_malloc_RHprimme, Num_malloc_SHprimme)(
437             blockSize, &blockRitzVals, ctx));
438       KIND(Num_compact_vecs_RHprimme, Num_compact_vecs_SHprimme)
439       (ritzVals, 1, blockSize, 1, iev, blockRitzVals, 1, 0 /* force copy */,
440             ctx);
441       primme->ShiftsForPreconditioner = (double *)blockOfShifts;
442 
443       /* Pass the original value of touch and update touch as the maximum  */
444       /* value that takes for all inner_solve calls                        */
445       int touch1 = touch0;
446 
447       CHKERR(inner_solve_Sprimme(blockSize, x, ldV, Bx, ldBV, r, ldW,
448             blockNorms, evecs, ldevecs, Mfact, ipivot, xKinvBx, LprojectorQ,
449             ldLprojectorQ, LprojectorX, ldLprojectorX, LprojectorBQ,
450             ldLprojectorBQ, LprojectorBX, ldLprojectorBX, RprojectorQ,
451             ldRprojectorQ, RprojectorX, ldRprojectorX, sizeLprojectorQ,
452             sizeLprojectorX, sizeRprojectorQ, sizeRprojectorX, sol, ldsol,
453             blockRitzVals, blockOfShifts, &touch1, startTime, ctx));
454       *touch = max(*touch, touch1);
455 
456       Num_copy_matrix_Sprimme(sol, primme->nLocal, blockSize, ldsol,
457             &V[ldV * basisSize], ldV, ctx);
458 
459 
460       CHKERR(Num_free_SHprimme(xKinvBx, ctx));
461       CHKERR(Num_free_Sprimme(sol, ctx));
462       CHKERR(Num_free_Sprimme(KinvBx, ctx));
463       CHKERR(KIND(Num_free_RHprimme, Num_free_SHprimme)(blockRitzVals, ctx));
464 #else
465       CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
466 #endif /* USE_HERMITIAN */
467    } /* JDqmr variants */
468 
469    if (sortedRitzVals != ritzVals)
470       CHKERR(KIND(Num_free_RHprimme, Num_free_SHprimme)(sortedRitzVals, ctx));
471    CHKERR(KIND(Num_free_dprimme, Num_free_zprimme)(blockOfShifts, ctx));
472    CHKERR(Num_free_RHprimme(approxOlsenEps, ctx));
473    if (ilev != iev) CHKERR(Num_free_iprimme(ilev, ctx));
474 
475    return 0;
476 
477 }
478 
479 
480 /*******************************************************************************
481  * Subroutine computeRobustShift - This function computes the robust shift
482  *    to be used in the correction equation.  The standard shift is the current
483  *    Ritz value.  The calling routine eventually adds the robust shift,
484  *    epsilon, to the standard shift to accelerate the convergence of the outer
485  *    method.
486  *
487  *    If residual vectors norms are compute with approximate eigenvectors with
488  *    B-norm 1, then the bounds used to estimate the distance between the exact
489  *    and the approximate eigenvalue is as follows [1]:
490  *
491  *    |val - appr. val| <= |r|_inv(B) <= sqrt(|inv(B)|) * |r|_2
492  *
493  *    |val - appr. val| <= (|r|_inv(B))^2 / gap <= |inv(B)| * (|r|_2)^2 / gap
494  *
495  *    [1] http://www.netlib.org/utk/people/JackDongarra/etemplates/node181.html
496  *
497  * INPUT ARRAYS AND PARAMETERS
498  * ---------------------------
499  * blockIndex    Index of the block vector for which the correction will be
500  *               solved.
501  *
502  * resNorm       The residual norm of the current Ritz vector
503  *
504  * prevRitzVals  Sorted array of locked Ritz values and Ritz values from the
505  *               previous outer iteration.  Array size is numPrevRitzVals.
506  *
507  * numPrevRitzVals The number of values in prevRitzVals
508  *
509  * sortedRitzVals  Sorted array of locked Ritz values and current Ritz values.
510  *                 The array is of size numLocked+basisSize
511  *
512  * approxOlsenShift The shift to be used to modify r-shift x, for approx Olsen's
513  *
514  * numSorted     The size of sortedRitzVals.
515  *
516  * ilev          Array of size blockSize that maps targeted Ritz values to
517  *               their position with the array sortedRitzVals.
518  *
519  * Return Value
520  * ------------
521  * double  The value to be added to the current shift.  The calling routine
522  *         will compute shift := shift +/- epsilon.
523  ******************************************************************************/
524 
525 STATIC HREAL computeRobustShift(int blockIndex, double resNorm,
526    HREAL *prevRitzVals, int numPrevRitzVals, HREAL *sortedRitzVals,
527    HREAL *approxOlsenShift, int numSorted, int *ilev, primme_params *primme) {
528 
529    int sortedIndex;                 /* Index of the current Ritz value in */
530                                     /* sortedRitzVals.                    */
531    HREAL gap, lowerGap, upperGap;  /* Gaps between the current and       */
532                                     /* neighboring Ritz Values.           */
533    HREAL delta;                    /* The difference between the current */
534                                     /* Ritz value and its value in the    */
535                                     /* previous iteration.                */
536    HREAL epsilon;                  /* The return value.  The calling     */
537                                     /* routine will modify the shift by   */
538                                     /* this amount.                       */
539 
540    /* If this is the 1st outer iteration, there are no previous */
541    /* Ritz values. Return the residual norm                     */
542 
543    if (primme->stats.numOuterIterations <= 1) {
544       *approxOlsenShift = resNorm * sqrt(primme->stats.estimateInvBNorm);
545       return resNorm * sqrt(primme->stats.estimateInvBNorm);
546    }
547 
548    sortedIndex = ilev[blockIndex];
549 
550    /* Compute the gap when the first eigenvalue with respect to the */
551    /* current basis is to be computed.                              */
552 
553    if (sortedIndex == 0 && numSorted >= 2) {
554       lowerGap = HOST_MACHINE_MAX;
555       gap = fabs(sortedRitzVals[1] - sortedRitzVals[0]);
556    }
557    else if (sortedIndex > 0 && numSorted >= 2 && sortedIndex+1 < numSorted) {
558 
559       /* Take the smaller of the two gaps if an interior eigenvalue is */
560       /* targeted.                                                     */
561 
562       lowerGap = fabs(sortedRitzVals[sortedIndex] -
563                       sortedRitzVals[sortedIndex-1]);
564       upperGap = fabs(sortedRitzVals[sortedIndex+1] -
565                       sortedRitzVals[sortedIndex]);
566       gap = min(lowerGap, upperGap);
567    }
568    else {
569       lowerGap = fabs(sortedRitzVals[sortedIndex] -
570                       sortedRitzVals[sortedIndex-1]);
571       gap = lowerGap;
572    }
573 
574    /* Compute the change in a Ritz value between successive iterations */
575 
576    if (sortedIndex < numPrevRitzVals) {
577       delta = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]);
578    }
579    else {
580       delta = HOST_MACHINE_MAX;
581    }
582 
583    /* Compute epsilon according to the theory of Davis and Kahan described */
584    /* in The Symmetric Eigenvalue Problem by B.N. Parlett.                 */
585 
586    if (gap > resNorm) {
587       epsilon = min(
588             delta, min(resNorm * resNorm * primme->stats.estimateInvBNorm / gap,
589                          lowerGap));
590    }
591    else {
592       epsilon = min(resNorm * sqrt(primme->stats.estimateInvBNorm), lowerGap);
593    }
594 
595    *approxOlsenShift = min(delta, epsilon);
596 
597    /* If the above is too large a shift set it to a milder shift */
598    /* epsilon = min(delta, epsilon); */
599 
600    return epsilon;
601 
602 }
603 
604 
605 /*******************************************************************************
606  * Subroutine mergeSort -- This routine merges the sorted lockedEvals array and
607  *   the sorted ritzVals array into a single sorted array, sortedRitzVals.
608  *   It is only called for extreme (largest, smallest) eigenvalues, not for
609  *   interior ones. Thus it does not cover the interior case.
610  *
611  * INPUT ARRAYS AND PARAMETERS
612  * ---------------------------
613  * lockedEvals  Array of size numLocked.  Ritz values that have been locked.
614  *
615  * numLocked    The size of array lockedEvals.
616  *
617  * ritzVals     The current Ritz values.  They are the eigenvalues of the
618  *              projection V'*A*V.
619  *
620  * flags        Array of flags indicating which Ritz values in ritzVals are
621  *              converged.
622  *
623  * basisSize    The current size of the basis V.
624  *
625  * ilev         Maps the blockSize targeted Ritz values to their position
626  *              within sortedRitzVals.
627  *
628  * blockSize    The number of Ritz values targeted during the current outer
629  *              iteration.
630  *
631  * primme       A structure containing various solver parameters
632  *
633  * INPUT/OUTPUT ARRAYS AND PARAMETERS
634  * ----------------------------------
635  * sortedRitzVals  The array lockedEvals and ritzVals are merged into.
636  *
637  ******************************************************************************/
638 
639 STATIC void mergeSort(HREAL *lockedEvals, int numLocked, HREAL *ritzVals,
640    int *flags, int basisSize, HREAL *sortedRitzVals, int *ilev, int blockSize,
641    primme_params *primme) {
642 
643    int count;         /* The number of Ritz values merged        */
644    int eval, ritzVal; /* The index of the next locked Ritz value */
645                       /*   and current Ritz values to be merged  */
646    int blockIndex;    /* Counter used to index ilev              */
647 
648    count = 0;
649    eval = 0;
650    ritzVal = 0;
651    blockIndex = 0;
652 
653    /* Continue merging until all elements have been merged */
654 
655    while (count < numLocked+basisSize) {
656 
657       /* I. The list of locked eigenvalues has been completely merged, or  */
658       /* if ritzVals[ritzVal] is greater/less than lockedEvals[eval], then */
659       /* merge ritzVals[ritzVal] into the list of sorted Ritz values.      */
660       /*                                                                   */
661       /* II. If the list of current Ritz values has been completely merged,*/
662       /* or if lockedEvals[eval] greater/less than ritzVals[ritzVal], then */
663       /* merge lockedEvals[eval] into the list of sorted Ritz values.      */
664 
665       if (eval >= numLocked ||
666             (ritzVal < basisSize &&
667                   ((primme->target == primme_largest &&
668                          ritzVals[ritzVal] >= lockedEvals[eval]) ||
669                         (primme->target == primme_smallest &&
670                               ritzVals[ritzVal] <= lockedEvals[eval])))) {
671          sortedRitzVals[count] = ritzVals[ritzVal];
672 
673          /* If the Ritz value just merged is unconverged and there is room */
674          /* enough in the block to target it, then keep track of its index */
675          /* in the sorted list.                                            */
676 
677          if (blockIndex < blockSize && flags[ritzVal] == UNCONVERGED) {
678             ilev[blockIndex] = count;
679             blockIndex++;
680          }
681 
682          ritzVal++;
683       }
684       else if (ritzVal >= basisSize || (primme->target == primme_largest &&
685                                        lockedEvals[eval] >= ritzVals[ritzVal])
686                                    || (primme->target == primme_smallest &&
687                                        lockedEvals[eval] <= ritzVals[ritzVal])
688               )
689       {
690          sortedRitzVals[count] = lockedEvals[eval];
691          eval++;
692       }
693 
694       count++;
695    }
696 
697 }
698 
699 /*******************************************************************************
700  * Subroutine Olsen_preconditioner_block - This subroutine applies the projected
701  *    preconditioner to a block of blockSize vectors r by computing:
702  *       (I - (K^{-1} B x_i x_i^T) / (x_i^T K^{-1} B x_i) ) K^{-1}r_i
703  *
704  * Input Parameters
705  * ----------------
706  * r          The vectors the preconditioner and projection will be applied to.
707  *
708  * blockSize  The number of vectors in r, x
709  *
710  * primme       Structure containing various solver parameters
711  *
712  * Output parameters
713  * -----------------
714  * x The current Ritz vector used in the projection. It also carries the result.
715  *
716  ******************************************************************************/
717 
718 STATIC int Olsen_preconditioner_block(SCALAR *r, PRIMME_INT ldr, SCALAR *x,
719       PRIMME_INT ldx, SCALAR *Bx, PRIMME_INT ldBx, int blockSize,
720       primme_context ctx) {
721 
722    primme_params *primme = ctx.primme;
723 
724    /* Allocate workspaces */
725 
726    SCALAR *KinvBxr;     /* Contain K^{-1} * [Bx r] */
727    PRIMME_INT ldKinvBxr = primme->ldOPs;
728    CHKERR(Num_malloc_Sprimme(ldKinvBxr * blockSize * 2, &KinvBxr, ctx));
729    SCALAR *KinvBx = KinvBxr, *Kinvr = &KinvBxr[ldKinvBxr * blockSize];
730    HSCALAR *xKinvBx, *xKinvr;
731    CHKERR(Num_malloc_SHprimme(blockSize * 2, &xKinvBx, ctx));
732    xKinvr = xKinvBx + blockSize;
733 
734    /*------------------------------------------------------------------ */
735    /* Compute K^{-1}Bx and K^{-1}r                                      */
736    /*------------------------------------------------------------------ */
737 
738    CHKERR(applyPreconditioner_Sprimme(
739          Bx, primme->nLocal, ldBx, KinvBx, ldKinvBxr, blockSize, ctx));
740 
741    CHKERR(applyPreconditioner_Sprimme(
742          r, primme->nLocal, ldr, Kinvr, ldKinvBxr, blockSize, ctx));
743 
744    /*---------------------------------------------------------------------- */
745    /* Compute local x'K^{-1}Bx and x'K^{-1}r for each vector                */
746    /*---------------------------------------------------------------------- */
747 
748    CHKERR(Num_dist_dots_Sprimme(x, ldx, KinvBx, ldKinvBxr, primme->nLocal,
749          blockSize, xKinvBx, ctx));
750 
751    CHKERR(Num_dist_dots_Sprimme(x, ldx, Kinvr, ldKinvBxr, primme->nLocal,
752          blockSize, xKinvr, ctx));
753 
754    /*------------------------------------------------------------------*/
755    /* Compute K^(-1)r  - ( xKinvr/xKinvBx ) K^(-1)r for each vector     */
756    /*------------------------------------------------------------------*/
757 
758    int blockIndex;
759    for (blockIndex = 0; blockIndex < blockSize; blockIndex++) {
760       CHKERR(Num_copy_matrix_Sprimme(&Kinvr[ldKinvBxr * blockIndex],
761             primme->nLocal, 1, ldKinvBxr, &x[ldx * blockIndex], ldx, ctx));
762 
763       if (ABS(xKinvBx[blockIndex]) > 0.0) {
764          HSCALAR alpha;
765          alpha = -xKinvr[blockIndex] / xKinvBx[blockIndex];
766          Num_axpy_Sprimme(primme->nLocal, alpha,
767                &KinvBx[ldKinvBxr * blockIndex], 1, &x[ldx * blockIndex], 1,
768                ctx);
769       }
770    }
771 
772    CHKERR(Num_free_Sprimme(KinvBxr, ctx));
773    CHKERR(Num_free_SHprimme(xKinvBx, ctx));
774 
775    return 0;
776 
777 }
778 
779 /*******************************************************************************
780  *   subroutine setup_JD_projectors()
781  *
782  *   Sets up whether to use any of the following projections in the
783  *   correction equation:
784  *
785  *  INPUT
786  *  -----
787  *   x                The Ritz vector
788  *   Bx               B*x
789  *   evecs            Converged locked eigenvectors (denoted as Q herein)
790  *   Bevecs           B*evecs
791  *   evecsHat         K^{-1}*B*evecs
792  *   numLocked        Number of locked eigenvectors (if locking)
793  *   numConverged     Number of converged e-vectors copied in evecs (no locking)
794  *   primme           The main data structures that contains the choices for
795  *
796  *       primme->LeftQ  : evecs in the left projector
797  *       primme->LeftX  : x in the left projector
798  *       primme->RightQ : evecs in the right projector
799  *       primme->RightX : x in the right projector
800  *       primme->SkewQ  : evecs in the right one, should be skewed
801  *       primme->SkewX  : x in the right one, should be skewed
802  *
803  *  OUTPUT
804  *  ------
805  *  *KinvBx           The result of K^{-1}Bx (if needed, otherwise NULL)
806  * **LprojectorQ      Pointer to the left projector for Q (could be NULL)
807  * **LprojectorX      Pointer to the left projector for x (could be NULL)
808  * **LprojectorBQ     Pointer to the left projector for BQ (could be NULL)
809  * **LprojectorBX     Pointer to the left projector for Bx (could be NULL)
810  * **RprojectorQ      Pointer to the right projector for Q (could be NULL)
811  * **RprojectorX      Pointer to the right projector for X (could be NULL)
812  *   sizeLprojectorQ  Size of the left projector Q(numConverged/numLocked)/+1 or 0
813  *   sizeLprojectorX  Size of the left projector X (blockSize or 0)
814  *   sizeRprojectorQ  Size of the Q right projector (numConverged/numLocked or 0)
815  *   sizeRprojectorX  Size of the X right projector (blockSize or 0)
816  *
817  * ============================================================================
818  * Functionality:
819  *
820  *   Q = locked vectors
821  *   x = Ritz vector
822  *   K = preconditioner
823  *   r = a vector to apply the
824  *
825  *   (I-BQQ')(I-Bxx)(A-shift B)(I-KBx(x'KBx)^(-1)x')(I-KBQ(Q'KBQ)^(-1)Q') K
826  *    -----  -----               -------------------  -------------------
827  *     Lq     Lx                   Rx  Sx              Rq Sq
828  *
829  * Control parameters from primme (shortened here, see above for full names):
830  *
831  *     Lq = 1/0 whether to apply the left projector I-QQ'
832  *     Lx = 1/0 whether to apply the left projector I-xx'
833  *     Rx = 1/0 whether to apply the right projector with x (skew or orthogonal)
834  *     Rq = 1/0 whether to apply the right projector with Q (skew or orthogonal)
835  *     if (Rx == 1)
836  *        Sx = 1 applies the right skew projector (I-KBx(x'KBx)^(-1)x')
837  *        Sx = 0 applies the right orthogonal projector (I-Bxx')
838  *     if (Rq == 1)
839  *        Sq = 1 applies the right skew projector (I-KQ(Q'K Q)^(-1)Q')
840  *        Sq = 0 applies the right orthogonal projector (I-BQQ')
841  *
842  * Examples
843  * Lq,Lx,Rx,Sx,Rq,Sq
844  *  1  1  1  1  1  1  (the above equation)                   full, expensive JD
845  *  0  0  1  1  1  1  (I-KBx(x'KBx)^(-1)x')(I-KBQ(Q'KBQ)^(-1)Q')        classic JD
846  *  0  1  1  1  0  0  (I-Bxx')(A-shift B)(I-KBx(x'KBx)^(-1)x')     JD w/o deflaton
847  *  0  0  0  0  0  0  (A-shift B) K                                  classic GD
848  *  1  1  1  1  0  0  (I-BQQ')(I-Bxx')(A-shift B)(I-KBx(x'KBx)^(-1)x')  RECOMMENDED
849  *  1  1  0  0  0  0  (I-BQQ')(I-Bxx')(A-shift B)                         similar
850  *      Others...
851  *                    Researchers can experiment with other projection schemes,
852  *                    although our experience says they are rarely beneficial
853  *
854  * The left orthogonal projector for x and Q can be performed as one block
855  * containing [Q x]. However, the right projections (if either is skew)
856  * are performed separately for Q and x. There are memory reasons for
857  * doing so, but also we do not have to factor (Q'KBQ) at every outer step;
858  * only when an eval converges.
859  *
860  ******************************************************************************/
861 
862 STATIC int setup_JD_projectors(SCALAR *x, PRIMME_INT ldx, SCALAR *Bx,
863       PRIMME_INT ldBx, SCALAR *evecs, PRIMME_INT ldevecs, SCALAR *Bevecs,
864       PRIMME_INT ldBevecs, SCALAR *evecsHat, PRIMME_INT ldevecsHat,
865       SCALAR *KinvBx, PRIMME_INT ldKinvBx, HSCALAR *xKinvBx,
866       SCALAR **LprojectorQ, PRIMME_INT *ldLprojectorQ, SCALAR **LprojectorX,
867       PRIMME_INT *ldLprojectorX, SCALAR **LprojectorBQ,
868       PRIMME_INT *ldLprojectorBQ, SCALAR **LprojectorBX,
869       PRIMME_INT *ldLprojectorBX, SCALAR **RprojectorQ,
870       PRIMME_INT *ldRprojectorQ, SCALAR **RprojectorX,
871       PRIMME_INT *ldRprojectorX, int *sizeLprojectorQ, int *sizeLprojectorX,
872       int *sizeRprojectorQ, int *sizeRprojectorX, int numLocked,
873       int numConverged, int blockSize, primme_context ctx) {
874 
875    primme_params *primme = ctx.primme;
876    int n, sizeEvecs;
877 
878    *sizeLprojectorQ = 0;
879    *sizeLprojectorX = 0;
880    *sizeRprojectorQ = 0;
881    *sizeRprojectorX = 0;
882    *ldLprojectorQ = 0;
883    *ldLprojectorX = 0;
884    *ldLprojectorBQ = 0;
885    *ldLprojectorBX = 0;
886    *ldRprojectorQ = 0;
887    *ldRprojectorX = 0;
888    *LprojectorQ = NULL;
889    *LprojectorX = NULL;
890    *LprojectorBQ = NULL;
891    *LprojectorBX = NULL;
892    *RprojectorQ = NULL;
893    *RprojectorX = NULL;
894 
895    n = primme->nLocal;
896    if (primme->locking)
897       sizeEvecs = primme->numOrthoConst+numLocked;
898    else
899       sizeEvecs = primme->numOrthoConst+numConverged;
900 
901    /* ---------------------------------------------------------------------------- */
902    /* Set up the left projector arrays. Make x adjacent to Q and Bx adjacent to BQ */
903    /* ---------------------------------------------------------------------------- */
904 
905    if (primme->correctionParams.projectors.LeftQ) {
906 
907       *sizeLprojectorQ = sizeEvecs;
908       *LprojectorQ = evecs;
909       *ldLprojectorQ = ldevecs;
910       *LprojectorBQ = Bevecs;
911       *ldLprojectorBQ = ldBevecs;
912       if (primme->correctionParams.projectors.LeftX) {
913          if (blockSize <= 1) {
914             CHKERR(Num_copy_matrix_Sprimme(x, n, blockSize, ldx,
915                   &evecs[sizeEvecs * ldevecs], ldevecs, ctx));
916             if (Bx != x) {
917                CHKERR(Num_copy_matrix_Sprimme(Bx, n, blockSize, ldBx,
918                      &Bevecs[sizeEvecs * ldBevecs], ldBevecs, ctx));
919             }
920             *sizeLprojectorQ += blockSize;
921          }
922          else {
923             *sizeLprojectorX = blockSize;
924             *LprojectorX = x;
925             *ldLprojectorX = ldx;
926             *LprojectorBX = Bx;
927             *ldLprojectorBX = ldBx;
928          }
929       }
930    }
931    else {
932       if (primme->correctionParams.projectors.LeftX) {
933          *LprojectorX = x;
934          *ldLprojectorX = ldx;
935          *LprojectorBX = Bx;
936          *ldLprojectorBX = ldBx;
937          *sizeLprojectorX = blockSize;
938       }
939    }
940 
941    /* --------------------------------------------------------*/
942    /* Set up the right projector arrays. Q and x separately   */
943    /* --------------------------------------------------------*/
944 
945    /* ------------*/
946    /* First for Q */
947    /* ------------*/
948    if (primme->correctionParams.projectors.RightQ) {
949 
950       if (primme->correctionParams.precondition    &&
951           primme->correctionParams.projectors.SkewQ) {
952          *RprojectorQ = evecsHat;       /* Use the K^(-1)*B*evecs array */
953          *ldRprojectorQ = ldevecsHat;
954       }
955       else {  /* Right Q but not SkewQ */
956          *RprojectorQ = Bevecs;          /* Use just the Bevecs array. */
957          *ldRprojectorQ = ldBevecs;
958       }
959       *sizeRprojectorQ = sizeEvecs;
960 
961    }
962    else { /* if no RightQ projector */
963 
964       *RprojectorQ = NULL;
965       *sizeRprojectorQ = 0;
966    }
967 
968    /* ------------*/
969    /* Then for x  */
970    /* ------------*/
971    if (primme->correctionParams.projectors.RightX) {
972 
973       if (primme->correctionParams.precondition   &&
974           primme->correctionParams.projectors.SkewX) {
975          CHKERR(applyPreconditioner_Sprimme(
976                Bx, primme->nLocal, ldBx, KinvBx, ldKinvBx, blockSize, ctx));
977          *RprojectorX  = KinvBx;
978          *ldRprojectorX  = ldKinvBx;
979          CHKERR(Num_dist_dots_Sprimme(x, ldx, KinvBx, ldKinvBx, primme->nLocal,
980                blockSize, xKinvBx, ctx));
981       }
982       else {
983          *RprojectorX = Bx;
984          *ldRprojectorX  = ldBx;
985          int i;
986          for (i=0; i<blockSize; i++) xKinvBx[i] = 1.0;
987       }
988       *sizeRprojectorX = blockSize;
989    }
990    else {
991       *RprojectorX = NULL;
992       *sizeRprojectorX = 0;
993    }
994 
995    return 0;
996 
997 } /* setup_JD_projectors */
998 
999 #endif /* SUPPORTED_TYPE */
1000