1 /******************************************************************************* 2 * Copyright (c) 2018, College of William & Mary 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * * Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * * Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * * Neither the name of the College of William & Mary nor the 13 * names of its contributors may be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL THE COLLEGE OF WILLIAM & MARY BE LIABLE FOR ANY 20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 * PRIMME: https://github.com/primme/primme 28 * Contact: Andreas Stathopoulos, a n d r e a s _at_ c s . w m . e d u 29 ******************************************************************************* 30 * File: correction.c 31 * 32 * Purpose - Computes the correction corresponding to each block Ritz vectors. 33 * 34 ******************************************************************************/ 35 36 #ifndef THIS_FILE 37 #define THIS_FILE "../eigs/correction.c" 38 #endif 39 40 #include "numerical.h" 41 #include "template_normal.h" 42 #include "common_eigs.h" 43 /* Keep automatically generated headers under this section */ 44 #ifndef CHECK_TEMPLATE 45 #include "correction.h" 46 #include "inner_solve.h" 47 #include "auxiliary_eigs.h" 48 #endif 49 50 #ifdef SUPPORTED_TYPE 51 52 /******************************************************************************* 53 * Subroutine solve_correction - This routine solves the correction equation 54 * for each Ritz vector and residual in the block. 55 * 56 * INPUT ARRAYS AND PARAMETERS 57 * --------------------------- 58 * W The last blockSize vectors of W contain the residuals 59 * 60 * evecs The converged Ritz vectors. Array is of dimension numLocked. 61 * 62 * evecsHat K^{-1}evecs given a preconditioner K. 63 * (accessed only if skew projector is requested) 64 * 65 * Mfact The factorization of the matrix evecs'*evecsHat 66 * ipivot The pivots for the Mfact factorization 67 * 68 * lockedEvals The locked eigenvalues 69 * 70 * numLocked The number of locked eigenvalues (zero if not locking) 71 * 72 * numConvergedStored Number of converged eigenvectors in V that have been 73 * copied in evecs when no locking is employed, to accommodate 74 * for a requested skew projection with evecs. 75 * 76 * ritzVals Array of size basisSize. The Ritz values corresponding to 77 * the current basis V 78 * 79 * flags Array indicating which eigenvectors have converged 80 * 81 * basisSize The size of the basis V 82 * 83 * iev Array of size blockSize. The index of each block Ritz value 84 * in ritzVals (and each Ritz vector in V) 85 * 86 * blockSize The current block size 87 * 88 * machEps machine precision 89 * 90 * rwork Real workspace of size 91 * 3*maxEvecsSize + 2*primme->maxBlockSize 92 * + (primme->numEvals+primme->maxBasisSize) 93 * *----------------------------------------------------* 94 * | The following are optional and mutually exclusive: | 95 * *------------------------------+ | 96 * + 4*primme->nLocal + primme->nLocal | For QMR work and sol| 97 * + primme->nLocal*primme->maxBlockSize | OLSEN for KinvBx | 98 * *---------------------* 99 * 100 * rworkSize the size of rwork. If less than needed, func returns needed. 101 * 102 * iwork Integer workspace of size maxBlockSize 103 * 104 * primme Structure containing various solver parameters 105 * 106 * 107 * INPUT/OUTPUT ARRAYS AND PARAMETERS 108 * ---------------------------------- 109 * V The orthonormal basis. The last blockSize vectors of V 110 * contain the Ritz vectors 111 * 112 * blockNorms On input, residual norms of the Ritz vectors computed 113 * during the current outer iteration. 114 * On output, the approximate residual norms of the corrected 115 * Ritz vectors (through JDQMR only). 116 * 117 * 118 * prevRitzVals Array of size numPrevRitzVals. The Ritz values from the 119 * previous iteration and converged Ritz values 120 * 121 * numPrevRitzVals The of size prevRitzVals updated every outer step 122 * 123 * touch Parameter used in inner solve stopping criteria 124 * 125 * Return Value 126 * ------------ 127 * int Error code: 0 upon success, nonzero otherwise 128 * -1 innner solver failure 129 * >0 the needed rworkSize, if the given was not enough 130 * 131 ******************************************************************************/ 132 133 TEMPLATE_PLEASE 134 int solve_correction_Sprimme(SCALAR *V, PRIMME_INT ldV, SCALAR *W, 135 PRIMME_INT ldW, SCALAR *BV, PRIMME_INT ldBV, SCALAR *evecs, 136 PRIMME_INT ldevecs, SCALAR *Bevecs, PRIMME_INT ldBevecs, SCALAR *evecsHat, 137 PRIMME_INT ldevecsHat, HSCALAR *Mfact, int *ipivot, HEVAL *lockedEvals, 138 int numLocked, int numConvergedStored, HEVAL *ritzVals, 139 HEVAL *prevRitzVals, int *numPrevRitzVals, int *flags, int basisSize, 140 HREAL *blockNorms, int *iev, int blockSize, int *touch, double startTime, 141 primme_context ctx) { 142 143 KIND(, (void)lockedEvals); 144 KIND(, (void)flags); 145 KIND(, (void)evecs); 146 KIND(, (void)ldevecs); 147 KIND(, (void)Bevecs); 148 KIND(, (void)ldBevecs); 149 KIND(, (void)evecsHat); 150 KIND(, (void)ldevecsHat); 151 KIND(, (void)Mfact); 152 KIND(, (void)ipivot); 153 KIND(, (void)numConvergedStored); 154 KIND(, (void)touch); 155 KIND(, (void)startTime); 156 157 primme_params *primme = ctx.primme; 158 int blockIndex; /* Loop index. Ranges from 0..blockSize-1. */ 159 int *ilev; /* Array of size blockSize. Maps the target Ritz */ 160 /* values to their positions in the sortedEvals */ 161 /* array. */ 162 163 HEVAL *sortedRitzVals; /* Sorted array of current and converged Ritz */ 164 /* values. Size of array is numLocked+basisSize. */ 165 KIND(double, PRIMME_COMPLEX_DOUBLE) * 166 blockOfShifts; /* Shifts for (A-shiftI) or (if needed) (K-shiftI)*/ 167 HREAL *approxOlsenEps; /* Shifts for approximate Olsen implementation */ 168 169 CHKERR(KIND(Num_malloc_dprimme, Num_malloc_zprimme)( 170 blockSize, &blockOfShifts, ctx)); 171 CHKERR(Num_malloc_RHprimme(blockSize, &approxOlsenEps, ctx)); 172 173 /*------------------------------------------------------------*/ 174 /* Figuring out preconditioning shifts (robust, Olsen, etc) */ 175 /*------------------------------------------------------------*/ 176 /* blockOfShifts will contain the preconditioning shifts: */ 177 /* either Ritz values or robustShifts computed below. These shifts */ 178 /* will be used in the correction equations or in inverting (K-sigma I) */ 179 /* approxOlsenEps will contain error approximations for eigenavalues */ 180 /* to be used for Olsen's method (when innerIterations =0). */ 181 182 #ifdef USE_HERMITIAN 183 if (primme->locking && (primme->target == primme_smallest || 184 primme->target == primme_largest)) { 185 /* Combine the sorted list of locked Ritz values with the sorted */ 186 /* list of current Ritz values, ritzVals. The merging of the two */ 187 /* lists lockedEvals and ritzVals is stored in sortedRitzVals. */ 188 189 CHKERR(Num_malloc_RHprimme(numLocked + basisSize, &sortedRitzVals, ctx)); 190 CHKERR(Num_malloc_iprimme(blockSize, &ilev, ctx)); 191 mergeSort(lockedEvals, numLocked, ritzVals, flags, basisSize, 192 sortedRitzVals, ilev, blockSize, primme); 193 } else 194 #endif /* USE_HERMITIAN */ 195 { 196 /* In the case of soft-locking or when we look for interior ones */ 197 /* the sorted evals are simply the ritzVals, targeted as iev */ 198 199 sortedRitzVals = ritzVals; 200 ilev = iev; 201 } 202 203 // For interior pairs we an approach based on the target shift given by the 204 // user. For Rayleigh-Ritz, the Ritz value does not get monotonically closer 205 // to the exact value, and it is not trusted as a good shift until its 206 // residual vector norm is small. We capture this hint in the following 207 // heuristic: take the closest point in the interval Ritz value +- residual 208 // norm to the target as the shift. For refined extraction, 209 // |Ritz value - target| <= the Ritz singular value. 210 // So we use the Ritz value as the shift as soon as the Ritz value is closer 211 // to the Ritz singular value than to the target. 212 213 if (primme->target != primme_smallest && primme->target != primme_largest) { 214 215 for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { 216 217 double targetShift = 218 primme->numTargetShifts > 0 219 ? primme->targetShifts[min( 220 primme->numTargetShifts - 1, numLocked)] 221 : 0.0; 222 int sortedIndex = ilev[blockIndex]; 223 if (EVAL_ABS(sortedRitzVals[sortedIndex] - (HEVAL)targetShift) < 224 blockNorms[blockIndex] * sqrt(primme->stats.estimateInvBNorm)) { 225 blockOfShifts[blockIndex] = targetShift; 226 } else if (primme->projectionParams.projection == 227 primme_proj_refined) { 228 blockOfShifts[blockIndex] = sortedRitzVals[sortedIndex]; 229 } else { 230 blockOfShifts[blockIndex] = 231 sortedRitzVals[sortedIndex] + 232 ((HEVAL)(blockNorms[blockIndex] * 233 sqrt(primme->stats.estimateInvBNorm))) * 234 ((HEVAL)targetShift - sortedRitzVals[sortedIndex]) / 235 EVAL_ABS( 236 (HEVAL)targetShift - sortedRitzVals[sortedIndex]); 237 } 238 239 if (sortedIndex < *numPrevRitzVals) { 240 approxOlsenEps[blockIndex] = EVAL_ABS( 241 prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); 242 } 243 else { 244 approxOlsenEps[blockIndex] = 245 blockNorms[blockIndex] * sqrt(primme->stats.estimateInvBNorm); 246 } 247 } /* for loop */ 248 249 /* Remember the previous ritz values*/ 250 *numPrevRitzVals = basisSize; 251 CHKERR(KIND(Num_copy_RHprimme, Num_copy_SHprimme)( 252 *numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1, ctx)); 253 254 } /* user provided shifts */ 255 #ifdef USE_HERMITIAN 256 else { 257 /*-----------------------------------------------------------------*/ 258 /* else it is primme_smallest or primme_largest */ 259 /*-----------------------------------------------------------------*/ 260 261 if (primme->correctionParams.robustShifts) { 262 /*----------------------------------------------------*/ 263 /* Subtract/add a robust shift from/to the Ritz value */ 264 /*----------------------------------------------------*/ 265 266 /* Find the robust shift for each block vector */ 267 for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { 268 269 int sortedIndex = ilev[blockIndex]; 270 HREAL eval = sortedRitzVals[sortedIndex]; 271 272 HREAL robustShift = computeRobustShift(blockIndex, 273 blockNorms[blockIndex], prevRitzVals, *numPrevRitzVals, 274 sortedRitzVals, &approxOlsenEps[blockIndex], 275 numLocked + basisSize, ilev, primme); 276 277 /* Subtract/add the shift if looking for the smallest/largest */ 278 /* eigenvalues, Do not go beyond the previous computed eigval */ 279 280 if (primme->target == primme_smallest) { 281 blockOfShifts[blockIndex] = eval - robustShift; 282 if (sortedIndex > 0) blockOfShifts[blockIndex] = 283 max(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); 284 } 285 else { 286 blockOfShifts[blockIndex] = eval + robustShift; 287 if (sortedIndex > 0) blockOfShifts[blockIndex] = 288 min(blockOfShifts[blockIndex], sortedRitzVals[sortedIndex-1]); 289 } /* robust shifting */ 290 291 } /* for loop */ 292 293 } /* endif robust shifts */ 294 else { 295 /*--------------------------------------------------------------*/ 296 /* Otherwise, the shifts for both preconditioner and correction */ 297 /* equation should be just the Ritz values. For Olsen's method, */ 298 /* the shifts for r-eps*x, are chosen as the difference in Ritz */ 299 /* value between successive iterations. */ 300 /*--------------------------------------------------------------*/ 301 302 for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { 303 int ritzIndex = iev[blockIndex]; 304 int sortedIndex = ilev[blockIndex]; 305 blockOfShifts[blockIndex] = ritzVals[ritzIndex]; 306 if (sortedIndex < *numPrevRitzVals) { 307 approxOlsenEps[blockIndex] = fabs( 308 prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); 309 } 310 else { 311 approxOlsenEps[blockIndex] = 312 blockNorms[blockIndex] * 313 sqrt(primme->stats.estimateInvBNorm); 314 } 315 } /* for loop */ 316 } /* else no robust shifts */ 317 318 /* Remember the previous ritz values*/ 319 *numPrevRitzVals = numLocked+basisSize; 320 Num_copy_RHprimme( 321 *numPrevRitzVals, sortedRitzVals, 1, prevRitzVals, 1, ctx); 322 323 } /* else primme_smallest or primme_largest */ 324 #endif /* USE_HERMITIAN */ 325 326 /* Equip the primme struct with the blockOfShifts, in case the user */ 327 /* wants to precondition (K-sigma_i I)^{-1} with a different shift */ 328 /* for each vector */ 329 330 primme->ShiftsForPreconditioner = (double *)blockOfShifts; 331 332 /*------------------------------------------------------------ */ 333 /* Generalized Davidson variants -- No inner iterations */ 334 /*------------------------------------------------------------ */ 335 if (primme->correctionParams.maxInnerIterations == 0) { 336 /* This is Generalized Davidson or approximate Olsen's method. */ 337 /* Perform block preconditioning (with or without projections) */ 338 339 SCALAR *r = &W[ldW*basisSize]; /* All the block residuals */ 340 SCALAR *x = &V[ldV*basisSize]; /* All the block Ritz vectors */ 341 SCALAR *Bx = &BV[ldBV*basisSize]; /* B*x */ 342 343 if ( primme->correctionParams.projectors.RightX && 344 primme->correctionParams.projectors.SkewX ) { 345 /* Compute exact Olsen's projected preconditioner. This is */ 346 /* expensive and rarely improves anything! Included for completeness*/ 347 348 Olsen_preconditioner_block(r, ldW, x, ldV, Bx, ldBV, blockSize, ctx); 349 } 350 else { 351 // In general the Olsen's approximation is not useful without 352 // preconditioning. However expanding the basis with r - approxOlsen*x 353 // helps in checking the practical convergence. Check main_iter for 354 // more details. 355 356 if (primme->correctionParams.projectors.RightX && 357 ((primme->correctionParams.precondition && 358 primme->applyPreconditioner) || 359 Bx != x || 360 (primme->locking && 361 primme->orth == primme_orth_implicit_I))) { 362 #ifdef USE_HERMITIAN 363 /*Compute a cheap approximation to OLSENS, where (x'Kinvr)/xKinvx */ 364 /*is approximated by e: Kinvr-e*KinvBx=Kinv(r-e*x)=Kinv(I-ct*x*x')r*/ 365 366 for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { 367 /* Compute r_i = r_i - err_i * Bx_i */ 368 Num_axpy_Sprimme(primme->nLocal, -approxOlsenEps[blockIndex], 369 &Bx[ldBV * blockIndex], 1, &r[ldW * blockIndex], 1, ctx); 370 } 371 #else 372 CHKERR(PRIMME_FUNCTION_UNAVAILABLE); 373 #endif 374 } 375 376 /* GD: compute K^{-1}r , or approx.Olsen: K^{-1}(r-eBx) */ 377 378 CHKERR(applyPreconditioner_Sprimme(r, primme->nLocal, ldW, 379 x, ldV, blockSize, ctx)); 380 } 381 } 382 /* ------------------------------------------------------------ */ 383 /* JDQMR --- JD inner-outer variants */ 384 /* ------------------------------------------------------------ */ 385 else { /* maxInnerIterations > 0 We perform inner-outer JDQMR */ 386 #ifdef USE_HERMITIAN 387 int touch0 = *touch; 388 389 SCALAR *sol, *KinvBx; 390 PRIMME_INT ldsol = primme->ldOPs, ldKinvBx = primme->ldOPs; 391 CHKERR(Num_malloc_Sprimme(ldsol * blockSize, &sol, ctx)); 392 CHKERR(Num_malloc_Sprimme(ldKinvBx * blockSize, &KinvBx, ctx)); 393 394 395 HSCALAR *xKinvBx; /* Stores x'*K^{-1}Bx if needed */ 396 CHKERR(Num_malloc_SHprimme(blockSize, &xKinvBx, ctx)); 397 398 SCALAR *r = &W[ldW * basisSize]; 399 SCALAR *x = &V[ldV * basisSize]; 400 SCALAR *Bx = &BV[ldBV * basisSize]; 401 402 /* Set up the left/right/skew projectors for JDQMR. */ 403 /* The pointers Lprojector, Rprojector(Q/X) point to the */ 404 /* appropriate arrays for use in the projection step */ 405 406 int sizeLprojectorQ; /* Sizes of the various left/right projectors */ 407 int sizeLprojectorX; /* These will be 0/1/or numOrthConstr+numLocked */ 408 int sizeRprojectorQ; /* or numOrthConstr+numConvergedStored w/o locking*/ 409 int sizeRprojectorX; 410 SCALAR *LprojectorQ; /* Q pointer for (I-Q*Q'). Usually points to evecs*/ 411 SCALAR *LprojectorX; /* Usually points to x */ 412 SCALAR *LprojectorBQ; /* BQ pointer for (I-BQ*Q'). Usually points to Bevecs*/ 413 SCALAR *LprojectorBX; /* Usually points to Bx */ 414 SCALAR *RprojectorQ; /* May point to evecs/evecsHat depending on skewQ */ 415 SCALAR *RprojectorX; /* May point to x/Kinvx depending on skewX */ 416 PRIMME_INT ldLprojectorQ; /* The leading dimension of LprojectorQ */ 417 PRIMME_INT ldLprojectorX; /* The leading dimension of LprojectorX */ 418 PRIMME_INT ldLprojectorBQ;/* The leading dimension of LprojectorBQ */ 419 PRIMME_INT ldLprojectorBX;/* The leading dimension of LprojectorBX */ 420 PRIMME_INT ldRprojectorQ; /* The leading dimension of RprojectorQ */ 421 PRIMME_INT ldRprojectorX; /* The leading dimension of RprojectorX */ 422 423 CHKERR(setup_JD_projectors(x, ldV, Bx, ldBV, evecs, ldevecs, Bevecs, 424 ldBevecs, evecsHat, ldevecsHat, KinvBx, ldKinvBx, xKinvBx, 425 &LprojectorQ, &ldLprojectorQ, &LprojectorX, &ldLprojectorX, 426 &LprojectorBQ, &ldLprojectorBQ, &LprojectorBX, &ldLprojectorBX, 427 &RprojectorQ, &ldRprojectorQ, &RprojectorX, &ldRprojectorX, 428 &sizeLprojectorQ, &sizeLprojectorX, &sizeRprojectorQ, 429 &sizeRprojectorX, numLocked, numConvergedStored, blockSize, ctx)); 430 431 /* Map the index of the block vector to its corresponding eigenvalue */ 432 /* index, and the shift for the correction equation. Also make the */ 433 /* shift available to PRIMME, in case (K-shift B)^-1 is needed */ 434 435 HEVAL *blockRitzVals; 436 CHKERR(KIND(Num_malloc_RHprimme, Num_malloc_SHprimme)( 437 blockSize, &blockRitzVals, ctx)); 438 KIND(Num_compact_vecs_RHprimme, Num_compact_vecs_SHprimme) 439 (ritzVals, 1, blockSize, 1, iev, blockRitzVals, 1, 0 /* force copy */, 440 ctx); 441 primme->ShiftsForPreconditioner = (double *)blockOfShifts; 442 443 /* Pass the original value of touch and update touch as the maximum */ 444 /* value that takes for all inner_solve calls */ 445 int touch1 = touch0; 446 447 CHKERR(inner_solve_Sprimme(blockSize, x, ldV, Bx, ldBV, r, ldW, 448 blockNorms, evecs, ldevecs, Mfact, ipivot, xKinvBx, LprojectorQ, 449 ldLprojectorQ, LprojectorX, ldLprojectorX, LprojectorBQ, 450 ldLprojectorBQ, LprojectorBX, ldLprojectorBX, RprojectorQ, 451 ldRprojectorQ, RprojectorX, ldRprojectorX, sizeLprojectorQ, 452 sizeLprojectorX, sizeRprojectorQ, sizeRprojectorX, sol, ldsol, 453 blockRitzVals, blockOfShifts, &touch1, startTime, ctx)); 454 *touch = max(*touch, touch1); 455 456 Num_copy_matrix_Sprimme(sol, primme->nLocal, blockSize, ldsol, 457 &V[ldV * basisSize], ldV, ctx); 458 459 460 CHKERR(Num_free_SHprimme(xKinvBx, ctx)); 461 CHKERR(Num_free_Sprimme(sol, ctx)); 462 CHKERR(Num_free_Sprimme(KinvBx, ctx)); 463 CHKERR(KIND(Num_free_RHprimme, Num_free_SHprimme)(blockRitzVals, ctx)); 464 #else 465 CHKERR(PRIMME_FUNCTION_UNAVAILABLE); 466 #endif /* USE_HERMITIAN */ 467 } /* JDqmr variants */ 468 469 if (sortedRitzVals != ritzVals) 470 CHKERR(KIND(Num_free_RHprimme, Num_free_SHprimme)(sortedRitzVals, ctx)); 471 CHKERR(KIND(Num_free_dprimme, Num_free_zprimme)(blockOfShifts, ctx)); 472 CHKERR(Num_free_RHprimme(approxOlsenEps, ctx)); 473 if (ilev != iev) CHKERR(Num_free_iprimme(ilev, ctx)); 474 475 return 0; 476 477 } 478 479 480 /******************************************************************************* 481 * Subroutine computeRobustShift - This function computes the robust shift 482 * to be used in the correction equation. The standard shift is the current 483 * Ritz value. The calling routine eventually adds the robust shift, 484 * epsilon, to the standard shift to accelerate the convergence of the outer 485 * method. 486 * 487 * If residual vectors norms are compute with approximate eigenvectors with 488 * B-norm 1, then the bounds used to estimate the distance between the exact 489 * and the approximate eigenvalue is as follows [1]: 490 * 491 * |val - appr. val| <= |r|_inv(B) <= sqrt(|inv(B)|) * |r|_2 492 * 493 * |val - appr. val| <= (|r|_inv(B))^2 / gap <= |inv(B)| * (|r|_2)^2 / gap 494 * 495 * [1] http://www.netlib.org/utk/people/JackDongarra/etemplates/node181.html 496 * 497 * INPUT ARRAYS AND PARAMETERS 498 * --------------------------- 499 * blockIndex Index of the block vector for which the correction will be 500 * solved. 501 * 502 * resNorm The residual norm of the current Ritz vector 503 * 504 * prevRitzVals Sorted array of locked Ritz values and Ritz values from the 505 * previous outer iteration. Array size is numPrevRitzVals. 506 * 507 * numPrevRitzVals The number of values in prevRitzVals 508 * 509 * sortedRitzVals Sorted array of locked Ritz values and current Ritz values. 510 * The array is of size numLocked+basisSize 511 * 512 * approxOlsenShift The shift to be used to modify r-shift x, for approx Olsen's 513 * 514 * numSorted The size of sortedRitzVals. 515 * 516 * ilev Array of size blockSize that maps targeted Ritz values to 517 * their position with the array sortedRitzVals. 518 * 519 * Return Value 520 * ------------ 521 * double The value to be added to the current shift. The calling routine 522 * will compute shift := shift +/- epsilon. 523 ******************************************************************************/ 524 525 STATIC HREAL computeRobustShift(int blockIndex, double resNorm, 526 HREAL *prevRitzVals, int numPrevRitzVals, HREAL *sortedRitzVals, 527 HREAL *approxOlsenShift, int numSorted, int *ilev, primme_params *primme) { 528 529 int sortedIndex; /* Index of the current Ritz value in */ 530 /* sortedRitzVals. */ 531 HREAL gap, lowerGap, upperGap; /* Gaps between the current and */ 532 /* neighboring Ritz Values. */ 533 HREAL delta; /* The difference between the current */ 534 /* Ritz value and its value in the */ 535 /* previous iteration. */ 536 HREAL epsilon; /* The return value. The calling */ 537 /* routine will modify the shift by */ 538 /* this amount. */ 539 540 /* If this is the 1st outer iteration, there are no previous */ 541 /* Ritz values. Return the residual norm */ 542 543 if (primme->stats.numOuterIterations <= 1) { 544 *approxOlsenShift = resNorm * sqrt(primme->stats.estimateInvBNorm); 545 return resNorm * sqrt(primme->stats.estimateInvBNorm); 546 } 547 548 sortedIndex = ilev[blockIndex]; 549 550 /* Compute the gap when the first eigenvalue with respect to the */ 551 /* current basis is to be computed. */ 552 553 if (sortedIndex == 0 && numSorted >= 2) { 554 lowerGap = HOST_MACHINE_MAX; 555 gap = fabs(sortedRitzVals[1] - sortedRitzVals[0]); 556 } 557 else if (sortedIndex > 0 && numSorted >= 2 && sortedIndex+1 < numSorted) { 558 559 /* Take the smaller of the two gaps if an interior eigenvalue is */ 560 /* targeted. */ 561 562 lowerGap = fabs(sortedRitzVals[sortedIndex] - 563 sortedRitzVals[sortedIndex-1]); 564 upperGap = fabs(sortedRitzVals[sortedIndex+1] - 565 sortedRitzVals[sortedIndex]); 566 gap = min(lowerGap, upperGap); 567 } 568 else { 569 lowerGap = fabs(sortedRitzVals[sortedIndex] - 570 sortedRitzVals[sortedIndex-1]); 571 gap = lowerGap; 572 } 573 574 /* Compute the change in a Ritz value between successive iterations */ 575 576 if (sortedIndex < numPrevRitzVals) { 577 delta = fabs(prevRitzVals[sortedIndex] - sortedRitzVals[sortedIndex]); 578 } 579 else { 580 delta = HOST_MACHINE_MAX; 581 } 582 583 /* Compute epsilon according to the theory of Davis and Kahan described */ 584 /* in The Symmetric Eigenvalue Problem by B.N. Parlett. */ 585 586 if (gap > resNorm) { 587 epsilon = min( 588 delta, min(resNorm * resNorm * primme->stats.estimateInvBNorm / gap, 589 lowerGap)); 590 } 591 else { 592 epsilon = min(resNorm * sqrt(primme->stats.estimateInvBNorm), lowerGap); 593 } 594 595 *approxOlsenShift = min(delta, epsilon); 596 597 /* If the above is too large a shift set it to a milder shift */ 598 /* epsilon = min(delta, epsilon); */ 599 600 return epsilon; 601 602 } 603 604 605 /******************************************************************************* 606 * Subroutine mergeSort -- This routine merges the sorted lockedEvals array and 607 * the sorted ritzVals array into a single sorted array, sortedRitzVals. 608 * It is only called for extreme (largest, smallest) eigenvalues, not for 609 * interior ones. Thus it does not cover the interior case. 610 * 611 * INPUT ARRAYS AND PARAMETERS 612 * --------------------------- 613 * lockedEvals Array of size numLocked. Ritz values that have been locked. 614 * 615 * numLocked The size of array lockedEvals. 616 * 617 * ritzVals The current Ritz values. They are the eigenvalues of the 618 * projection V'*A*V. 619 * 620 * flags Array of flags indicating which Ritz values in ritzVals are 621 * converged. 622 * 623 * basisSize The current size of the basis V. 624 * 625 * ilev Maps the blockSize targeted Ritz values to their position 626 * within sortedRitzVals. 627 * 628 * blockSize The number of Ritz values targeted during the current outer 629 * iteration. 630 * 631 * primme A structure containing various solver parameters 632 * 633 * INPUT/OUTPUT ARRAYS AND PARAMETERS 634 * ---------------------------------- 635 * sortedRitzVals The array lockedEvals and ritzVals are merged into. 636 * 637 ******************************************************************************/ 638 639 STATIC void mergeSort(HREAL *lockedEvals, int numLocked, HREAL *ritzVals, 640 int *flags, int basisSize, HREAL *sortedRitzVals, int *ilev, int blockSize, 641 primme_params *primme) { 642 643 int count; /* The number of Ritz values merged */ 644 int eval, ritzVal; /* The index of the next locked Ritz value */ 645 /* and current Ritz values to be merged */ 646 int blockIndex; /* Counter used to index ilev */ 647 648 count = 0; 649 eval = 0; 650 ritzVal = 0; 651 blockIndex = 0; 652 653 /* Continue merging until all elements have been merged */ 654 655 while (count < numLocked+basisSize) { 656 657 /* I. The list of locked eigenvalues has been completely merged, or */ 658 /* if ritzVals[ritzVal] is greater/less than lockedEvals[eval], then */ 659 /* merge ritzVals[ritzVal] into the list of sorted Ritz values. */ 660 /* */ 661 /* II. If the list of current Ritz values has been completely merged,*/ 662 /* or if lockedEvals[eval] greater/less than ritzVals[ritzVal], then */ 663 /* merge lockedEvals[eval] into the list of sorted Ritz values. */ 664 665 if (eval >= numLocked || 666 (ritzVal < basisSize && 667 ((primme->target == primme_largest && 668 ritzVals[ritzVal] >= lockedEvals[eval]) || 669 (primme->target == primme_smallest && 670 ritzVals[ritzVal] <= lockedEvals[eval])))) { 671 sortedRitzVals[count] = ritzVals[ritzVal]; 672 673 /* If the Ritz value just merged is unconverged and there is room */ 674 /* enough in the block to target it, then keep track of its index */ 675 /* in the sorted list. */ 676 677 if (blockIndex < blockSize && flags[ritzVal] == UNCONVERGED) { 678 ilev[blockIndex] = count; 679 blockIndex++; 680 } 681 682 ritzVal++; 683 } 684 else if (ritzVal >= basisSize || (primme->target == primme_largest && 685 lockedEvals[eval] >= ritzVals[ritzVal]) 686 || (primme->target == primme_smallest && 687 lockedEvals[eval] <= ritzVals[ritzVal]) 688 ) 689 { 690 sortedRitzVals[count] = lockedEvals[eval]; 691 eval++; 692 } 693 694 count++; 695 } 696 697 } 698 699 /******************************************************************************* 700 * Subroutine Olsen_preconditioner_block - This subroutine applies the projected 701 * preconditioner to a block of blockSize vectors r by computing: 702 * (I - (K^{-1} B x_i x_i^T) / (x_i^T K^{-1} B x_i) ) K^{-1}r_i 703 * 704 * Input Parameters 705 * ---------------- 706 * r The vectors the preconditioner and projection will be applied to. 707 * 708 * blockSize The number of vectors in r, x 709 * 710 * primme Structure containing various solver parameters 711 * 712 * Output parameters 713 * ----------------- 714 * x The current Ritz vector used in the projection. It also carries the result. 715 * 716 ******************************************************************************/ 717 718 STATIC int Olsen_preconditioner_block(SCALAR *r, PRIMME_INT ldr, SCALAR *x, 719 PRIMME_INT ldx, SCALAR *Bx, PRIMME_INT ldBx, int blockSize, 720 primme_context ctx) { 721 722 primme_params *primme = ctx.primme; 723 724 /* Allocate workspaces */ 725 726 SCALAR *KinvBxr; /* Contain K^{-1} * [Bx r] */ 727 PRIMME_INT ldKinvBxr = primme->ldOPs; 728 CHKERR(Num_malloc_Sprimme(ldKinvBxr * blockSize * 2, &KinvBxr, ctx)); 729 SCALAR *KinvBx = KinvBxr, *Kinvr = &KinvBxr[ldKinvBxr * blockSize]; 730 HSCALAR *xKinvBx, *xKinvr; 731 CHKERR(Num_malloc_SHprimme(blockSize * 2, &xKinvBx, ctx)); 732 xKinvr = xKinvBx + blockSize; 733 734 /*------------------------------------------------------------------ */ 735 /* Compute K^{-1}Bx and K^{-1}r */ 736 /*------------------------------------------------------------------ */ 737 738 CHKERR(applyPreconditioner_Sprimme( 739 Bx, primme->nLocal, ldBx, KinvBx, ldKinvBxr, blockSize, ctx)); 740 741 CHKERR(applyPreconditioner_Sprimme( 742 r, primme->nLocal, ldr, Kinvr, ldKinvBxr, blockSize, ctx)); 743 744 /*---------------------------------------------------------------------- */ 745 /* Compute local x'K^{-1}Bx and x'K^{-1}r for each vector */ 746 /*---------------------------------------------------------------------- */ 747 748 CHKERR(Num_dist_dots_Sprimme(x, ldx, KinvBx, ldKinvBxr, primme->nLocal, 749 blockSize, xKinvBx, ctx)); 750 751 CHKERR(Num_dist_dots_Sprimme(x, ldx, Kinvr, ldKinvBxr, primme->nLocal, 752 blockSize, xKinvr, ctx)); 753 754 /*------------------------------------------------------------------*/ 755 /* Compute K^(-1)r - ( xKinvr/xKinvBx ) K^(-1)r for each vector */ 756 /*------------------------------------------------------------------*/ 757 758 int blockIndex; 759 for (blockIndex = 0; blockIndex < blockSize; blockIndex++) { 760 CHKERR(Num_copy_matrix_Sprimme(&Kinvr[ldKinvBxr * blockIndex], 761 primme->nLocal, 1, ldKinvBxr, &x[ldx * blockIndex], ldx, ctx)); 762 763 if (ABS(xKinvBx[blockIndex]) > 0.0) { 764 HSCALAR alpha; 765 alpha = -xKinvr[blockIndex] / xKinvBx[blockIndex]; 766 Num_axpy_Sprimme(primme->nLocal, alpha, 767 &KinvBx[ldKinvBxr * blockIndex], 1, &x[ldx * blockIndex], 1, 768 ctx); 769 } 770 } 771 772 CHKERR(Num_free_Sprimme(KinvBxr, ctx)); 773 CHKERR(Num_free_SHprimme(xKinvBx, ctx)); 774 775 return 0; 776 777 } 778 779 /******************************************************************************* 780 * subroutine setup_JD_projectors() 781 * 782 * Sets up whether to use any of the following projections in the 783 * correction equation: 784 * 785 * INPUT 786 * ----- 787 * x The Ritz vector 788 * Bx B*x 789 * evecs Converged locked eigenvectors (denoted as Q herein) 790 * Bevecs B*evecs 791 * evecsHat K^{-1}*B*evecs 792 * numLocked Number of locked eigenvectors (if locking) 793 * numConverged Number of converged e-vectors copied in evecs (no locking) 794 * primme The main data structures that contains the choices for 795 * 796 * primme->LeftQ : evecs in the left projector 797 * primme->LeftX : x in the left projector 798 * primme->RightQ : evecs in the right projector 799 * primme->RightX : x in the right projector 800 * primme->SkewQ : evecs in the right one, should be skewed 801 * primme->SkewX : x in the right one, should be skewed 802 * 803 * OUTPUT 804 * ------ 805 * *KinvBx The result of K^{-1}Bx (if needed, otherwise NULL) 806 * **LprojectorQ Pointer to the left projector for Q (could be NULL) 807 * **LprojectorX Pointer to the left projector for x (could be NULL) 808 * **LprojectorBQ Pointer to the left projector for BQ (could be NULL) 809 * **LprojectorBX Pointer to the left projector for Bx (could be NULL) 810 * **RprojectorQ Pointer to the right projector for Q (could be NULL) 811 * **RprojectorX Pointer to the right projector for X (could be NULL) 812 * sizeLprojectorQ Size of the left projector Q(numConverged/numLocked)/+1 or 0 813 * sizeLprojectorX Size of the left projector X (blockSize or 0) 814 * sizeRprojectorQ Size of the Q right projector (numConverged/numLocked or 0) 815 * sizeRprojectorX Size of the X right projector (blockSize or 0) 816 * 817 * ============================================================================ 818 * Functionality: 819 * 820 * Q = locked vectors 821 * x = Ritz vector 822 * K = preconditioner 823 * r = a vector to apply the 824 * 825 * (I-BQQ')(I-Bxx)(A-shift B)(I-KBx(x'KBx)^(-1)x')(I-KBQ(Q'KBQ)^(-1)Q') K 826 * ----- ----- ------------------- ------------------- 827 * Lq Lx Rx Sx Rq Sq 828 * 829 * Control parameters from primme (shortened here, see above for full names): 830 * 831 * Lq = 1/0 whether to apply the left projector I-QQ' 832 * Lx = 1/0 whether to apply the left projector I-xx' 833 * Rx = 1/0 whether to apply the right projector with x (skew or orthogonal) 834 * Rq = 1/0 whether to apply the right projector with Q (skew or orthogonal) 835 * if (Rx == 1) 836 * Sx = 1 applies the right skew projector (I-KBx(x'KBx)^(-1)x') 837 * Sx = 0 applies the right orthogonal projector (I-Bxx') 838 * if (Rq == 1) 839 * Sq = 1 applies the right skew projector (I-KQ(Q'K Q)^(-1)Q') 840 * Sq = 0 applies the right orthogonal projector (I-BQQ') 841 * 842 * Examples 843 * Lq,Lx,Rx,Sx,Rq,Sq 844 * 1 1 1 1 1 1 (the above equation) full, expensive JD 845 * 0 0 1 1 1 1 (I-KBx(x'KBx)^(-1)x')(I-KBQ(Q'KBQ)^(-1)Q') classic JD 846 * 0 1 1 1 0 0 (I-Bxx')(A-shift B)(I-KBx(x'KBx)^(-1)x') JD w/o deflaton 847 * 0 0 0 0 0 0 (A-shift B) K classic GD 848 * 1 1 1 1 0 0 (I-BQQ')(I-Bxx')(A-shift B)(I-KBx(x'KBx)^(-1)x') RECOMMENDED 849 * 1 1 0 0 0 0 (I-BQQ')(I-Bxx')(A-shift B) similar 850 * Others... 851 * Researchers can experiment with other projection schemes, 852 * although our experience says they are rarely beneficial 853 * 854 * The left orthogonal projector for x and Q can be performed as one block 855 * containing [Q x]. However, the right projections (if either is skew) 856 * are performed separately for Q and x. There are memory reasons for 857 * doing so, but also we do not have to factor (Q'KBQ) at every outer step; 858 * only when an eval converges. 859 * 860 ******************************************************************************/ 861 862 STATIC int setup_JD_projectors(SCALAR *x, PRIMME_INT ldx, SCALAR *Bx, 863 PRIMME_INT ldBx, SCALAR *evecs, PRIMME_INT ldevecs, SCALAR *Bevecs, 864 PRIMME_INT ldBevecs, SCALAR *evecsHat, PRIMME_INT ldevecsHat, 865 SCALAR *KinvBx, PRIMME_INT ldKinvBx, HSCALAR *xKinvBx, 866 SCALAR **LprojectorQ, PRIMME_INT *ldLprojectorQ, SCALAR **LprojectorX, 867 PRIMME_INT *ldLprojectorX, SCALAR **LprojectorBQ, 868 PRIMME_INT *ldLprojectorBQ, SCALAR **LprojectorBX, 869 PRIMME_INT *ldLprojectorBX, SCALAR **RprojectorQ, 870 PRIMME_INT *ldRprojectorQ, SCALAR **RprojectorX, 871 PRIMME_INT *ldRprojectorX, int *sizeLprojectorQ, int *sizeLprojectorX, 872 int *sizeRprojectorQ, int *sizeRprojectorX, int numLocked, 873 int numConverged, int blockSize, primme_context ctx) { 874 875 primme_params *primme = ctx.primme; 876 int n, sizeEvecs; 877 878 *sizeLprojectorQ = 0; 879 *sizeLprojectorX = 0; 880 *sizeRprojectorQ = 0; 881 *sizeRprojectorX = 0; 882 *ldLprojectorQ = 0; 883 *ldLprojectorX = 0; 884 *ldLprojectorBQ = 0; 885 *ldLprojectorBX = 0; 886 *ldRprojectorQ = 0; 887 *ldRprojectorX = 0; 888 *LprojectorQ = NULL; 889 *LprojectorX = NULL; 890 *LprojectorBQ = NULL; 891 *LprojectorBX = NULL; 892 *RprojectorQ = NULL; 893 *RprojectorX = NULL; 894 895 n = primme->nLocal; 896 if (primme->locking) 897 sizeEvecs = primme->numOrthoConst+numLocked; 898 else 899 sizeEvecs = primme->numOrthoConst+numConverged; 900 901 /* ---------------------------------------------------------------------------- */ 902 /* Set up the left projector arrays. Make x adjacent to Q and Bx adjacent to BQ */ 903 /* ---------------------------------------------------------------------------- */ 904 905 if (primme->correctionParams.projectors.LeftQ) { 906 907 *sizeLprojectorQ = sizeEvecs; 908 *LprojectorQ = evecs; 909 *ldLprojectorQ = ldevecs; 910 *LprojectorBQ = Bevecs; 911 *ldLprojectorBQ = ldBevecs; 912 if (primme->correctionParams.projectors.LeftX) { 913 if (blockSize <= 1) { 914 CHKERR(Num_copy_matrix_Sprimme(x, n, blockSize, ldx, 915 &evecs[sizeEvecs * ldevecs], ldevecs, ctx)); 916 if (Bx != x) { 917 CHKERR(Num_copy_matrix_Sprimme(Bx, n, blockSize, ldBx, 918 &Bevecs[sizeEvecs * ldBevecs], ldBevecs, ctx)); 919 } 920 *sizeLprojectorQ += blockSize; 921 } 922 else { 923 *sizeLprojectorX = blockSize; 924 *LprojectorX = x; 925 *ldLprojectorX = ldx; 926 *LprojectorBX = Bx; 927 *ldLprojectorBX = ldBx; 928 } 929 } 930 } 931 else { 932 if (primme->correctionParams.projectors.LeftX) { 933 *LprojectorX = x; 934 *ldLprojectorX = ldx; 935 *LprojectorBX = Bx; 936 *ldLprojectorBX = ldBx; 937 *sizeLprojectorX = blockSize; 938 } 939 } 940 941 /* --------------------------------------------------------*/ 942 /* Set up the right projector arrays. Q and x separately */ 943 /* --------------------------------------------------------*/ 944 945 /* ------------*/ 946 /* First for Q */ 947 /* ------------*/ 948 if (primme->correctionParams.projectors.RightQ) { 949 950 if (primme->correctionParams.precondition && 951 primme->correctionParams.projectors.SkewQ) { 952 *RprojectorQ = evecsHat; /* Use the K^(-1)*B*evecs array */ 953 *ldRprojectorQ = ldevecsHat; 954 } 955 else { /* Right Q but not SkewQ */ 956 *RprojectorQ = Bevecs; /* Use just the Bevecs array. */ 957 *ldRprojectorQ = ldBevecs; 958 } 959 *sizeRprojectorQ = sizeEvecs; 960 961 } 962 else { /* if no RightQ projector */ 963 964 *RprojectorQ = NULL; 965 *sizeRprojectorQ = 0; 966 } 967 968 /* ------------*/ 969 /* Then for x */ 970 /* ------------*/ 971 if (primme->correctionParams.projectors.RightX) { 972 973 if (primme->correctionParams.precondition && 974 primme->correctionParams.projectors.SkewX) { 975 CHKERR(applyPreconditioner_Sprimme( 976 Bx, primme->nLocal, ldBx, KinvBx, ldKinvBx, blockSize, ctx)); 977 *RprojectorX = KinvBx; 978 *ldRprojectorX = ldKinvBx; 979 CHKERR(Num_dist_dots_Sprimme(x, ldx, KinvBx, ldKinvBx, primme->nLocal, 980 blockSize, xKinvBx, ctx)); 981 } 982 else { 983 *RprojectorX = Bx; 984 *ldRprojectorX = ldBx; 985 int i; 986 for (i=0; i<blockSize; i++) xKinvBx[i] = 1.0; 987 } 988 *sizeRprojectorX = blockSize; 989 } 990 else { 991 *RprojectorX = NULL; 992 *sizeRprojectorX = 0; 993 } 994 995 return 0; 996 997 } /* setup_JD_projectors */ 998 999 #endif /* SUPPORTED_TYPE */ 1000