1 /*
2  *  This file computes data for the deflated restarting in the Newton-basis GMRES. At each restart (or at each detected stagnation in the adaptive strategy), a basis of an (approximated)invariant subspace corresponding to the smallest eigenvalues is extracted from the Krylov subspace. It is then used to augment the Newton basis.
3  *
4  * References : D. Nuentsa Wakam and J. Erhel, Parallelism and robustness in GMRES with the Newton basis and the deflation of eigenvalues. Research report INRIA RR-7787.
5  * Author: Desire NUENTSA WAKAM <desire.nuentsa_wakam@inria.fr>, 2011
6  */
7 
8 #include <../src/ksp/ksp/impls/gmres/agmres/agmresimpl.h>
9 
10 /* Quicksort algorithm to sort  the eigenvalues in increasing orders
11  * val_r - real part of eigenvalues, unchanged on exit.
12  * val_i - Imaginary part of eigenvalues unchanged on exit.
13  * size - Number of eigenvalues (with complex conjugates)
14  * perm - contains on exit the permutation vector to reorder the vectors val_r and val_i.
15  */
16 #define  DEPTH  500
KSPAGMRESQuickSort(PetscScalar * val_r,PetscScalar * val_i,PetscInt size,PetscInt * perm)17 static PetscErrorCode KSPAGMRESQuickSort(PetscScalar *val_r, PetscScalar *val_i, PetscInt size, PetscInt *perm)
18 {
19 
20   PetscInt    deb[DEPTH], fin[DEPTH];
21   PetscInt    ipivot;
22   PetscScalar pivot_r, pivot_i;
23   PetscInt    i, L, R, j;
24   PetscScalar abs_pivot;
25   PetscScalar abs_val;
26 
27   PetscFunctionBegin;
28   /* initialize perm vector */
29   for (j = 0; j < size; j++) perm[j] = j;
30 
31   deb[0] = 0;
32   fin[0] = size;
33   i      = 0;
34   while (i >= 0) {
35     L = deb[i];
36     R = fin[i] - 1;
37     if (L < R) {
38       pivot_r   = val_r[L];
39       pivot_i   = val_i[L];
40       abs_pivot = PetscSqrtReal(pivot_r * pivot_r + pivot_i * pivot_i);
41       ipivot    = perm[L];
42       if (i == DEPTH - 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Could cause stack overflow: Try to increase the value of DEPTH ");
43       while (L < R) {
44         abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
45         while (abs_val >= abs_pivot && L < R) {
46           R--;
47           abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
48         }
49         if (L < R) {
50           val_r[L] = val_r[R];
51           val_i[L] = val_i[R];
52           perm[L]  = perm[R];
53           L       += 1;
54         }
55         abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
56         while (abs_val <= abs_pivot && L < R) {
57           L++;
58           abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
59         }
60         if (L < R) {
61           val_r[R] = val_r[L];
62           val_i[R] = val_i[L];
63           perm[R]  = perm[L];
64           R       -= 1;
65         }
66       }
67       val_r[L] = pivot_r;
68       val_i[L] = pivot_i;
69       perm[L]  = ipivot;
70       deb[i+1] = L + 1;
71       fin[i+1] = fin[i];
72       fin[i]   = L;
73       i       += 1;
74       if (i == DEPTH - 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Could cause stack overflow: Try to increase the value of DEPTH ");
75     } else i--;
76   }
77   PetscFunctionReturn(0);
78 }
79 
80 /*
81  * Compute the Schur vectors from the generalized eigenvalue problem A.u =\lamba.B.u
82  * KspSize -  rank of the matrices A and B, size of the current Krylov basis
83  * A - Left matrix
84  * B - Right matrix
85  * ldA - first dimension of A as declared  in the calling program
86  * ldB - first dimension of B as declared  in the calling program
87  * IsReduced - specifies if the matrices are already in the reduced form,
88  * i.e A is a Hessenberg matrix and B is upper triangular.
89  * Sr - on exit, the extracted Schur vectors corresponding
90  * the smallest eigenvalues (with complex conjugates)
91  * CurNeig - Number of extracted eigenvalues
92  */
KSPAGMRESSchurForm(KSP ksp,PetscBLASInt KspSize,PetscScalar * A,PetscBLASInt ldA,PetscScalar * B,PetscBLASInt ldB,PetscBool IsReduced,PetscScalar * Sr,PetscInt * CurNeig)93 static PetscErrorCode KSPAGMRESSchurForm(KSP ksp, PetscBLASInt KspSize, PetscScalar *A, PetscBLASInt ldA, PetscScalar *B, PetscBLASInt ldB, PetscBool IsReduced, PetscScalar *Sr, PetscInt *CurNeig)
94 {
95   KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
96   PetscInt       max_k   = agmres->max_k;
97   PetscBLASInt   r;
98   PetscInt       neig    = agmres->neig;
99   PetscScalar    *wr     = agmres->wr;
100   PetscScalar    *wi     = agmres->wi;
101   PetscScalar    *beta   = agmres->beta;
102   PetscScalar    *Q      = agmres->Q;
103   PetscScalar    *Z      = agmres->Z;
104   PetscScalar    *work   = agmres->work;
105   PetscBLASInt   *select = agmres->select;
106   PetscInt       *perm   = agmres->perm;
107   PetscBLASInt   sdim    = 0;
108   PetscInt       i,j;
109   PetscBLASInt   info;
110   PetscErrorCode ierr;
111   PetscBLASInt   *iwork = agmres->iwork;
112   PetscBLASInt   N = MAXKSPSIZE;
113   PetscBLASInt   lwork,liwork;
114   PetscBLASInt   ilo,ihi;
115   PetscBLASInt   ijob,wantQ,wantZ;
116   PetscScalar    Dif[2];
117 
118   PetscFunctionBegin;
119   ijob  = 2;
120   wantQ = 1;
121   wantZ = 1;
122   ierr  = PetscBLASIntCast(PetscMax(8*N+16,4*neig*(N-neig)),&lwork);CHKERRQ(ierr);
123   ierr  = PetscBLASIntCast(2*N*neig,&liwork);CHKERRQ(ierr);
124   ilo   = 1;
125   ierr  = PetscBLASIntCast(KspSize,&ihi);CHKERRQ(ierr);
126 
127   /* Compute the Schur form */
128   if (IsReduced) {                /* The eigenvalue problem is already in reduced form, meaning that A is upper Hessenberg and B is triangular */
129     PetscStackCallBLAS("LAPACKhgeqz",LAPACKhgeqz_("S", "I", "I", &KspSize, &ilo, &ihi, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, work, &lwork, &info));
130     if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling LAPACK routine xhgeqz_");
131   } else {
132     PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &KspSize, A, &ldA, B, &ldB, &sdim, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
133     if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling LAPACK routine xgges_");
134   }
135 
136   /* We should avoid computing these ratio...  */
137   for (i = 0; i < KspSize; i++) {
138     if (beta[i] != 0.0) {
139       wr[i] /= beta[i];
140       wi[i] /= beta[i];
141     }
142   }
143 
144   /* Sort the eigenvalues to extract the smallest ones */
145   ierr = KSPAGMRESQuickSort(wr, wi, KspSize, perm);CHKERRQ(ierr);
146 
147   /* Count the number of extracted eigenvalues (with complex conjugates) */
148   r = 0;
149   while (r < neig) {
150     if (wi[r] != 0) r += 2;
151     else r += 1;
152   }
153   /* Reorder the Schur decomposition so that the cluster of smallest/largest eigenvalues appears in the leading diagonal blocks of A (and B)*/
154   ierr = PetscArrayzero(select, N);CHKERRQ(ierr);
155   if (!agmres->GreatestEig) {
156     for (j = 0; j < r; j++) select[perm[j]] = 1;
157   } else {
158     for (j = 0; j < r; j++) select[perm[KspSize-j-1]] = 1;
159   }
160   PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &KspSize, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, &r, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
161   if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
162   /* Extract the Schur vectors associated to the r smallest eigenvalues */
163   ierr = PetscArrayzero(Sr,(N+1)*r);CHKERRQ(ierr);
164   for (j = 0; j < r; j++) {
165     for (i = 0; i < KspSize; i++) {
166       Sr[j*(N+1)+i] = Z[j*N+i];
167     }
168   }
169 
170   /* Broadcast Sr to all other processes to have consistent data;
171    * FIXME should investigate how to get unique Schur vectors (unique QR factorization, probably the sign of rotations) */
172   ierr = MPI_Bcast(Sr, (N+1)*r, MPIU_SCALAR, agmres->First, PetscObjectComm((PetscObject)ksp));CHKERRQ(ierr);
173   /* Update the Shift values for the Newton basis. This is surely necessary when applying the DeflationPrecond */
174   if (agmres->DeflPrecond) {
175     ierr = KSPAGMRESLejaOrdering(wr, wi, agmres->Rshift, agmres->Ishift, max_k);CHKERRQ(ierr);
176   }
177   *CurNeig = r; /* Number of extracted eigenvalues */
178   PetscFunctionReturn(0);
179 
180 }
181 
182 /*
183  * This function form the matrices for the generalized eigenvalue problem,
184  * it then compute the Schur vectors needed to augment the Newton basis.
185  */
KSPAGMRESComputeDeflationData(KSP ksp)186 PetscErrorCode KSPAGMRESComputeDeflationData(KSP ksp)
187 {
188   KSP_AGMRES     *agmres  = (KSP_AGMRES*)ksp->data;
189   Vec            *U       = agmres->U;
190   Vec            *TmpU    = agmres->TmpU;
191   PetscScalar    *MatEigL = agmres->MatEigL;
192   PetscScalar    *MatEigR = agmres->MatEigR;
193   PetscScalar    *Sr      = agmres->Sr;
194   PetscScalar    alpha, beta;
195   PetscInt       i,j;
196   PetscErrorCode ierr;
197   PetscInt       max_k = agmres->max_k;     /* size of the non - augmented subspace */
198   PetscInt       CurNeig;       /* Current number of extracted eigenvalues */
199   PetscInt       N        = MAXKSPSIZE;
200   PetscBLASInt   bN;
201   PetscInt       lC       = N + 1;
202   PetscInt       KspSize  = KSPSIZE;
203   PetscBLASInt   blC,bKspSize;
204   PetscInt       PrevNeig = agmres->r;
205 
206   PetscFunctionBegin;
207   ierr = PetscLogEventBegin(KSP_AGMRESComputeDeflationData, ksp, 0,0,0);CHKERRQ(ierr);
208   if (agmres->neig <= 1) PetscFunctionReturn(0);
209   /* Explicitly form MatEigL = H^T*H, It can also be formed as H^T+h_{N+1,N}H^-1e^T */
210   alpha = 1.0;
211   beta  = 0.0;
212   ierr = PetscBLASIntCast(KspSize,&bKspSize);CHKERRQ(ierr);
213   ierr = PetscBLASIntCast(lC,&blC);CHKERRQ(ierr);
214   ierr = PetscBLASIntCast(N,&bN);CHKERRQ(ierr);
215   PetscStackCallBLAS("BLASgemm",BLASgemm_("T", "N", &bKspSize, &bKspSize, &blC, &alpha, agmres->hes_origin, &blC, agmres->hes_origin, &blC, &beta, MatEigL, &bN));
216   if (!agmres->ritz) {
217     /* Form TmpU = V*H where V is the Newton basis orthogonalized  with roddec*/
218     for (j = 0; j < KspSize; j++) {
219       /* Apply the elementary reflectors (stored in Qloc) on H */
220       ierr = KSPAGMRESRodvec(ksp, KspSize+1, &agmres->hes_origin[j*lC], TmpU[j]);CHKERRQ(ierr);
221     }
222     /* Now form MatEigR = TmpU^T*W where W is [VEC_V(1:max_k); U] */
223     for (j = 0; j < max_k; j++) {
224       ierr = VecMDot(VEC_V(j), KspSize, TmpU, &MatEigR[j*N]);CHKERRQ(ierr);
225     }
226     for (j = max_k; j < KspSize; j++) {
227       ierr = VecMDot(U[j-max_k], KspSize, TmpU, &MatEigR[j*N]);CHKERRQ(ierr);
228     }
229   } else { /* Form H^T */
230     for (j = 0; j < N; j++) {
231       for (i = 0; i < N; i++) {
232         MatEigR[j*N+i] = agmres->hes_origin[i*lC+j];
233       }
234     }
235   }
236   /* Obtain the Schur form of  the generalized eigenvalue problem MatEigL*y = \lambda*MatEigR*y */
237   if (agmres->DeflPrecond) {
238     ierr = KSPAGMRESSchurForm(ksp, KspSize, agmres->hes_origin, lC, agmres->Rloc, lC, PETSC_TRUE, Sr, &CurNeig);CHKERRQ(ierr);
239   } else {
240     ierr = KSPAGMRESSchurForm(ksp, KspSize, MatEigL, N, MatEigR, N, PETSC_FALSE, Sr, &CurNeig);CHKERRQ(ierr);
241   }
242 
243   if (agmres->DeflPrecond) { /* Switch to DGMRES to improve the basis of the invariant subspace associated to the deflation */
244     agmres->HasSchur = PETSC_TRUE;
245     ierr             = KSPDGMRESComputeDeflationData(ksp, &CurNeig);CHKERRQ(ierr);
246     PetscFunctionReturn(0);
247   }
248   /* Form the Schur vectors in the entire subspace: U = W * Sr where W = [VEC_V(1:max_k); U]*/
249   for (j = 0; j < PrevNeig; j++) { /* First, copy U to a temporary place */
250     ierr = VecCopy(U[j], TmpU[j]);CHKERRQ(ierr);
251   }
252 
253   for (j = 0; j < CurNeig; j++) {
254     ierr = VecZeroEntries(U[j]);CHKERRQ(ierr);
255     ierr = VecMAXPY(U[j], max_k, &Sr[j*(N+1)], &VEC_V(0));CHKERRQ(ierr);
256     ierr = VecMAXPY(U[j], PrevNeig, &Sr[j*(N+1)+max_k], TmpU);CHKERRQ(ierr);
257   }
258   agmres->r = CurNeig;
259   ierr      = PetscLogEventEnd(KSP_AGMRESComputeDeflationData, ksp, 0,0,0);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262