1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /******************************************************************************
9  *
10  * ILU solve routine
11  *
12  *****************************************************************************/
13 
14 #include "_hypre_parcsr_ls.h"
15 #include "_hypre_utilities.hpp"
16 #include "par_ilu.h"
17 
18 /*--------------------------------------------------------------------
19  * hypre_ILUSolve
20  *--------------------------------------------------------------------*/
21 HYPRE_Int
hypre_ILUSolve(void * ilu_vdata,hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u)22 hypre_ILUSolve( void               *ilu_vdata,
23                 hypre_ParCSRMatrix *A,
24                 hypre_ParVector    *f,
25                 hypre_ParVector    *u )
26 {
27    MPI_Comm             comm           = hypre_ParCSRMatrixComm(A);
28    //   HYPRE_Int            i;
29 
30    hypre_ParILUData     *ilu_data      = (hypre_ParILUData*) ilu_vdata;
31 
32 #ifdef HYPRE_USING_CUDA
33    /* pointers to cusparse data, note that they are not NULL only when needed */
34    cusparseMatDescr_t      matL_des          = hypre_ParILUDataMatLMatrixDescription(ilu_data);
35    cusparseMatDescr_t      matU_des          = hypre_ParILUDataMatUMatrixDescription(ilu_data);
36    void                    *ilu_solve_buffer = hypre_ParILUDataILUSolveBuffer(ilu_data);//device memory
37    cusparseSolvePolicy_t   ilu_solve_policy  = hypre_ParILUDataILUSolvePolicy(ilu_data);
38    hypre_CSRMatrix         *matALU_d         = hypre_ParILUDataMatAILUDevice(ilu_data);
39    hypre_CSRMatrix         *matBLU_d         = hypre_ParILUDataMatBILUDevice(ilu_data);
40    //hypre_CSRMatrix         *matSLU_d         = hypre_ParILUDataMatSILUDevice(ilu_data);
41    hypre_CSRMatrix         *matE_d           = hypre_ParILUDataMatEDevice(ilu_data);
42    hypre_CSRMatrix         *matF_d           = hypre_ParILUDataMatFDevice(ilu_data);
43    csrsv2Info_t            matAL_info        = hypre_ParILUDataMatALILUSolveInfo(ilu_data);
44    csrsv2Info_t            matAU_info        = hypre_ParILUDataMatAUILUSolveInfo(ilu_data);
45    csrsv2Info_t            matBL_info        = hypre_ParILUDataMatBLILUSolveInfo(ilu_data);
46    csrsv2Info_t            matBU_info        = hypre_ParILUDataMatBUILUSolveInfo(ilu_data);
47    csrsv2Info_t            matSL_info        = hypre_ParILUDataMatSLILUSolveInfo(ilu_data);
48    csrsv2Info_t            matSU_info        = hypre_ParILUDataMatSUILUSolveInfo(ilu_data);
49    hypre_ParCSRMatrix      *Aperm            = hypre_ParILUDataAperm(ilu_data);
50    //hypre_ParCSRMatrix      *R                = hypre_ParILUDataR(ilu_data);
51    //hypre_ParCSRMatrix      *P                = hypre_ParILUDataP(ilu_data);
52 #endif
53 
54    /* get matrices */
55    HYPRE_Int            ilu_type       = hypre_ParILUDataIluType(ilu_data);
56    HYPRE_Int            *perm          = hypre_ParILUDataPerm(ilu_data);
57    HYPRE_Int            *qperm         = hypre_ParILUDataQPerm(ilu_data);
58    hypre_ParCSRMatrix   *matA          = hypre_ParILUDataMatA(ilu_data);
59    hypre_ParCSRMatrix   *matL          = hypre_ParILUDataMatL(ilu_data);
60    HYPRE_Real           *matD          = hypre_ParILUDataMatD(ilu_data);
61    hypre_ParCSRMatrix   *matU          = hypre_ParILUDataMatU(ilu_data);
62 #ifndef HYPRE_USING_CUDA
63    hypre_ParCSRMatrix   *matmL         = hypre_ParILUDataMatLModified(ilu_data);
64    HYPRE_Real           *matmD         = hypre_ParILUDataMatDModified(ilu_data);
65    hypre_ParCSRMatrix   *matmU         = hypre_ParILUDataMatUModified(ilu_data);
66 #endif
67    hypre_ParCSRMatrix   *matS          = hypre_ParILUDataMatS(ilu_data);
68 
69    HYPRE_Int            iter, num_procs,  my_id;
70 
71    hypre_ParVector      *F_array       = hypre_ParILUDataF(ilu_data);
72    hypre_ParVector      *U_array       = hypre_ParILUDataU(ilu_data);
73 
74    /* get settings */
75    HYPRE_Real           tol            = hypre_ParILUDataTol(ilu_data);
76    HYPRE_Int            logging        = hypre_ParILUDataLogging(ilu_data);
77    HYPRE_Int            print_level    = hypre_ParILUDataPrintLevel(ilu_data);
78    HYPRE_Int            max_iter       = hypre_ParILUDataMaxIter(ilu_data);
79    HYPRE_Real           *norms         = hypre_ParILUDataRelResNorms(ilu_data);
80    hypre_ParVector      *Ftemp         = hypre_ParILUDataFTemp(ilu_data);
81    hypre_ParVector      *Utemp         = hypre_ParILUDataUTemp(ilu_data);
82    hypre_ParVector      *Xtemp         = hypre_ParILUDataXTemp(ilu_data);
83    hypre_ParVector      *Ytemp         = hypre_ParILUDataYTemp(ilu_data);
84    HYPRE_Real           *fext          = hypre_ParILUDataFExt(ilu_data);
85    HYPRE_Real           *uext          = hypre_ParILUDataUExt(ilu_data);
86    hypre_ParVector      *residual;
87 
88    HYPRE_Real           alpha          = -1;
89    HYPRE_Real           beta           = 1;
90    HYPRE_Real           conv_factor    = 0.0;
91    HYPRE_Real           resnorm        = 1.0;
92    HYPRE_Real           init_resnorm   = 0.0;
93    HYPRE_Real           rel_resnorm;
94    HYPRE_Real           rhs_norm       = 0.0;
95    HYPRE_Real           old_resnorm;
96    HYPRE_Real           ieee_check     = 0.0;
97    HYPRE_Real           operat_cmplxty = hypre_ParILUDataOperatorComplexity(ilu_data);
98 
99    HYPRE_Int            Solve_err_flag;
100 #ifdef HYPRE_USING_CUDA
101    HYPRE_Int            test_opt;
102 #endif
103 
104    /* problem size */
105    HYPRE_Int            n              = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
106    HYPRE_Int            nLU            = hypre_ParILUDataNLU(ilu_data);
107    HYPRE_Int            *u_end         = hypre_ParILUDataUEnd(ilu_data);
108 
109    /* Schur system solve */
110    HYPRE_Solver         schur_solver   = hypre_ParILUDataSchurSolver(ilu_data);
111    HYPRE_Solver         schur_precond  = hypre_ParILUDataSchurPrecond(ilu_data);
112    hypre_ParVector      *rhs           = hypre_ParILUDataRhs(ilu_data);
113    hypre_ParVector      *x             = hypre_ParILUDataX(ilu_data);
114 
115    /* begin */
116    HYPRE_ANNOTATE_FUNC_BEGIN;
117 
118    if(logging > 1)
119    {
120       residual = hypre_ParILUDataResidual(ilu_data);
121    }
122 
123    hypre_ParILUDataNumIterations(ilu_data) = 0;
124 
125    hypre_MPI_Comm_size(comm, &num_procs);
126    hypre_MPI_Comm_rank(comm,&my_id);
127 
128    /*-----------------------------------------------------------------------
129     *    Write the solver parameters
130     *-----------------------------------------------------------------------*/
131    if (my_id == 0 && print_level > 1)
132    {
133       hypre_ILUWriteSolverParams(ilu_data);
134    }
135 
136    /*-----------------------------------------------------------------------
137     *    Initialize the solver error flag
138     *-----------------------------------------------------------------------*/
139 
140    Solve_err_flag = 0;
141    /*-----------------------------------------------------------------------
142     *     write some initial info
143     *-----------------------------------------------------------------------*/
144 
145    if (my_id == 0 && print_level > 1 && tol > 0.)
146    {
147       hypre_printf("\n\n ILU SOLVER SOLUTION INFO:\n");
148    }
149 
150 
151    /*-----------------------------------------------------------------------
152     *    Compute initial residual and print
153     *-----------------------------------------------------------------------*/
154    if (print_level > 1 || logging > 1 || tol > 0.)
155    {
156       if ( logging > 1 )
157       {
158          hypre_ParVectorCopy(f, residual );
159          if (tol > 0.0)
160          {
161             hypre_ParCSRMatrixMatvec(alpha, A, u, beta, residual );
162          }
163          resnorm = sqrt(hypre_ParVectorInnerProd( residual, residual ));
164       }
165       else
166       {
167          hypre_ParVectorCopy(f, Ftemp);
168          if (tol > 0.0)
169          {
170             hypre_ParCSRMatrixMatvec(alpha, A, u, beta, Ftemp);
171          }
172          resnorm = sqrt(hypre_ParVectorInnerProd(Ftemp, Ftemp));
173       }
174 
175       /* Since it is does not diminish performance, attempt to return an error flag
176          and notify users when they supply bad input. */
177       if (resnorm != 0.)
178       {
179          ieee_check = resnorm/resnorm; /* INF -> NaN conversion */
180       }
181       if (ieee_check != ieee_check)
182       {
183          /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
184             for ieee_check self-equality works on all IEEE-compliant compilers/
185             machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
186             by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
187             found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
188          if (print_level > 0)
189          {
190             hypre_printf("\n\nERROR detected by Hypre ...  BEGIN\n");
191             hypre_printf("ERROR -- hypre_ILUSolve: INFs and/or NaNs detected in input.\n");
192             hypre_printf("User probably placed non-numerics in supplied A, x_0, or b.\n");
193             hypre_printf("ERROR detected by Hypre ...  END\n\n\n");
194          }
195          hypre_error(HYPRE_ERROR_GENERIC);
196          HYPRE_ANNOTATE_FUNC_END;
197 
198          return hypre_error_flag;
199       }
200 
201       init_resnorm = resnorm;
202       rhs_norm = sqrt(hypre_ParVectorInnerProd(f, f));
203       if (rhs_norm > HYPRE_REAL_EPSILON)
204       {
205          rel_resnorm = init_resnorm / rhs_norm;
206       }
207       else
208       {
209          /* rhs is zero, return a zero solution */
210          hypre_ParVectorSetConstantValues(U_array, 0.0);
211          if(logging > 0)
212          {
213             rel_resnorm = 0.0;
214             hypre_ParILUDataFinalRelResidualNorm(ilu_data) = rel_resnorm;
215          }
216          HYPRE_ANNOTATE_FUNC_END;
217 
218          return hypre_error_flag;
219       }
220    }
221    else
222    {
223       rel_resnorm = 1.;
224    }
225 
226    if (my_id == 0 && print_level > 1)
227    {
228       hypre_printf("                                            relative\n");
229       hypre_printf("               residual        factor       residual\n");
230       hypre_printf("               --------        ------       --------\n");
231       hypre_printf("    Initial    %e                 %e\n",init_resnorm,
232             rel_resnorm);
233    }
234 
235    matA = A;
236    U_array = u;
237    F_array = f;
238 
239    /************** Main Solver Loop - always do 1 iteration ************/
240    iter = 0;
241 
242    while ((rel_resnorm >= tol || iter < 1)
243          && iter < max_iter)
244    {
245 
246       /* Do one solve on LUe=r */
247       switch(ilu_type){
248          case 0: case 1:
249 #ifdef HYPRE_USING_CUDA
250             /* Apply GPU-accelerated LU solve */
251             hypre_ILUSolveCusparseLU(matA, matL_des, matU_des, matBL_info, matBU_info, matBLU_d, ilu_solve_policy,
252                                     ilu_solve_buffer, F_array, U_array, perm, n, Utemp, Ftemp);//BJ-cusparse
253 #else
254             hypre_ILUSolveLU(matA, F_array, U_array, perm, n, matL, matD, matU, Utemp, Ftemp); //BJ
255 #endif
256             break;
257          case 10: case 11:
258 #ifdef HYPRE_USING_CUDA
259             /* Apply GPU-accelerated LU solve */
260             hypre_ILUSolveCusparseSchurGMRES(matA, F_array, U_array, perm, nLU, matS, Utemp, Ftemp, schur_solver, schur_precond, rhs, x, u_end,
261                                           matL_des, matU_des, matBL_info, matBU_info, matSL_info, matSU_info,
262                                           matBLU_d, matE_d, matF_d, ilu_solve_policy, ilu_solve_buffer);//GMRES-cusparse
263 #else
264             hypre_ILUSolveSchurGMRES(matA, F_array, U_array, perm, perm, nLU, matL, matD, matU, matS,
265                            Utemp, Ftemp, schur_solver, schur_precond, rhs, x, u_end); //GMRES
266 #endif
267             break;
268          case 20: case 21:
269             hypre_ILUSolveSchurNSH(matA, F_array, U_array, perm, nLU, matL, matD, matU, matS,
270                   Utemp, Ftemp, schur_solver, rhs, x, u_end); //MR+NSH
271             break;
272          case 30: case 31:
273             hypre_ILUSolveLURAS(matA, F_array, U_array, perm, matL, matD, matU, Utemp, Utemp, fext, uext); //RAS
274             break;
275          case 40: case 41:
276             hypre_ILUSolveSchurGMRES(matA, F_array, U_array, perm, qperm, nLU, matL, matD, matU, matS,
277                   Utemp, Ftemp, schur_solver, schur_precond, rhs, x, u_end); //GMRES
278             break;
279          case 50:
280 #ifdef HYPRE_USING_CUDA
281             test_opt = hypre_ParILUDataTestOption(ilu_data);
282             hypre_ILUSolveRAPGMRES(matA, F_array, U_array, perm, nLU, matS, Utemp, Ftemp, Xtemp, Ytemp, schur_solver, schur_precond, rhs, x, u_end,
283                                  matL_des, matU_des, matAL_info, matAU_info, matBL_info, matBU_info, matSL_info, matSU_info,
284                                  Aperm, matALU_d, matBLU_d, matE_d, matF_d, ilu_solve_policy, ilu_solve_buffer, test_opt);//GMRES-RAP
285 #else
286             hypre_ILUSolveRAPGMRESHOST(matA, F_array, U_array, perm, nLU, matL, matD, matU, matmL, matmD, matmU, Utemp, Ftemp, Xtemp, Ytemp,
287                   schur_solver, schur_precond, rhs, x, u_end);//GMRES-RAP
288 #endif
289             break;
290          default:
291 #ifdef HYPRE_USING_CUDA
292          /* Apply GPU-accelerated LU solve */
293             hypre_ILUSolveCusparseLU(matA, matL_des, matU_des, matBL_info, matBU_info, matBLU_d, ilu_solve_policy,
294                                     ilu_solve_buffer, F_array, U_array, perm, n, Utemp, Ftemp);//BJ-cusparse
295 #else
296             hypre_ILUSolveLU(matA, F_array, U_array, perm, n, matL, matD, matU, Utemp, Ftemp); //BJ
297 #endif
298             break;
299 
300       }
301 
302       /*---------------------------------------------------------------
303        *    Compute residual and residual norm
304        *----------------------------------------------------------------*/
305 
306       if (print_level > 1 || logging > 1 || tol > 0.)
307       {
308          old_resnorm = resnorm;
309 
310          if ( logging > 1 ) {
311             hypre_ParVectorCopy(F_array, residual);
312             hypre_ParCSRMatrixMatvec(alpha, matA, U_array, beta, residual );
313             resnorm = sqrt(hypre_ParVectorInnerProd( residual, residual ));
314          }
315          else {
316             hypre_ParVectorCopy(F_array, Ftemp);
317             hypre_ParCSRMatrixMatvec(alpha, matA, U_array, beta, Ftemp);
318             resnorm = sqrt(hypre_ParVectorInnerProd(Ftemp, Ftemp));
319          }
320 
321          if (old_resnorm) conv_factor = resnorm / old_resnorm;
322          else conv_factor = resnorm;
323          if (rhs_norm > HYPRE_REAL_EPSILON)
324          {
325             rel_resnorm = resnorm / rhs_norm;
326          }
327          else
328          {
329             rel_resnorm = resnorm;
330          }
331 
332          norms[iter] = rel_resnorm;
333       }
334 
335       ++iter;
336       hypre_ParILUDataNumIterations(ilu_data) = iter;
337       hypre_ParILUDataFinalRelResidualNorm(ilu_data) = rel_resnorm;
338 
339       if (my_id == 0 && print_level > 1)
340       {
341          hypre_printf("    ILUSolve %2d   %e    %f     %e \n", iter,
342                resnorm, conv_factor, rel_resnorm);
343       }
344    }
345 
346    /* check convergence within max_iter */
347    if (iter == max_iter && tol > 0.)
348    {
349       Solve_err_flag = 1;
350       hypre_error(HYPRE_ERROR_CONV);
351    }
352 
353    /*-----------------------------------------------------------------------
354     *    Print closing statistics
355     *    Add operator and grid complexity stats
356     *-----------------------------------------------------------------------*/
357 
358    if (iter > 0 && init_resnorm)
359    {
360       conv_factor = pow((resnorm/init_resnorm),(1.0/(HYPRE_Real) iter));
361    }
362    else
363    {
364       conv_factor = 1.;
365    }
366 
367    if (print_level > 1)
368    {
369       /*** compute operator and grid complexity (fill factor) here ?? ***/
370       if (my_id == 0)
371       {
372          if (Solve_err_flag == 1)
373          {
374             hypre_printf("\n\n==============================================");
375             hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
376             hypre_printf("      within the allowed %d iterations\n",max_iter);
377             hypre_printf("==============================================");
378          }
379          hypre_printf("\n\n Average Convergence Factor = %f \n",conv_factor);
380          hypre_printf("                operator = %f\n",operat_cmplxty);
381       }
382    }
383 
384    HYPRE_ANNOTATE_FUNC_END;
385 
386    return hypre_error_flag;
387 }
388 
389 /* Schur Complement solve with GMRES on schur complement
390  * ParCSRMatrix S is already built in ilu data sturcture, here directly use S
391  * L, D and U factors only have local scope (no off-diagonal processor terms)
392  * so apart from the residual calculation (which uses A), the solves with the
393  * L and U factors are local.
394  * S is the global Schur complement
395  * schur_solver is a GMRES solver
396  * schur_precond is the ILU preconditioner for GMRES
397  * rhs and x are helper vector for solving Schur system
398 */
399 
400 HYPRE_Int
hypre_ILUSolveSchurGMRES(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int * qperm,HYPRE_Int nLU,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParCSRMatrix * S,hypre_ParVector * ftemp,hypre_ParVector * utemp,HYPRE_Solver schur_solver,HYPRE_Solver schur_precond,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end)401 hypre_ILUSolveSchurGMRES(hypre_ParCSRMatrix *A, hypre_ParVector    *f,
402                   hypre_ParVector    *u, HYPRE_Int *perm, HYPRE_Int *qperm,
403                   HYPRE_Int nLU, hypre_ParCSRMatrix *L,
404                   HYPRE_Real* D, hypre_ParCSRMatrix *U,
405                   hypre_ParCSRMatrix *S,
406                   hypre_ParVector *ftemp, hypre_ParVector *utemp,
407                   HYPRE_Solver schur_solver, HYPRE_Solver schur_precond,
408                   hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end)
409 {
410    /* data objects for communication */
411    //   MPI_Comm          comm = hypre_ParCSRMatrixComm(A);
412 
413    /* data objects for L and U */
414    hypre_CSRMatrix   *L_diag = hypre_ParCSRMatrixDiag(L);
415    HYPRE_Real        *L_diag_data = hypre_CSRMatrixData(L_diag);
416    HYPRE_Int         *L_diag_i = hypre_CSRMatrixI(L_diag);
417    HYPRE_Int         *L_diag_j = hypre_CSRMatrixJ(L_diag);
418    hypre_CSRMatrix   *U_diag = hypre_ParCSRMatrixDiag(U);
419    HYPRE_Real        *U_diag_data = hypre_CSRMatrixData(U_diag);
420    HYPRE_Int         *U_diag_i = hypre_CSRMatrixI(U_diag);
421    HYPRE_Int         *U_diag_j = hypre_CSRMatrixJ(U_diag);
422    hypre_Vector      *utemp_local = hypre_ParVectorLocalVector(utemp);
423    HYPRE_Real        *utemp_data  = hypre_VectorData(utemp_local);
424    hypre_Vector      *ftemp_local = hypre_ParVectorLocalVector(ftemp);
425    HYPRE_Real        *ftemp_data  = hypre_VectorData(ftemp_local);
426 
427    HYPRE_Real        alpha;
428    HYPRE_Real        beta;
429    HYPRE_Int         i, j, k1, k2, col;
430 
431    /* problem size */
432    HYPRE_Int         n = hypre_CSRMatrixNumRows(L_diag);
433    //   HYPRE_Int         m = n - nLU;
434 
435    /* other data objects for computation */
436    //   hypre_Vector      *f_local;
437    //   HYPRE_Real        *f_data;
438    hypre_Vector      *rhs_local;
439    HYPRE_Real        *rhs_data;
440    hypre_Vector      *x_local;
441    HYPRE_Real        *x_data;
442 
443    /* begin */
444    beta = 1.0;
445    alpha = -1.0;
446 
447    /* compute residual */
448    hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
449 
450    /* 1st need to solve LBi*xi = fi
451     * L solve, solve xi put in u_temp upper
452     */
453    //   f_local = hypre_ParVectorLocalVector(f);
454    //   f_data = hypre_VectorData(f_local);
455    /* now update with L to solve */
456    for(i = 0 ; i < nLU ; i ++)
457    {
458       utemp_data[qperm[i]] = ftemp_data[perm[i]];
459       k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
460       for(j = k1 ; j < k2 ; j ++)
461       {
462          utemp_data[qperm[i]] -= L_diag_data[j] * utemp_data[qperm[L_diag_j[j]]];
463       }
464    }
465 
466    /* 2nd need to compute g'i = gi - Ei*UBi^-1*xi
467     * now put g'i into the f_temp lower
468     */
469    for(i = nLU ; i < n ; i ++)
470    {
471       k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
472       for(j = k1 ; j < k2 ; j ++)
473       {
474          col = L_diag_j[j];
475          ftemp_data[perm[i]] -= L_diag_data[j] * utemp_data[qperm[col]];
476       }
477    }
478 
479    /* 3rd need to solve global Schur Complement Sy = g'
480     * for now only solve the local system
481     * solve y put in u_temp lower
482     * only solve whe S is not NULL
483     */
484    if(S)
485    {
486       /*initialize solution to zero for residual equation */
487       hypre_ParVectorSetConstantValues(x, 0.0);
488       /* setup vectors for solve */
489       rhs_local   = hypre_ParVectorLocalVector(rhs);
490       rhs_data    = hypre_VectorData(rhs_local);
491       x_local     = hypre_ParVectorLocalVector(x);
492       x_data      = hypre_VectorData(x_local);
493 
494       /* set rhs value */
495       for(i = nLU ; i < n ; i ++)
496       {
497          rhs_data[i-nLU] = ftemp_data[perm[i]];
498       }
499 
500       /* solve */
501       HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)S,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
502 
503       /* copy value back to original */
504       for(i = nLU ; i < n ; i ++)
505       {
506          utemp_data[qperm[i]] = x_data[i-nLU];
507       }
508    }
509 
510    /* 4th need to compute zi = xi - LBi^-1*Fi*yi
511     * put zi in f_temp upper
512     * only do this computation when nLU < n
513     * U is unsorted, search is expensive when unnecessary
514     */
515    if(nLU < n)
516    {
517       for(i = 0 ; i < nLU ; i ++)
518       {
519          ftemp_data[perm[i]] = utemp_data[qperm[i]];
520          k1 = u_end[i] ; k2 = U_diag_i[i+1];
521          for(j = k1 ; j < k2 ; j ++)
522          {
523             col = U_diag_j[j];
524             ftemp_data[perm[i]] -= U_diag_data[j] * utemp_data[qperm[col]];
525          }
526       }
527       for(i = 0 ; i < nLU ; i ++)
528       {
529          utemp_data[qperm[i]] = ftemp_data[perm[i]];
530       }
531    }
532 
533    /* 5th need to solve UBi*ui = zi */
534    /* put result in u_temp upper */
535    for(i = nLU-1 ; i >= 0 ; i --)
536    {
537       k1 = U_diag_i[i] ; k2 = u_end[i];
538       for(j = k1 ; j < k2 ; j ++)
539       {
540          col = U_diag_j[j];
541          utemp_data[qperm[i]] -= U_diag_data[j] * utemp_data[qperm[col]];
542       }
543       utemp_data[qperm[i]] *= D[i];
544    }
545 
546    /* done, now everything are in u_temp, update solution */
547    hypre_ParVectorAxpy(beta, utemp, u);
548    return hypre_error_flag;
549 }
550 
551 /* Newton-Schulz-Hotelling solve
552  * ParCSRMatrix S is already built in ilu data sturcture
553  * S here is the INVERSE of Schur Complement
554  * L, D and U factors only have local scope (no off-diagonal processor terms)
555  * so apart from the residual calculation (which uses A), the solves with the
556  * L and U factors are local.
557  * S is the inverse global Schur complement
558  * rhs and x are helper vector for solving Schur system
559 */
560 
561 HYPRE_Int
hypre_ILUSolveSchurNSH(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParCSRMatrix * S,hypre_ParVector * ftemp,hypre_ParVector * utemp,HYPRE_Solver schur_solver,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end)562 hypre_ILUSolveSchurNSH(hypre_ParCSRMatrix *A, hypre_ParVector    *f,
563                   hypre_ParVector    *u, HYPRE_Int *perm,
564                   HYPRE_Int nLU, hypre_ParCSRMatrix *L,
565                   HYPRE_Real* D, hypre_ParCSRMatrix *U,
566                   hypre_ParCSRMatrix *S,
567                   hypre_ParVector *ftemp, hypre_ParVector *utemp,
568                   HYPRE_Solver schur_solver,
569                   hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end)
570 {
571    /* data objects for communication */
572    //   MPI_Comm          comm = hypre_ParCSRMatrixComm(A);
573 
574    /* data objects for L and U */
575    hypre_CSRMatrix   *L_diag = hypre_ParCSRMatrixDiag(L);
576    HYPRE_Real        *L_diag_data = hypre_CSRMatrixData(L_diag);
577    HYPRE_Int         *L_diag_i = hypre_CSRMatrixI(L_diag);
578    HYPRE_Int         *L_diag_j = hypre_CSRMatrixJ(L_diag);
579    hypre_CSRMatrix   *U_diag = hypre_ParCSRMatrixDiag(U);
580    HYPRE_Real        *U_diag_data = hypre_CSRMatrixData(U_diag);
581    HYPRE_Int         *U_diag_i = hypre_CSRMatrixI(U_diag);
582    HYPRE_Int         *U_diag_j = hypre_CSRMatrixJ(U_diag);
583    hypre_Vector      *utemp_local = hypre_ParVectorLocalVector(utemp);
584    HYPRE_Real        *utemp_data  = hypre_VectorData(utemp_local);
585    hypre_Vector      *ftemp_local = hypre_ParVectorLocalVector(ftemp);
586    HYPRE_Real        *ftemp_data  = hypre_VectorData(ftemp_local);
587 
588    HYPRE_Real        alpha;
589    HYPRE_Real        beta;
590    HYPRE_Int         i, j, k1, k2, col;
591 
592    /* problem size */
593    HYPRE_Int         n = hypre_CSRMatrixNumRows(L_diag);
594    //   HYPRE_Int         m = n - nLU;
595 
596    /* other data objects for computation */
597    //   hypre_Vector      *f_local;
598    //   HYPRE_Real        *f_data;
599    hypre_Vector      *rhs_local;
600    HYPRE_Real        *rhs_data;
601    hypre_Vector      *x_local;
602    HYPRE_Real        *x_data;
603 
604    /* begin */
605    beta = 1.0;
606    alpha = -1.0;
607 
608    /* compute residual */
609    hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
610 
611    /* 1st need to solve LBi*xi = fi
612     * L solve, solve xi put in u_temp upper
613     */
614    //   f_local = hypre_ParVectorLocalVector(f);
615    //   f_data = hypre_VectorData(f_local);
616    /* now update with L to solve */
617    for(i = 0 ; i < nLU ; i ++)
618    {
619       utemp_data[perm[i]] = ftemp_data[perm[i]];
620       k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
621       for(j = k1 ; j < k2 ; j ++)
622       {
623          utemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[L_diag_j[j]]];
624       }
625    }
626 
627    /* 2nd need to compute g'i = gi - Ei*UBi^-1*xi
628     * now put g'i into the f_temp lower
629     */
630    for(i = nLU ; i < n ; i ++)
631    {
632       k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
633       for(j = k1 ; j < k2 ; j ++)
634       {
635          col = L_diag_j[j];
636          ftemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[col]];
637       }
638    }
639 
640    /* 3rd need to solve global Schur Complement Sy = g'
641     * for now only solve the local system
642     * solve y put in u_temp lower
643     * only solve when S is not NULL
644     */
645    if(S)
646    {
647       /*initialize solution to zero for residual equation */
648       hypre_ParVectorSetConstantValues(x, 0.0);
649       /* setup vectors for solve */
650       rhs_local   = hypre_ParVectorLocalVector(rhs);
651       rhs_data    = hypre_VectorData(rhs_local);
652       x_local     = hypre_ParVectorLocalVector(x);
653       x_data      = hypre_VectorData(x_local);
654 
655       /* set rhs value */
656       for(i = nLU ; i < n ; i ++)
657       {
658          rhs_data[i-nLU] = ftemp_data[perm[i]];
659       }
660 
661       /* Solve Schur system with approx inverse
662        * x = S*rhs
663        */
664       hypre_NSHSolve(schur_solver,S,rhs,x);
665 
666       /* copy value back to original */
667       for(i = nLU ; i < n ; i ++)
668       {
669          utemp_data[perm[i]] = x_data[i-nLU];
670       }
671    }
672 
673    /* 4th need to compute zi = xi - LBi^-1*yi
674     * put zi in f_temp upper
675     * only do this computation when nLU < n
676     * U is unsorted, search is expensive when unnecessary
677     */
678    if(nLU < n)
679    {
680       for(i = 0 ; i < nLU ; i ++)
681       {
682          ftemp_data[perm[i]] = utemp_data[perm[i]];
683          k1 = u_end[i] ; k2 = U_diag_i[i+1];
684          for(j = k1 ; j < k2 ; j ++)
685          {
686             col = U_diag_j[j];
687             ftemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[col]];
688          }
689       }
690       for(i = 0 ; i < nLU ; i ++)
691       {
692          utemp_data[perm[i]] = ftemp_data[perm[i]];
693       }
694    }
695 
696    /* 5th need to solve UBi*ui = zi */
697    /* put result in u_temp upper */
698    for(i = nLU-1 ; i >= 0 ; i --)
699    {
700       k1 = U_diag_i[i] ; k2 = u_end[i];
701       for(j = k1 ; j < k2 ; j ++)
702       {
703          col = U_diag_j[j];
704          utemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[col]];
705       }
706       utemp_data[perm[i]] *= D[i];
707    }
708 
709    /* done, now everything are in u_temp, update solution */
710    hypre_ParVectorAxpy(beta, utemp, u);
711 
712    return hypre_error_flag;
713 }
714 
715 /* Incomplete LU solve
716  * L, D and U factors only have local scope (no off-diagonal processor terms)
717  * so apart from the residual calculation (which uses A), the solves with the
718  * L and U factors are local.
719 */
720 
721 HYPRE_Int
hypre_ILUSolveLU(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParVector * ftemp,hypre_ParVector * utemp)722 hypre_ILUSolveLU(hypre_ParCSRMatrix *A, hypre_ParVector    *f,
723                   hypre_ParVector    *u, HYPRE_Int *perm,
724                   HYPRE_Int nLU, hypre_ParCSRMatrix *L,
725                   HYPRE_Real* D, hypre_ParCSRMatrix *U,
726                   hypre_ParVector *ftemp, hypre_ParVector *utemp)
727 {
728    hypre_CSRMatrix *L_diag = hypre_ParCSRMatrixDiag(L);
729    HYPRE_Real      *L_diag_data = hypre_CSRMatrixData(L_diag);
730    HYPRE_Int       *L_diag_i = hypre_CSRMatrixI(L_diag);
731    HYPRE_Int       *L_diag_j = hypre_CSRMatrixJ(L_diag);
732 
733    hypre_CSRMatrix *U_diag = hypre_ParCSRMatrixDiag(U);
734    HYPRE_Real      *U_diag_data = hypre_CSRMatrixData(U_diag);
735    HYPRE_Int       *U_diag_i = hypre_CSRMatrixI(U_diag);
736    HYPRE_Int       *U_diag_j = hypre_CSRMatrixJ(U_diag);
737 
738    hypre_Vector    *utemp_local = hypre_ParVectorLocalVector(utemp);
739    HYPRE_Real      *utemp_data  = hypre_VectorData(utemp_local);
740 
741    hypre_Vector    *ftemp_local = hypre_ParVectorLocalVector(ftemp);
742    HYPRE_Real      *ftemp_data  = hypre_VectorData(ftemp_local);
743 
744    HYPRE_Real      alpha;
745    HYPRE_Real      beta;
746    HYPRE_Int       i, j, k1, k2;
747 
748    /* begin */
749    alpha = -1.0;
750    beta = 1.0;
751 
752    /* Initialize Utemp to zero.
753     * This is necessary for correctness, when we use optimized
754     * vector operations in the case where sizeof(L, D or U) < sizeof(A)
755     */
756    //hypre_ParVectorSetConstantValues( utemp, 0.);
757    /* compute residual */
758    hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
759 
760    /* L solve - Forward solve */
761    /* copy rhs to account for diagonal of L (which is identity) */
762    for( i = 0; i < nLU; i++ )
763    {
764       utemp_data[perm[i]] = ftemp_data[perm[i]];
765    }
766    /* update with remaining (off-diagonal) entries of L */
767    for( i = 0; i < nLU; i++ )
768    {
769       k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
770       for(j=k1; j <k2; j++)
771       {
772          utemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[L_diag_j[j]]];
773       }
774    }
775    /*-------------------- U solve - Backward substitution */
776    for( i = nLU-1; i >= 0; i-- )
777    {
778       /* first update with the remaining (off-diagonal) entries of U */
779       k1 = U_diag_i[i] ; k2 = U_diag_i[i+1];
780       for(j=k1; j <k2; j++)
781       {
782          utemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[U_diag_j[j]]];
783       }
784       /* diagonal scaling (contribution from D. Note: D is stored as its inverse) */
785       utemp_data[perm[i]] *= D[i];
786    }
787 
788    /* Update solution */
789    hypre_ParVectorAxpy(beta, utemp, u);
790 
791 
792    return hypre_error_flag;
793 }
794 
795 
796 /* Incomplete LU solve RAS
797  * L, D and U factors only have local scope (no off-diagonal processor terms)
798  * so apart from the residual calculation (which uses A), the solves with the
799  * L and U factors are local.
800  * fext and uext are tempory arrays for external data
801 */
802 
803 HYPRE_Int
hypre_ILUSolveLURAS(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParVector * ftemp,hypre_ParVector * utemp,HYPRE_Real * fext,HYPRE_Real * uext)804 hypre_ILUSolveLURAS(hypre_ParCSRMatrix *A, hypre_ParVector    *f,
805                   hypre_ParVector    *u, HYPRE_Int *perm,
806                   hypre_ParCSRMatrix *L,
807                   HYPRE_Real* D, hypre_ParCSRMatrix *U,
808                   hypre_ParVector *ftemp, hypre_ParVector *utemp,
809                   HYPRE_Real *fext, HYPRE_Real *uext)
810 {
811 
812    hypre_ParCSRCommPkg        *comm_pkg;
813    hypre_ParCSRCommHandle     *comm_handle;
814    HYPRE_Int                  num_sends, begin, end;
815 
816    hypre_CSRMatrix            *L_diag = hypre_ParCSRMatrixDiag(L);
817    HYPRE_Real                 *L_diag_data = hypre_CSRMatrixData(L_diag);
818    HYPRE_Int                  *L_diag_i = hypre_CSRMatrixI(L_diag);
819    HYPRE_Int                  *L_diag_j = hypre_CSRMatrixJ(L_diag);
820 
821    hypre_CSRMatrix            *U_diag = hypre_ParCSRMatrixDiag(U);
822    HYPRE_Real                 *U_diag_data = hypre_CSRMatrixData(U_diag);
823    HYPRE_Int                  *U_diag_i = hypre_CSRMatrixI(U_diag);
824    HYPRE_Int                  *U_diag_j = hypre_CSRMatrixJ(U_diag);
825 
826    HYPRE_Int                  n = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(A));
827    HYPRE_Int                  m = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
828    //   HYPRE_Int                  buffer_size;
829    HYPRE_Int                  n_total = m + n;
830 
831    HYPRE_Int                  idx;
832    HYPRE_Int                  jcol;
833    HYPRE_Int                  col;
834 
835    hypre_Vector               *utemp_local = hypre_ParVectorLocalVector(utemp);
836    HYPRE_Real                 *utemp_data  = hypre_VectorData(utemp_local);
837 
838    hypre_Vector               *ftemp_local = hypre_ParVectorLocalVector(ftemp);
839    HYPRE_Real                 *ftemp_data  = hypre_VectorData(ftemp_local);
840 
841    HYPRE_Real                 alpha;
842    HYPRE_Real                 beta;
843    HYPRE_Int                  i, j, k1, k2;
844 
845    /* begin */
846    alpha = -1.0;
847    beta = 1.0;
848 
849    /* prepare for communication */
850    comm_pkg = hypre_ParCSRMatrixCommPkg(A);
851    /* setup if not yet built */
852    if(!comm_pkg)
853    {
854       hypre_MatvecCommPkgCreate(A);
855       comm_pkg = hypre_ParCSRMatrixCommPkg(A);
856    }
857 
858    /* Initialize Utemp to zero.
859     * This is necessary for correctness, when we use optimized
860     * vector operations in the case where sizeof(L, D or U) < sizeof(A)
861     */
862    //hypre_ParVectorSetConstantValues( utemp, 0.);
863    /* compute residual */
864    hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
865 
866    /* communication to get external data */
867 
868    /* get total num of send */
869    num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
870    begin = hypre_ParCSRCommPkgSendMapStart(comm_pkg,0);
871    end = hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends);
872 
873    /* copy new index into send_buf */
874    for(i = begin ; i < end ; i ++)
875    {
876       /* all we need is just send out data, we don't need to worry about the
877        *    permutation of offd part, actually we don't need to worry about
878        *    permutation at all
879        * borrow uext as send buffer .
880        */
881       uext[i-begin] = ftemp_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i)];
882    }
883 
884    /* main communication */
885    comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, uext, fext);
886    hypre_ParCSRCommHandleDestroy(comm_handle);
887 
888    /* L solve - Forward solve */
889    for( i = 0 ; i < n_total ; i ++)
890    {
891       k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
892       if( i < n )
893       {
894          /* diag part */
895          utemp_data[perm[i]] = ftemp_data[perm[i]];
896          for(j=k1; j <k2; j++)
897          {
898             col = L_diag_j[j];
899             if( col < n )
900             {
901                utemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[col]];
902             }
903             else
904             {
905                jcol = col - n;
906                utemp_data[perm[i]] -= L_diag_data[j] * uext[jcol];
907             }
908          }
909       }
910       else
911       {
912          /* offd part */
913          idx = i - n;
914          uext[idx] = fext[idx];
915          for(j=k1; j <k2; j++)
916          {
917             col = L_diag_j[j];
918             if(col < n)
919             {
920                uext[idx] -= L_diag_data[j] * utemp_data[perm[col]];
921             }
922             else
923             {
924                jcol = col - n;
925                uext[idx] -= L_diag_data[j] * uext[jcol];
926             }
927          }
928       }
929    }
930 
931    /*-------------------- U solve - Backward substitution */
932    for( i = n_total-1; i >= 0; i-- )
933    {
934       /* first update with the remaining (off-diagonal) entries of U */
935       k1 = U_diag_i[i] ; k2 = U_diag_i[i+1];
936       if( i < n )
937       {
938          /* diag part */
939          for(j=k1; j <k2; j++)
940          {
941             col = U_diag_j[j];
942             if( col < n )
943             {
944                utemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[col]];
945             }
946             else
947             {
948                jcol = col - n;
949                utemp_data[perm[i]] -= U_diag_data[j] * uext[jcol];
950             }
951          }
952          /* diagonal scaling (contribution from D. Note: D is stored as its inverse) */
953          utemp_data[perm[i]] *= D[i];
954       }
955       else
956       {
957          /* 2nd part of offd */
958          idx = i - n;
959          for(j=k1; j <k2; j++)
960          {
961             col = U_diag_j[j];
962             if( col < n )
963             {
964                uext[idx] -= U_diag_data[j] * utemp_data[perm[col]];
965             }
966             else
967             {
968                jcol = col - n;
969                uext[idx] -= U_diag_data[j] * uext[jcol];
970             }
971          }
972          /* diagonal scaling (contribution from D. Note: D is stored as its inverse) */
973          uext[idx] *= D[i];
974       }
975 
976    }
977    /* Update solution */
978    hypre_ParVectorAxpy(beta, utemp, u);
979 
980    return hypre_error_flag;
981 }
982 
983 #ifdef HYPRE_USING_CUDA
984 
985 /* Permutation function (for GPU version, can just call thrust)
986  * option 00: perm integer array
987  * option 01: rperm integer array
988  * option 10: perm real array
989  * option 11: rperm real array
990  * */
991 HYPRE_Int
hypre_ILUSeqVectorPerm(void * vectori,void * vectoro,HYPRE_Int size,HYPRE_Int * perm,HYPRE_Int option)992 hypre_ILUSeqVectorPerm(void *vectori, void *vectoro, HYPRE_Int size, HYPRE_Int *perm, HYPRE_Int option)
993 {
994    cudaDeviceSynchronize();
995    HYPRE_Int i;
996    switch(option)
997    {
998       case 00:
999       {
1000          HYPRE_Int *ivectori     = (HYPRE_Int *) vectori;
1001          HYPRE_Int *ivectoro     = (HYPRE_Int *) vectoro;
1002          for(i = 0 ; i < size ; i ++)
1003          {
1004             ivectoro[i] = ivectori[perm[i]];
1005          }
1006          break;
1007       }
1008       case 01:
1009       {
1010          HYPRE_Int *ivectori     = (HYPRE_Int *) vectori;
1011          HYPRE_Int *ivectoro     = (HYPRE_Int *) vectoro;
1012          for(i = 0 ; i < size ; i ++)
1013          {
1014             ivectoro[perm[i]] = ivectori[i];
1015          }
1016          break;
1017       }
1018       case 10:
1019       {
1020          HYPRE_Real *dvectori     = (HYPRE_Real *) vectori;
1021          HYPRE_Real *dvectoro     = (HYPRE_Real *) vectoro;
1022          for(i = 0 ; i < size ; i ++)
1023          {
1024             dvectoro[i] = dvectori[perm[i]];
1025          }
1026          break;
1027       }
1028       case 11:
1029       {
1030          HYPRE_Real *dvectori     = (HYPRE_Real *) vectori;
1031          HYPRE_Real *dvectoro     = (HYPRE_Real *) vectoro;
1032          for(i = 0 ; i < size ; i ++)
1033          {
1034             dvectoro[perm[i]] = dvectori[i];
1035          }
1036          break;
1037       }
1038       default:
1039       {
1040          printf("Error option in ILUSeqVectorPerm");
1041          hypre_assert(1==0);
1042       }
1043    }
1044    return hypre_error_flag;
1045 }
1046 
1047 /* Incomplete LU solve (GPU)
1048  * L, D and U factors only have local scope (no off-diagonal processor terms)
1049  * so apart from the residual calculation (which uses A), the solves with the
1050  * L and U factors are local.
1051 */
1052 
1053 HYPRE_Int
hypre_ILUSolveCusparseLU(hypre_ParCSRMatrix * A,cusparseMatDescr_t matL_des,cusparseMatDescr_t matU_des,csrsv2Info_t matL_info,csrsv2Info_t matU_info,hypre_CSRMatrix * matLU_d,cusparseSolvePolicy_t ilu_solve_policy,void * ilu_solve_buffer,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int n,hypre_ParVector * ftemp,hypre_ParVector * utemp)1054 hypre_ILUSolveCusparseLU(hypre_ParCSRMatrix *A, cusparseMatDescr_t matL_des, cusparseMatDescr_t matU_des,
1055                   csrsv2Info_t matL_info, csrsv2Info_t matU_info, hypre_CSRMatrix *matLU_d,
1056                   cusparseSolvePolicy_t ilu_solve_policy, void *ilu_solve_buffer,
1057                   hypre_ParVector *f,  hypre_ParVector *u, HYPRE_Int *perm,
1058                   HYPRE_Int n, hypre_ParVector *ftemp, hypre_ParVector *utemp)
1059 {
1060 
1061    /* Only solve when we have stuffs to be solved */
1062    if(n == 0)
1063    {
1064       return hypre_error_flag;
1065    }
1066 
1067    /* ILU data */
1068    HYPRE_Real              *LU_data             = hypre_CSRMatrixData(matLU_d);
1069    HYPRE_Int               *LU_i                = hypre_CSRMatrixI(matLU_d);
1070    HYPRE_Int               *LU_j                = hypre_CSRMatrixJ(matLU_d);
1071    HYPRE_Int               nnz                  = LU_i[n];
1072 
1073    hypre_Vector            *utemp_local         = hypre_ParVectorLocalVector(utemp);
1074    HYPRE_Real              *utemp_data          = hypre_VectorData(utemp_local);
1075 
1076    hypre_Vector            *ftemp_local         = hypre_ParVectorLocalVector(ftemp);
1077    HYPRE_Real              *ftemp_data          = hypre_VectorData(ftemp_local);
1078 
1079    HYPRE_Real              alpha;
1080    HYPRE_Real              beta;
1081    //HYPRE_Int               i, j, k1, k2;
1082 
1083    HYPRE_Int               isDoublePrecision    = sizeof(HYPRE_Complex) == sizeof(hypre_double);
1084    HYPRE_Int               isSinglePrecision    = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
1085 
1086    hypre_assert(isDoublePrecision || isSinglePrecision);
1087 
1088    /* begin */
1089    alpha = -1.0;
1090    beta = 1.0;
1091 
1092    cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
1093 
1094    /* Initialize Utemp to zero.
1095     * This is necessary for correctness, when we use optimized
1096     * vector operations in the case where sizeof(L, D or U) < sizeof(A)
1097    */
1098    //hypre_ParVectorSetConstantValues( utemp, 0.);
1099    /* compute residual */
1100    hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
1101 
1102    /* apply permutation */
1103    HYPRE_THRUST_CALL(gather, perm, perm + n, ftemp_data, utemp_data);
1104 
1105    if(isDoublePrecision)
1106    {
1107       /* L solve - Forward solve */
1108       HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1109                                                 n, nnz, (hypre_double *) &beta, matL_des,
1110                                                 (hypre_double *) LU_data, LU_i, LU_j, matL_info,
1111                                                 (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1112 
1113       /* U solve - Backward substitution */
1114       HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1115                                                 n, nnz, (hypre_double *) &beta, matU_des,
1116                                                 (hypre_double *) LU_data, LU_i, LU_j, matU_info,
1117                                                 (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1118    }
1119    else if(isSinglePrecision)
1120    {
1121       /* L solve - Forward solve */
1122       HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1123                                                 n, nnz, (float *) &beta, matL_des,
1124                                                 (float *) LU_data, LU_i, LU_j, matL_info,
1125                                                 (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1126 
1127       /* U solve - Backward substitution */
1128       HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1129                                                 n, nnz, (float *) &beta, matU_des,
1130                                                 (float *) LU_data, LU_i, LU_j, matU_info,
1131                                                 (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1132    }
1133 
1134    /* apply reverse permutation */
1135    HYPRE_THRUST_CALL(scatter,utemp_data, utemp_data + n, perm, ftemp_data);
1136    /* Update solution */
1137    hypre_ParVectorAxpy(beta, ftemp, u);
1138 
1139 
1140    return hypre_error_flag;
1141 }
1142 
1143 /* Schur Complement solve with GMRES on schur complement
1144  * ParCSRMatrix S is already built in ilu data sturcture, here directly use S
1145  * L, D and U factors only have local scope (no off-diagonal processor terms)
1146  * so apart from the residual calculation (which uses A), the solves with the
1147  * L and U factors are local.
1148  * S is the global Schur complement
1149  * schur_solver is a GMRES solver
1150  * schur_precond is the ILU preconditioner for GMRES
1151  * rhs and x are helper vector for solving Schur system
1152 */
1153 
1154 HYPRE_Int
hypre_ILUSolveCusparseSchurGMRES(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * S,hypre_ParVector * ftemp,hypre_ParVector * utemp,HYPRE_Solver schur_solver,HYPRE_Solver schur_precond,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end,cusparseMatDescr_t matL_des,cusparseMatDescr_t matU_des,csrsv2Info_t matBL_info,csrsv2Info_t matBU_info,csrsv2Info_t matSL_info,csrsv2Info_t matSU_info,hypre_CSRMatrix * matBLU_d,hypre_CSRMatrix * matE_d,hypre_CSRMatrix * matF_d,cusparseSolvePolicy_t ilu_solve_policy,void * ilu_solve_buffer)1155 hypre_ILUSolveCusparseSchurGMRES(hypre_ParCSRMatrix *A, hypre_ParVector *f,
1156                                  hypre_ParVector *u, HYPRE_Int *perm,
1157                                  HYPRE_Int nLU, hypre_ParCSRMatrix *S,
1158                                  hypre_ParVector *ftemp, hypre_ParVector *utemp,
1159                                  HYPRE_Solver schur_solver, HYPRE_Solver schur_precond,
1160                                  hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end,
1161                                  cusparseMatDescr_t matL_des, cusparseMatDescr_t matU_des,
1162                                  csrsv2Info_t matBL_info, csrsv2Info_t matBU_info, csrsv2Info_t matSL_info, csrsv2Info_t matSU_info,
1163                                  hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d,
1164                                  cusparseSolvePolicy_t ilu_solve_policy, void *ilu_solve_buffer)
1165 {
1166    /* If we don't have S block, just do one L solve and one U solve */
1167    if(!S)
1168    {
1169       /* Just call BJ cusparse and return */
1170       return hypre_ILUSolveCusparseLU(A, matL_des, matU_des, matBL_info, matBU_info, matBLU_d, ilu_solve_policy,
1171                                        ilu_solve_buffer, f, u, perm, nLU, ftemp, utemp);
1172    }
1173 
1174    /* data objects for communication */
1175 //   MPI_Comm          comm = hypre_ParCSRMatrixComm(A);
1176 
1177    /* data objects for temp vector */
1178    hypre_Vector      *utemp_local = hypre_ParVectorLocalVector(utemp);
1179    HYPRE_Real        *utemp_data  = hypre_VectorData(utemp_local);
1180    hypre_Vector      *ftemp_local = hypre_ParVectorLocalVector(ftemp);
1181    HYPRE_Real        *ftemp_data  = hypre_VectorData(ftemp_local);
1182    hypre_Vector      *rhs_local   = hypre_ParVectorLocalVector(rhs);
1183    HYPRE_Real        *rhs_data    = hypre_VectorData(rhs_local);
1184    hypre_Vector      *x_local     = hypre_ParVectorLocalVector(x);
1185    HYPRE_Real        *x_data      = hypre_VectorData(x_local);
1186 
1187    HYPRE_Real        alpha;
1188    HYPRE_Real        beta;
1189    //HYPRE_Real        gamma;
1190    //HYPRE_Int         i, j, k1, k2, col;
1191 
1192    /* problem size */
1193    HYPRE_Int         *BLU_i      = NULL;
1194    HYPRE_Int         *BLU_j      = NULL;
1195    HYPRE_Real        *BLU_data   = NULL;
1196    HYPRE_Int         BLU_nnz     = 0;
1197    hypre_CSRMatrix   *matSLU_d   = hypre_ParCSRMatrixDiag(S);
1198    HYPRE_Int         *SLU_i      = hypre_CSRMatrixI(matSLU_d);
1199    HYPRE_Int         *SLU_j      = hypre_CSRMatrixJ(matSLU_d);
1200    HYPRE_Real        *SLU_data   = hypre_CSRMatrixData(matSLU_d);
1201    HYPRE_Int         m           = hypre_CSRMatrixNumRows(matSLU_d);
1202    HYPRE_Int         n           = nLU + m;
1203    HYPRE_Int         SLU_nnz     = SLU_i[m];
1204 
1205    hypre_Vector *ftemp_upper           = hypre_SeqVectorCreate(nLU);
1206    hypre_Vector *utemp_lower           = hypre_SeqVectorCreate(m);
1207    hypre_VectorOwnsData(ftemp_upper)   = 0;
1208    hypre_VectorOwnsData(utemp_lower)   = 0;
1209    hypre_VectorData(ftemp_upper)       = ftemp_data;
1210    hypre_VectorData(utemp_lower)       = utemp_data + nLU;
1211    hypre_SeqVectorInitialize(ftemp_upper);
1212    hypre_SeqVectorInitialize(utemp_lower);
1213 
1214    if( nLU > 0)
1215    {
1216       BLU_i                      = hypre_CSRMatrixI(matBLU_d);
1217       BLU_j                      = hypre_CSRMatrixJ(matBLU_d);
1218       BLU_data                   = hypre_CSRMatrixData(matBLU_d);
1219       BLU_nnz                    = BLU_i[nLU];
1220    }
1221 
1222    /* begin */
1223    beta = 1.0;
1224    alpha = -1.0;
1225    //gamma = 0.0;
1226 
1227    HYPRE_Int               isDoublePrecision    = sizeof(HYPRE_Complex) == sizeof(hypre_double);
1228    HYPRE_Int               isSinglePrecision    = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
1229 
1230    hypre_assert(isDoublePrecision || isSinglePrecision);
1231 
1232    cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
1233 
1234    /* compute residual */
1235    hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
1236 
1237    /* 1st need to solve LBi*xi = fi
1238     * L solve, solve xi put in u_temp upper
1239     */
1240 
1241    /* apply permutation before we can start our solve */
1242    HYPRE_THRUST_CALL(gather, perm, perm + n, ftemp_data, utemp_data);
1243 
1244    if(nLU > 0)
1245    {
1246 
1247       /* This solve won't touch data in utemp, thus, gi is still in utemp_lower */
1248       if(isDoublePrecision)
1249       {
1250          /* L solve - Forward solve */
1251          HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1252                                                    nLU, BLU_nnz, (hypre_double *) &beta, matL_des,
1253                                                    (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1254                                                    (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1255       }
1256       else if(isSinglePrecision)
1257       {
1258          /* L solve - Forward solve */
1259          HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1260                                                    nLU, BLU_nnz, (float *) &beta, matL_des,
1261                                                    (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1262                                                    (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1263       }
1264 
1265       /* 2nd need to compute g'i = gi - Ei*UBi^{-1}*xi
1266        * Ei*UBi^{-1} is exactly the matE_d here
1267        * Now:  LBi^{-1}f_i is in ftemp_upper
1268        *       gi' is in utemp_lower
1269        */
1270 
1271       hypre_CSRMatrixMatvec(alpha, matE_d, ftemp_upper, beta, utemp_lower);
1272 
1273    }
1274 
1275    /* 3rd need to solve global Schur Complement M^{-1}Sy = M^{-1}g'
1276     * for now only solve the local system
1277     * solve y put in u_temp lower
1278     * only solve whe S is not NULL
1279     */
1280 
1281    /* setup vectors for solve
1282     * rhs = M^{-1}g'
1283     */
1284 
1285    if(m > 0)
1286    {
1287       if(isDoublePrecision)
1288       {
1289          /* L solve */
1290          HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1291                                                    m, SLU_nnz, (hypre_double *) &beta, matL_des,
1292                                                    (hypre_double *) SLU_data, SLU_i, SLU_j, matSL_info,
1293                                                    (hypre_double *) utemp_data + nLU, (hypre_double *) ftemp_data + nLU, ilu_solve_policy, ilu_solve_buffer));
1294 
1295          /* U solve */
1296          HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1297                                                    m, SLU_nnz, (hypre_double *) &beta, matU_des,
1298                                                    (hypre_double *) SLU_data, SLU_i, SLU_j, matSU_info,
1299                                                    (hypre_double *) ftemp_data + nLU, (hypre_double *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1300       }
1301       else if(isSinglePrecision)
1302       {
1303          /* L solve */
1304          HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1305                                                    m, SLU_nnz, (float *) &beta, matL_des,
1306                                                    (float *) SLU_data, SLU_i, SLU_j, matSL_info,
1307                                                    (float *) utemp_data + nLU, (float *) ftemp_data + nLU, ilu_solve_policy, ilu_solve_buffer));
1308 
1309          /* U solve */
1310          HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1311                                                    m, SLU_nnz, (float *) &beta, matU_des,
1312                                                    (float *) SLU_data, SLU_i, SLU_j, matSU_info,
1313                                                    (float *) ftemp_data + nLU, (float *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1314       }
1315    }
1316 
1317 
1318    /* solve */
1319    /* with tricky initial guess */
1320    //hypre_Vector *tv = hypre_ParVectorLocalVector(x);
1321    //HYPRE_Real *tz = hypre_VectorData(tv);
1322    HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)schur_precond,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
1323    /* 4th need to compute zi = xi - LBi^-1*yi
1324     * put zi in f_temp upper
1325     * only do this computation when nLU < n
1326     * U is unsorted, search is expensive when unnecessary
1327     */
1328 
1329    if(nLU > 0)
1330    {
1331       hypre_CSRMatrixMatvec(alpha, matF_d, x_local, beta, ftemp_upper);
1332 
1333       /* 5th need to solve UBi*ui = zi */
1334       /* put result in u_temp upper */
1335 
1336       if(isDoublePrecision)
1337       {
1338          /* U solve - Forward solve */
1339          HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1340                                                    nLU, BLU_nnz, (hypre_double *) &beta, matU_des,
1341                                                    (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1342                                                    (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1343       }
1344       else if(isSinglePrecision)
1345       {
1346          /* U solve - Forward solve */
1347          HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1348                                                    nLU, BLU_nnz, (float *) &beta, matU_des,
1349                                                    (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1350                                                    (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1351       }
1352    }
1353 
1354    /* copy lower part solution into u_temp as well */
1355    hypre_TMemcpy(utemp_data + nLU, x_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1356 
1357    /* perm back */
1358    HYPRE_THRUST_CALL(scatter,utemp_data, utemp_data + n, perm, ftemp_data);
1359 
1360    /* done, now everything are in u_temp, update solution */
1361    hypre_ParVectorAxpy(beta, ftemp, u);
1362 
1363    hypre_SeqVectorDestroy(ftemp_upper);
1364    hypre_SeqVectorDestroy(utemp_lower);
1365 
1366    return hypre_error_flag;
1367 }
1368 
1369 /* Schur Complement solve with GMRES on schur complement, RAP style
1370  * ParCSRMatrix S is already built in ilu data sturcture, here directly use S
1371  * L, D and U factors only have local scope (no off-diagonal processor terms)
1372  * so apart from the residual calculation (which uses A), the solves with the
1373  * L and U factors are local.
1374  * S is the global Schur complement
1375  * schur_solver is a GMRES solver
1376  * schur_precond is the ILU preconditioner for GMRES
1377  * rhs and x are helper vector for solving Schur system
1378 */
1379 
1380 HYPRE_Int
hypre_ILUSolveRAPGMRES(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * S,hypre_ParVector * ftemp,hypre_ParVector * utemp,hypre_ParVector * xtemp,hypre_ParVector * ytemp,HYPRE_Solver schur_solver,HYPRE_Solver schur_precond,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end,cusparseMatDescr_t matL_des,cusparseMatDescr_t matU_des,csrsv2Info_t matAL_info,csrsv2Info_t matAU_info,csrsv2Info_t matBL_info,csrsv2Info_t matBU_info,csrsv2Info_t matSL_info,csrsv2Info_t matSU_info,hypre_ParCSRMatrix * Aperm,hypre_CSRMatrix * matALU_d,hypre_CSRMatrix * matBLU_d,hypre_CSRMatrix * matE_d,hypre_CSRMatrix * matF_d,cusparseSolvePolicy_t ilu_solve_policy,void * ilu_solve_buffer,HYPRE_Int test_opt)1381 hypre_ILUSolveRAPGMRES(hypre_ParCSRMatrix *A, hypre_ParVector *f,
1382                                  hypre_ParVector *u, HYPRE_Int *perm,
1383                                  HYPRE_Int nLU, hypre_ParCSRMatrix *S,
1384                                  hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_ParVector *ytemp,
1385                                  HYPRE_Solver schur_solver, HYPRE_Solver schur_precond,
1386                                  hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end,
1387                                  cusparseMatDescr_t matL_des, cusparseMatDescr_t matU_des,
1388                                  csrsv2Info_t matAL_info, csrsv2Info_t matAU_info,
1389                                  csrsv2Info_t matBL_info, csrsv2Info_t matBU_info,
1390                                  csrsv2Info_t matSL_info, csrsv2Info_t matSU_info,
1391                                  hypre_ParCSRMatrix *Aperm, hypre_CSRMatrix *matALU_d, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d,
1392                                  cusparseSolvePolicy_t ilu_solve_policy, void *ilu_solve_buffer, HYPRE_Int test_opt)
1393 {
1394    /* data objects for communication */
1395 //   MPI_Comm          comm = hypre_ParCSRMatrixComm(A);
1396 
1397    /* data objects for temp vector */
1398    hypre_Vector      *utemp_local = hypre_ParVectorLocalVector(utemp);
1399    HYPRE_Real        *utemp_data  = hypre_VectorData(utemp_local);
1400    hypre_Vector      *ftemp_local = hypre_ParVectorLocalVector(ftemp);
1401    HYPRE_Real        *ftemp_data  = hypre_VectorData(ftemp_local);
1402    hypre_Vector      *xtemp_local = hypre_ParVectorLocalVector(xtemp);
1403    HYPRE_Real        *xtemp_data  = hypre_VectorData(xtemp_local);
1404    //hypre_Vector      *ytemp_local = hypre_ParVectorLocalVector(ytemp);
1405    //HYPRE_Real        *ytemp_data  = hypre_VectorData(ytemp_local);
1406    hypre_Vector      *rhs_local   = hypre_ParVectorLocalVector(rhs);
1407    HYPRE_Real        *rhs_data    = hypre_VectorData(rhs_local);
1408    hypre_Vector      *x_local     = hypre_ParVectorLocalVector(x);
1409    HYPRE_Real        *x_data      = hypre_VectorData(x_local);
1410 
1411    //HYPRE_Int         i, j, k1, k2, col;
1412 
1413    /* problem size */
1414    HYPRE_Int         *ALU_i      = hypre_CSRMatrixI(matALU_d);
1415    HYPRE_Int         *ALU_j      = hypre_CSRMatrixJ(matALU_d);
1416    HYPRE_Real        *ALU_data   = hypre_CSRMatrixData(matALU_d);
1417    HYPRE_Int         *BLU_i      = hypre_CSRMatrixI(matBLU_d);
1418    HYPRE_Int         *BLU_j      = hypre_CSRMatrixJ(matBLU_d);
1419    HYPRE_Real        *BLU_data   = hypre_CSRMatrixData(matBLU_d);
1420    HYPRE_Int          BLU_nnz    = BLU_i[nLU];
1421    hypre_CSRMatrix   *matSLU_d   = hypre_ParCSRMatrixDiag(S);
1422    HYPRE_Int         *SLU_i      = hypre_CSRMatrixI(matSLU_d);
1423    HYPRE_Int         *SLU_j      = hypre_CSRMatrixJ(matSLU_d);
1424    HYPRE_Real        *SLU_data   = hypre_CSRMatrixData(matSLU_d);
1425    HYPRE_Int         m           = hypre_CSRMatrixNumRows(matSLU_d);
1426    HYPRE_Int         n           = nLU + m;
1427    HYPRE_Int         SLU_nnz     = SLU_i[m];
1428    HYPRE_Int         ALU_nnz     = ALU_i[n];
1429 
1430    hypre_Vector *ftemp_upper           = hypre_SeqVectorCreate(nLU);
1431    hypre_Vector *utemp_lower           = hypre_SeqVectorCreate(m);
1432    hypre_VectorOwnsData(ftemp_upper)   = 0;
1433    hypre_VectorOwnsData(utemp_lower)   = 0;
1434    hypre_VectorData(ftemp_upper)       = ftemp_data;
1435    hypre_VectorData(utemp_lower)       = utemp_data + nLU;
1436    hypre_SeqVectorInitialize(ftemp_upper);
1437    hypre_SeqVectorInitialize(utemp_lower);
1438 
1439    /* begin */
1440    HYPRE_Real one = 1.0;
1441    HYPRE_Real mone = -1.0;
1442    HYPRE_Real zero = 0.0;
1443 
1444    HYPRE_Int               isDoublePrecision    = sizeof(HYPRE_Complex) == sizeof(hypre_double);
1445    HYPRE_Int               isSinglePrecision    = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
1446 
1447    hypre_assert(isDoublePrecision || isSinglePrecision);
1448 
1449    cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
1450 
1451    switch(test_opt)
1452    {
1453       case 1: case 3:
1454       {
1455          /* E and F */
1456          /* compute residual */
1457          hypre_ParCSRMatrixMatvecOutOfPlace(mone, A, u, one, f, utemp);
1458 
1459          /* apply permutation before we can start our solve
1460           * Au=f -> (PAQ)Q'u=Pf
1461           */
1462          HYPRE_THRUST_CALL(gather, perm, perm + n, utemp_data, ftemp_data);
1463 
1464          /* A-smoothing
1465           * x = [UA\(LA\(P*f_u))] fill to xtemp
1466           */
1467          if(n > 0)
1468          {
1469             if(isDoublePrecision)
1470             {
1471                /* L solve - Forward solve */
1472                HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1473                                                          n, ALU_nnz, (hypre_double *) &one, matL_des,
1474                                                          (hypre_double *) ALU_data, ALU_i, ALU_j, matAL_info,
1475                                                          (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1476                /* U solve - Forward solve */
1477                HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1478                                                          n, ALU_nnz, (hypre_double *) &one, matU_des,
1479                                                          (hypre_double *) ALU_data, ALU_i, ALU_j, matAU_info,
1480                                                          (hypre_double *) utemp_data, (hypre_double *) xtemp_data, ilu_solve_policy, ilu_solve_buffer));
1481             }
1482             else if(isSinglePrecision)
1483             {
1484                /* L solve - Forward solve */
1485                HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1486                                                          n, ALU_nnz, (float *) &one, matL_des,
1487                                                          (float *) ALU_data, ALU_i, ALU_j, matAL_info,
1488                                                          (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1489                /* U solve - Forward solve */
1490                HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1491                                                          n, ALU_nnz, (float *) &one, matU_des,
1492                                                          (float *) ALU_data, ALU_i, ALU_j, matAU_info,
1493                                                          (float *) utemp_data, (float *) xtemp_data, ilu_solve_policy, ilu_solve_buffer));
1494             }
1495          }
1496          /* residual, we should not touch xtemp for now
1497           * r = R*(f-PAQx)
1498           */
1499          hypre_ParCSRMatrixMatvec(mone, Aperm, xtemp, one, ftemp);
1500          /* with R is complex */
1501          /* copy partial data in */
1502          hypre_TMemcpy( rhs_data, ftemp_data + nLU, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1503 
1504          /* solve L^{-1} */
1505          if(nLU > 0)
1506          {
1507             if(isDoublePrecision)
1508             {
1509                /* L solve */
1510                HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1511                                                          nLU, BLU_nnz, (hypre_double *) &one, matL_des,
1512                                                          (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1513                                                          (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1514             }
1515             else if(isSinglePrecision)
1516             {
1517                /* L solve */
1518                HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1519                                                          nLU, BLU_nnz, (float *) &one, matL_des,
1520                                                          (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1521                                                          (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1522             }
1523             /* -U^{-1}L^{-1} */
1524             if(isDoublePrecision)
1525             {
1526                /* U solve */
1527                HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1528                                                          nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1529                                                          (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1530                                                          (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1531             }
1532             else if(isSinglePrecision)
1533             {
1534                /* U solve */
1535                HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1536                                                          nLU, BLU_nnz, (float *) &one, matU_des,
1537                                                          (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1538                                                          (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1539             }
1540          }
1541          /* -EU^{-1}L^{-1} */
1542          hypre_CSRMatrixMatvec(mone, matE_d, ftemp_upper, one, rhs_local);
1543 
1544 
1545          /* now solve S
1546           */
1547          if(S)
1548          {
1549             /* if we have a schur complement */
1550             hypre_ParVectorSetConstantValues(x, 0.0);
1551             HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)schur_precond,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
1552 
1553             /* u = xtemp + P*x */
1554             /* -Fx */
1555             hypre_CSRMatrixMatvec(mone, matF_d, x_local, zero, ftemp_upper);
1556             /* -L^{-1}Fx */
1557             if(nLU > 0)
1558             {
1559                if(isDoublePrecision)
1560                {
1561                   /* L solve */
1562                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1563                                                             nLU, BLU_nnz, (hypre_double *) &one, matL_des,
1564                                                             (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1565                                                             (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1566                }
1567                else if(isSinglePrecision)
1568                {
1569                   /* L solve */
1570                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1571                                                             nLU, BLU_nnz, (float *) &one, matL_des,
1572                                                             (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1573                                                             (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1574                }
1575                /* -U{-1}L^{-1}Fx */
1576                if(isDoublePrecision)
1577                {
1578                   /* U solve */
1579                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1580                                                             nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1581                                                             (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1582                                                             (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1583                }
1584                else if(isSinglePrecision)
1585                {
1586                   /* U solve */
1587                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1588                                                             nLU, BLU_nnz, (float *) &one, matU_des,
1589                                                             (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1590                                                             (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1591                }
1592             }
1593             /* now copy data to y_lower */
1594             hypre_TMemcpy( ftemp_data + nLU, x_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1595 
1596             /* correction to the residual */
1597             hypre_ParVectorAxpy(one, ftemp, xtemp);
1598 
1599          }
1600          else
1601          {
1602             /* otherwise just apply triangular solves */
1603             if(m > 0)
1604             {
1605                if(isDoublePrecision)
1606                {
1607                   /* L solve - Forward solve */
1608                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1609                                                             m, SLU_nnz, (hypre_double *) &one, matL_des,
1610                                                             (hypre_double *) SLU_data, SLU_i, SLU_j, matSL_info,
1611                                                             (hypre_double *) rhs_data, (hypre_double *) x_data, ilu_solve_policy, ilu_solve_buffer));
1612                }
1613                else if(isSinglePrecision)
1614                {
1615                   /* L solve - Forward solve */
1616                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1617                                                             m, SLU_nnz, (float *) &one, matL_des,
1618                                                             (float *) SLU_data, SLU_i, SLU_j, matSL_info,
1619                                                             (float *) rhs_data, (float *) x_data, ilu_solve_policy, ilu_solve_buffer));
1620                }
1621                if(isDoublePrecision)
1622                {
1623                   /* U solve - Forward solve */
1624                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1625                                                             m, SLU_nnz, (hypre_double *) &one, matU_des,
1626                                                             (hypre_double *) SLU_data, SLU_i, SLU_j, matSU_info,
1627                                                             (hypre_double *) x_data, (hypre_double *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1628                }
1629                else if(isSinglePrecision)
1630                {
1631                   /* U solve - Forward solve */
1632                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1633                                                             m, SLU_nnz, (float *) &one, matU_des,
1634                                                             (float *) SLU_data, SLU_i, SLU_j, matSU_info,
1635                                                             (float *) x_data, (float *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1636                }
1637             }
1638 
1639             /* u = xtemp + P*x */
1640             /* -Fx */
1641             hypre_CSRMatrixMatvec(mone, matF_d, rhs_local, zero, ftemp_upper);
1642             /* -L^{-1}Fx */
1643             if(nLU > 0)
1644             {
1645                if(isDoublePrecision)
1646                {
1647                   /* L solve */
1648                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1649                                                             nLU, BLU_nnz, (hypre_double *) &one, matL_des,
1650                                                             (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1651                                                             (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1652                }
1653                else if(isSinglePrecision)
1654                {
1655                   /* L solve */
1656                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1657                                                             nLU, BLU_nnz, (float *) &one, matL_des,
1658                                                             (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1659                                                             (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1660                }
1661                /* -U{-1}L^{-1}Fx */
1662                if(isDoublePrecision)
1663                {
1664                   /* U solve */
1665                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1666                                                             nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1667                                                             (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1668                                                             (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1669                }
1670                else if(isSinglePrecision)
1671                {
1672                   /* U solve */
1673                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1674                                                             nLU, BLU_nnz, (float *) &one, matU_des,
1675                                                             (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1676                                                             (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1677                }
1678             }
1679             /* now copy data to y_lower */
1680             hypre_TMemcpy( ftemp_data + nLU, rhs_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1681 
1682             hypre_ParVectorAxpy(one, ftemp, xtemp);
1683 
1684          }
1685 
1686          /* perm back */
1687          HYPRE_THRUST_CALL(scatter,xtemp_data, xtemp_data + n, perm, ftemp_data);
1688 
1689          /* done, now everything are in u_temp, update solution */
1690          hypre_ParVectorAxpy(one, ftemp, u);
1691       }
1692       break;
1693       case 0: case 2: default:
1694       {
1695          /* EU^{-1} and L^{-1}F */
1696          /* compute residual */
1697          hypre_ParCSRMatrixMatvecOutOfPlace(mone, A, u, one, f, ftemp);
1698          /* apply permutation before we can start our solve
1699           * Au=f -> (PAQ)Q'u=Pf
1700           */
1701          HYPRE_THRUST_CALL(gather, perm, perm + n, ftemp_data, utemp_data);
1702 
1703          /* A-smoothing
1704           * x = [UA\(LA\(P*f_u))] fill to xtemp
1705           */
1706          if(n > 0)
1707          {
1708             if(isDoublePrecision)
1709             {
1710                /* L solve - Forward solve */
1711                HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1712                                                          n, ALU_nnz, (hypre_double *) &one, matL_des,
1713                                                          (hypre_double *) ALU_data, ALU_i, ALU_j, matAL_info,
1714                                                          (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1715                /* U solve - Forward solve */
1716                HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1717                                                          n, ALU_nnz, (hypre_double *) &one, matU_des,
1718                                                          (hypre_double *) ALU_data, ALU_i, ALU_j, matAU_info,
1719                                                          (hypre_double *) ftemp_data, (hypre_double *) xtemp_data, ilu_solve_policy, ilu_solve_buffer));
1720             }
1721             else if(isSinglePrecision)
1722             {
1723                /* L solve - Forward solve */
1724                HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1725                                                          n, ALU_nnz, (float *) &one, matL_des,
1726                                                          (float *) ALU_data, ALU_i, ALU_j, matAL_info,
1727                                                          (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1728                /* U solve - Forward solve */
1729                HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1730                                                          n, ALU_nnz, (float *) &one, matU_des,
1731                                                          (float *) ALU_data, ALU_i, ALU_j, matAU_info,
1732                                                          (float *) ftemp_data, (float *) xtemp_data, ilu_solve_policy, ilu_solve_buffer));
1733             }
1734          }
1735          /* residual, we should not touch xtemp for now
1736           * r = R*(f-PAQx)
1737           */
1738          hypre_ParCSRMatrixMatvec(mone, Aperm, xtemp, one, utemp);
1739          /* with R is complex */
1740          /* copy partial data in */
1741          hypre_TMemcpy( rhs_data, utemp_data + nLU, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1742 
1743          /* solve L^{-1} */
1744          if(nLU > 0)
1745          {
1746             if(isDoublePrecision)
1747             {
1748                /* L solve */
1749                HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1750                                                          nLU, BLU_nnz, (hypre_double *) &one, matL_des,
1751                                                          (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1752                                                          (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1753             }
1754             else if(isSinglePrecision)
1755             {
1756                /* L solve */
1757                HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1758                                                          nLU, BLU_nnz, (float *) &one, matL_des,
1759                                                          (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1760                                                          (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1761             }
1762          }
1763          /* -EU^{-1}L^{-1} */
1764          hypre_CSRMatrixMatvec(mone, matE_d, ftemp_upper, one, rhs_local);
1765 
1766 
1767          /* now solve S
1768           */
1769          if(S)
1770          {
1771             /* if we have a schur complement */
1772             hypre_ParVectorSetConstantValues(x, 0.0);
1773             HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)schur_precond,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
1774 
1775             /* u = xtemp + P*x */
1776             /* -L^{-1}Fx */
1777             hypre_CSRMatrixMatvec(mone, matF_d, x_local, zero, ftemp_upper);
1778             /* -U{-1}L^{-1}Fx */
1779             if(nLU > 0)
1780             {
1781                if(isDoublePrecision)
1782                {
1783                   /* U solve */
1784                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1785                                                             nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1786                                                             (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1787                                                             (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1788                }
1789                else if(isSinglePrecision)
1790                {
1791                   /* U solve */
1792                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1793                                                             nLU, BLU_nnz, (float *) &one, matU_des,
1794                                                             (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1795                                                             (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1796                }
1797             }
1798             /* now copy data to y_lower */
1799             hypre_TMemcpy( utemp_data + nLU, x_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1800 
1801             hypre_ParVectorAxpy(one, utemp, xtemp);
1802 
1803          }
1804          else
1805          {
1806             /* otherwise just apply triangular solves */
1807             if(m > 0)
1808             {
1809                if(isDoublePrecision)
1810                {
1811                   /* L solve - Forward solve */
1812                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1813                                                             m, SLU_nnz, (hypre_double *) &one, matL_des,
1814                                                             (hypre_double *) SLU_data, SLU_i, SLU_j, matSL_info,
1815                                                             (hypre_double *) rhs_data, (hypre_double *) x_data, ilu_solve_policy, ilu_solve_buffer));
1816                }
1817                else if(isSinglePrecision)
1818                {
1819                   /* L solve - Forward solve */
1820                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1821                                                             m, SLU_nnz, (float *) &one, matL_des,
1822                                                             (float *) SLU_data, SLU_i, SLU_j, matSL_info,
1823                                                             (float *) rhs_data, (float *) x_data, ilu_solve_policy, ilu_solve_buffer));
1824                }
1825                if(isDoublePrecision)
1826                {
1827                   /* U solve - Forward solve */
1828                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1829                                                             m, SLU_nnz, (hypre_double *) &one, matU_des,
1830                                                             (hypre_double *) SLU_data, SLU_i, SLU_j, matSU_info,
1831                                                             (hypre_double *) x_data, (hypre_double *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1832                }
1833                else if(isSinglePrecision)
1834                {
1835                   /* U solve - Forward solve */
1836                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1837                                                             m, SLU_nnz, (float *) &one, matU_des,
1838                                                             (float *) SLU_data, SLU_i, SLU_j, matSU_info,
1839                                                             (float *) x_data, (float *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1840                }
1841             }
1842             /* u = xtemp + P*x */
1843             /* -L^{-1}Fx */
1844             hypre_CSRMatrixMatvec(mone, matF_d, rhs_local, zero, ftemp_upper);
1845             /* -U{-1}L^{-1}Fx */
1846             if(nLU > 0)
1847             {
1848                if(isDoublePrecision)
1849                {
1850                   /* U solve */
1851                   HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1852                                                             nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1853                                                             (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1854                                                             (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1855                }
1856                else if(isSinglePrecision)
1857                {
1858                   /* U solve */
1859                   HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1860                                                             nLU, BLU_nnz, (float *) &one, matU_des,
1861                                                             (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1862                                                             (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1863                }
1864             }
1865             /* now copy data to y_lower */
1866             hypre_TMemcpy( utemp_data + nLU, rhs_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1867 
1868             hypre_ParVectorAxpy(one, utemp, xtemp);
1869 
1870          }
1871 
1872          /* perm back */
1873          HYPRE_THRUST_CALL(scatter,xtemp_data, xtemp_data + n, perm, ftemp_data);
1874 
1875          /* done, now everything are in u_temp, update solution */
1876          hypre_ParVectorAxpy(one, ftemp, u);
1877       }
1878       break;
1879    }
1880 
1881    return hypre_error_flag;
1882 }
1883 
1884 #endif
1885 
1886 HYPRE_Int
hypre_ILUSolveRAPGMRESHOST(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParCSRMatrix * mL,HYPRE_Real * mD,hypre_ParCSRMatrix * mU,hypre_ParVector * ftemp,hypre_ParVector * utemp,hypre_ParVector * xtemp,hypre_ParVector * ytemp,HYPRE_Solver schur_solver,HYPRE_Solver schur_precond,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end)1887 hypre_ILUSolveRAPGMRESHOST(hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm,
1888                            HYPRE_Int nLU, hypre_ParCSRMatrix *L, HYPRE_Real *D, hypre_ParCSRMatrix *U,
1889                            hypre_ParCSRMatrix *mL, HYPRE_Real *mD, hypre_ParCSRMatrix *mU,
1890                            hypre_ParVector *ftemp, hypre_ParVector *utemp,
1891                            hypre_ParVector *xtemp, hypre_ParVector *ytemp,
1892                            HYPRE_Solver schur_solver, HYPRE_Solver schur_precond,
1893                            hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end)
1894 {
1895 //#pragma omp parallel
1896 //        printf("threads %d\n",omp_get_num_threads());
1897    /* data objects for communication */
1898 //   MPI_Comm          comm = hypre_ParCSRMatrixComm(A);
1899 
1900    /* data objects for L and U */
1901    hypre_CSRMatrix   *L_diag = hypre_ParCSRMatrixDiag(L);
1902    HYPRE_Real        *L_diag_data = hypre_CSRMatrixData(L_diag);
1903    HYPRE_Int         *L_diag_i = hypre_CSRMatrixI(L_diag);
1904    HYPRE_Int         *L_diag_j = hypre_CSRMatrixJ(L_diag);
1905    hypre_CSRMatrix   *U_diag = hypre_ParCSRMatrixDiag(U);
1906    HYPRE_Real        *U_diag_data = hypre_CSRMatrixData(U_diag);
1907    HYPRE_Int         *U_diag_i = hypre_CSRMatrixI(U_diag);
1908    HYPRE_Int         *U_diag_j = hypre_CSRMatrixJ(U_diag);
1909 
1910    hypre_CSRMatrix   *mL_diag = hypre_ParCSRMatrixDiag(mL);
1911    HYPRE_Real        *mL_diag_data = hypre_CSRMatrixData(mL_diag);
1912    HYPRE_Int         *mL_diag_i = hypre_CSRMatrixI(mL_diag);
1913    HYPRE_Int         *mL_diag_j = hypre_CSRMatrixJ(mL_diag);
1914    hypre_CSRMatrix   *mU_diag = hypre_ParCSRMatrixDiag(mU);
1915    HYPRE_Real        *mU_diag_data = hypre_CSRMatrixData(mU_diag);
1916    HYPRE_Int         *mU_diag_i = hypre_CSRMatrixI(mU_diag);
1917    HYPRE_Int         *mU_diag_j = hypre_CSRMatrixJ(mU_diag);
1918 
1919    hypre_Vector      *utemp_local = hypre_ParVectorLocalVector(utemp);
1920    HYPRE_Real        *utemp_data  = hypre_VectorData(utemp_local);
1921    hypre_Vector      *ftemp_local = hypre_ParVectorLocalVector(ftemp);
1922    HYPRE_Real        *ftemp_data  = hypre_VectorData(ftemp_local);
1923    hypre_Vector      *xtemp_local = NULL;
1924    HYPRE_Real        *xtemp_data  = NULL;
1925    hypre_Vector      *ytemp_local = NULL;
1926    HYPRE_Real        *ytemp_data  = NULL;
1927    if(xtemp)
1928    {
1929       /* xtemp might be null when we have no Schur complement */
1930       xtemp_local = hypre_ParVectorLocalVector(xtemp);
1931       xtemp_data  = hypre_VectorData(xtemp_local);
1932       ytemp_local = hypre_ParVectorLocalVector(ytemp);
1933       ytemp_data  = hypre_VectorData(ytemp_local);
1934    }
1935 
1936    HYPRE_Real        alpha;
1937    HYPRE_Real        beta;
1938    HYPRE_Int         i, j, k1, k2, col;
1939 
1940    /* problem size */
1941    HYPRE_Int         n = hypre_CSRMatrixNumRows(L_diag);
1942    HYPRE_Int         m = n - nLU;
1943 
1944    /* other data objects for computation */
1945    //hypre_Vector      *f_local;
1946    //HYPRE_Real        *f_data;
1947    hypre_Vector      *rhs_local;
1948    HYPRE_Real        *rhs_data;
1949    hypre_Vector      *x_local;
1950    HYPRE_Real        *x_data;
1951 
1952    /* begin */
1953    beta = 1.0;
1954    alpha = -1.0;
1955 
1956    if(m > 0)
1957    {
1958       /* setup vectors for solve */
1959       rhs_local   = hypre_ParVectorLocalVector(rhs);
1960       rhs_data    = hypre_VectorData(rhs_local);
1961       x_local     = hypre_ParVectorLocalVector(x);
1962       x_data      = hypre_VectorData(x_local);
1963 
1964    }
1965 
1966    /* only support RAP with partial factorized W and Z */
1967 
1968    /* compute residual */
1969    hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
1970 
1971    /* A-smoothing f_temp = [UA \ LA \ (f_temp[perm])] */
1972    /* permuted L solve */
1973    for(i = 0 ; i < n ; i ++)
1974    {
1975       utemp_data[i] = ftemp_data[perm[i]];
1976       k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
1977       for(j = k1 ; j < k2 ; j ++)
1978       {
1979          col = L_diag_j[j];
1980          utemp_data[i] -= L_diag_data[j] * utemp_data[col];
1981       }
1982    }
1983 
1984    if(!xtemp)
1985    {
1986       /* in this case, we don't have a Schur complement */
1987       /* U solve */
1988       for(i = n-1 ; i >= 0 ; i --)
1989       {
1990          ftemp_data[perm[i]] = utemp_data[i];
1991          k1 = U_diag_i[i] ; k2 = U_diag_i[i+1];
1992          for(j = k1 ; j < k2 ; j ++)
1993          {
1994             col = U_diag_j[j];
1995             ftemp_data[perm[i]] -= U_diag_data[j] * ftemp_data[perm[col]];
1996          }
1997          ftemp_data[perm[i]] *= D[i];
1998       }
1999 
2000       hypre_ParVectorAxpy(beta, ftemp, u);
2001 
2002       return hypre_error_flag;
2003    }
2004 
2005    /* U solve */
2006    for(i = n-1 ; i >= 0 ; i --)
2007    {
2008       xtemp_data[perm[i]] = utemp_data[i];
2009       k1 = U_diag_i[i] ; k2 = U_diag_i[i+1];
2010       for(j = k1 ; j < k2 ; j ++)
2011       {
2012          col = U_diag_j[j];
2013          xtemp_data[perm[i]] -= U_diag_data[j] * xtemp_data[perm[col]];
2014       }
2015       xtemp_data[perm[i]] *= D[i];
2016    }
2017 
2018    /* coarse-grid correction */
2019    /* now f_temp is the result of A-smoothing
2020     * rhs = R*(b - Ax)
2021     * */
2022    // utemp = (ftemp - A*xtemp)
2023    hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, xtemp, beta, ftemp, utemp);
2024 
2025    // R = [-L21 L\inv, I]
2026    if( m > 0)
2027    {
2028       /* first is L solve */
2029       for(i = 0 ; i < nLU ; i ++)
2030       {
2031          ytemp_data[i] = utemp_data[perm[i]];
2032          k1 = mL_diag_i[i] ; k2 = mL_diag_i[i+1];
2033          for(j = k1 ; j < k2 ; j ++)
2034          {
2035             col = mL_diag_j[j];
2036             ytemp_data[i] -= mL_diag_data[j] * ytemp_data[col];
2037          }
2038       }
2039 
2040       /* apply -W * ytemp on this, and take care of the I part */
2041       for(i = nLU ; i < n ; i ++)
2042       {
2043          rhs_data[i - nLU] = utemp_data[perm[i]];
2044          k1 = mL_diag_i[i] ; k2 = u_end[i];
2045          for(j = k1 ; j < k2 ; j ++)
2046          {
2047             col = mL_diag_j[j];
2048             rhs_data[i - nLU] -= mL_diag_data[j] * ytemp_data[col];
2049          }
2050       }
2051    }
2052 
2053    /* now the rhs is ready */
2054    hypre_SeqVectorSetConstantValues(x_local, 0.0);
2055    HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)schur_precond,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
2056 
2057    if(m > 0)
2058    {
2059       /*
2060       for(i = 0 ; i < m ; i ++)
2061       {
2062          x_data[i] = rhs_data[i];
2063          k1 = u_end[i+nLU] ; k2 = mL_diag_i[i+nLU+1];
2064          for(j = k1 ; j < k2 ; j ++)
2065          {
2066             col = mL_diag_j[j];
2067             x_data[i] -= mL_diag_data[j] * x_data[col-nLU];
2068          }
2069       }
2070 
2071       for(i = m-1 ; i >= 0 ; i --)
2072       {
2073          rhs_data[i] = x_data[i];
2074          k1 = mU_diag_i[i+nLU] ; k2 = mU_diag_i[i+1+nLU];
2075          for(j = k1 ; j < k2 ; j ++)
2076          {
2077             col = mU_diag_j[j];
2078             rhs_data[i] -= mU_diag_data[j] * rhs_data[col-nLU];
2079          }
2080          rhs_data[i] *= mD[i];
2081       }
2082       */
2083 
2084       /* after solve, update x = x + Pv
2085        * that is, xtemp = xtemp + P*x
2086        */
2087       /* first compute P*x
2088        * P = [ -U\inv U_12 ]
2089        *     [  I          ]
2090        */
2091       /* matvec */
2092       for(i = 0 ; i < nLU ; i ++)
2093       {
2094          ytemp_data[i] = 0.0;
2095          k1 = u_end[i] ; k2 = mU_diag_i[i+1];
2096          for(j = k1 ; j < k2 ; j ++)
2097          {
2098             col = mU_diag_j[j];
2099             ytemp_data[i] -= mU_diag_data[j] * x_data[col-nLU];
2100          }
2101       }
2102       /* U solve */
2103       for(i = nLU-1 ; i >= 0 ; i --)
2104       {
2105          ftemp_data[perm[i]] = ytemp_data[i];
2106          k1 = mU_diag_i[i] ; k2 = u_end[i];
2107          for(j = k1 ; j < k2 ; j ++)
2108          {
2109             col = mU_diag_j[j];
2110             ftemp_data[perm[i]] -= mU_diag_data[j] * ftemp_data[perm[col]];
2111          }
2112          ftemp_data[perm[i]] *= mD[i];
2113       }
2114 
2115       /* update with I */
2116       for(i = nLU ; i < n ; i ++)
2117       {
2118          ftemp_data[perm[i]] = x_data[i-nLU];
2119       }
2120       hypre_ParVectorAxpy(beta, ftemp, u);
2121    }
2122 
2123    hypre_ParVectorAxpy(beta, xtemp, u);
2124 
2125    return hypre_error_flag;
2126 }
2127 
2128 /* solve functions for NSH */
2129 
2130 /*--------------------------------------------------------------------
2131  * hypre_NSHSolve
2132  *--------------------------------------------------------------------*/
2133 HYPRE_Int
hypre_NSHSolve(void * nsh_vdata,hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u)2134 hypre_NSHSolve( void               *nsh_vdata,
2135                   hypre_ParCSRMatrix *A,
2136                   hypre_ParVector    *f,
2137                   hypre_ParVector    *u )
2138 {
2139    MPI_Comm             comm = hypre_ParCSRMatrixComm(A);
2140    //   HYPRE_Int            i;
2141 
2142    hypre_ParNSHData     *nsh_data = (hypre_ParNSHData*) nsh_vdata;
2143 
2144    /* get matrices */
2145    hypre_ParCSRMatrix   *matA = hypre_ParNSHDataMatA(nsh_data);
2146    hypre_ParCSRMatrix   *matM = hypre_ParNSHDataMatM(nsh_data);
2147 
2148    HYPRE_Int            iter, num_procs,  my_id;
2149 
2150    hypre_ParVector      *F_array = hypre_ParNSHDataF(nsh_data);
2151    hypre_ParVector      *U_array = hypre_ParNSHDataU(nsh_data);
2152 
2153    /* get settings */
2154    HYPRE_Real           tol = hypre_ParNSHDataTol(nsh_data);
2155    HYPRE_Int            logging = hypre_ParNSHDataLogging(nsh_data);
2156    HYPRE_Int            print_level = hypre_ParNSHDataPrintLevel(nsh_data);
2157    HYPRE_Int            max_iter = hypre_ParNSHDataMaxIter(nsh_data);
2158    HYPRE_Real           *norms = hypre_ParNSHDataRelResNorms(nsh_data);
2159    hypre_ParVector      *Ftemp = hypre_ParNSHDataFTemp(nsh_data);
2160    hypre_ParVector      *Utemp = hypre_ParNSHDataUTemp(nsh_data);
2161    hypre_ParVector      *residual;
2162 
2163    HYPRE_Real           alpha = -1.0;
2164    HYPRE_Real           beta = 1.0;
2165    HYPRE_Real           conv_factor = 0.0;
2166    HYPRE_Real           resnorm = 1.0;
2167    HYPRE_Real           init_resnorm = 0.0;
2168    HYPRE_Real           rel_resnorm;
2169    HYPRE_Real           rhs_norm = 0.0;
2170    HYPRE_Real           old_resnorm;
2171    HYPRE_Real           ieee_check = 0.;
2172    HYPRE_Real           operat_cmplxty = hypre_ParNSHDataOperatorComplexity(nsh_data);
2173 
2174    HYPRE_Int            Solve_err_flag;
2175 
2176    /* problem size */
2177    //   HYPRE_Int            n = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
2178 
2179    /* begin */
2180    if(logging > 1)
2181    {
2182       residual = hypre_ParNSHDataResidual(nsh_data);
2183    }
2184 
2185    hypre_ParNSHDataNumIterations(nsh_data) = 0;
2186 
2187    hypre_MPI_Comm_size(comm, &num_procs);
2188    hypre_MPI_Comm_rank(comm,&my_id);
2189 
2190    /*-----------------------------------------------------------------------
2191     *    Write the solver parameters
2192     *-----------------------------------------------------------------------*/
2193    if (my_id == 0 && print_level > 1)
2194    {
2195       hypre_NSHWriteSolverParams(nsh_data);
2196    }
2197 
2198    /*-----------------------------------------------------------------------
2199     *    Initialize the solver error flag
2200     *-----------------------------------------------------------------------*/
2201 
2202    Solve_err_flag = 0;
2203    /*-----------------------------------------------------------------------
2204     *     write some initial info
2205     *-----------------------------------------------------------------------*/
2206 
2207    if (my_id == 0 && print_level > 1 && tol > 0.)
2208    {
2209       hypre_printf("\n\n Newton–Schulz–Hotelling SOLVER SOLUTION INFO:\n");
2210    }
2211 
2212 
2213    /*-----------------------------------------------------------------------
2214     *    Compute initial residual and print
2215     *-----------------------------------------------------------------------*/
2216    if (print_level > 1 || logging > 1 || tol > 0.)
2217    {
2218       if ( logging > 1 )
2219       {
2220          hypre_ParVectorCopy(f, residual );
2221          if (tol > 0.0)
2222          {
2223             hypre_ParCSRMatrixMatvec(alpha, A, u, beta, residual );
2224          }
2225          resnorm = sqrt(hypre_ParVectorInnerProd( residual, residual ));
2226       }
2227       else
2228       {
2229          hypre_ParVectorCopy(f, Ftemp);
2230          if (tol > 0.0)
2231          {
2232             hypre_ParCSRMatrixMatvec(alpha, A, u, beta, Ftemp);
2233          }
2234          resnorm = sqrt(hypre_ParVectorInnerProd(Ftemp, Ftemp));
2235       }
2236 
2237       /* Since it is does not diminish performance, attempt to return an error flag
2238          and notify users when they supply bad input. */
2239       if (resnorm != 0.)
2240       {
2241          ieee_check = resnorm/resnorm; /* INF -> NaN conversion */
2242       }
2243       if (ieee_check != ieee_check)
2244       {
2245          /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
2246             for ieee_check self-equality works on all IEEE-compliant compilers/
2247             machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
2248             by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
2249             found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
2250          if (print_level > 0)
2251          {
2252             hypre_printf("\n\nERROR detected by Hypre ...  BEGIN\n");
2253             hypre_printf("ERROR -- hypre_NSHSolve: INFs and/or NaNs detected in input.\n");
2254             hypre_printf("User probably placed non-numerics in supplied A, x_0, or b.\n");
2255             hypre_printf("ERROR detected by Hypre ...  END\n\n\n");
2256          }
2257          hypre_error(HYPRE_ERROR_GENERIC);
2258          return hypre_error_flag;
2259       }
2260 
2261       init_resnorm = resnorm;
2262       rhs_norm = sqrt(hypre_ParVectorInnerProd(f, f));
2263       if (rhs_norm > HYPRE_REAL_EPSILON)
2264       {
2265          rel_resnorm = init_resnorm / rhs_norm;
2266       }
2267       else
2268       {
2269          /* rhs is zero, return a zero solution */
2270          hypre_ParVectorSetConstantValues(U_array, 0.0);
2271          if(logging > 0)
2272          {
2273             rel_resnorm = 0.0;
2274             hypre_ParNSHDataFinalRelResidualNorm(nsh_data) = rel_resnorm;
2275          }
2276          return hypre_error_flag;
2277       }
2278    }
2279    else
2280    {
2281       rel_resnorm = 1.;
2282    }
2283 
2284    if (my_id == 0 && print_level > 1)
2285    {
2286       hypre_printf("                                            relative\n");
2287       hypre_printf("               residual        factor       residual\n");
2288       hypre_printf("               --------        ------       --------\n");
2289       hypre_printf("    Initial    %e                 %e\n",init_resnorm,
2290             rel_resnorm);
2291    }
2292 
2293    matA = A;
2294    U_array = u;
2295    F_array = f;
2296 
2297    /************** Main Solver Loop - always do 1 iteration ************/
2298    iter = 0;
2299 
2300    while ((rel_resnorm >= tol || iter < 1)
2301          && iter < max_iter)
2302    {
2303 
2304       /* Do one solve on e = Mr */
2305       hypre_NSHSolveInverse(matA, f, u, matM, Utemp, Ftemp);
2306 
2307       /*---------------------------------------------------------------
2308        *    Compute residual and residual norm
2309        *----------------------------------------------------------------*/
2310 
2311       if (print_level > 1 || logging > 1 || tol > 0.)
2312       {
2313          old_resnorm = resnorm;
2314 
2315          if ( logging > 1 ) {
2316             hypre_ParVectorCopy(F_array, residual);
2317             hypre_ParCSRMatrixMatvec(alpha, matA, U_array, beta, residual );
2318             resnorm = sqrt(hypre_ParVectorInnerProd( residual, residual ));
2319          }
2320          else {
2321             hypre_ParVectorCopy(F_array, Ftemp);
2322             hypre_ParCSRMatrixMatvec(alpha, matA, U_array, beta, Ftemp);
2323             resnorm = sqrt(hypre_ParVectorInnerProd(Ftemp, Ftemp));
2324          }
2325 
2326          if (old_resnorm) conv_factor = resnorm / old_resnorm;
2327          else conv_factor = resnorm;
2328          if (rhs_norm > HYPRE_REAL_EPSILON)
2329          {
2330             rel_resnorm = resnorm / rhs_norm;
2331          }
2332          else
2333          {
2334             rel_resnorm = resnorm;
2335          }
2336 
2337          norms[iter] = rel_resnorm;
2338       }
2339 
2340       ++iter;
2341       hypre_ParNSHDataNumIterations(nsh_data) = iter;
2342       hypre_ParNSHDataFinalRelResidualNorm(nsh_data) = rel_resnorm;
2343 
2344       if (my_id == 0 && print_level > 1)
2345       {
2346          hypre_printf("    NSHSolve %2d   %e    %f     %e \n", iter,
2347                resnorm, conv_factor, rel_resnorm);
2348       }
2349    }
2350 
2351    /* check convergence within max_iter */
2352    if (iter == max_iter && tol > 0.)
2353    {
2354       Solve_err_flag = 1;
2355       hypre_error(HYPRE_ERROR_CONV);
2356    }
2357 
2358    /*-----------------------------------------------------------------------
2359     *    Print closing statistics
2360     *    Add operator and grid complexity stats
2361     *-----------------------------------------------------------------------*/
2362 
2363    if (iter > 0 && init_resnorm)
2364    {
2365       conv_factor = pow((resnorm/init_resnorm),(1.0/(HYPRE_Real) iter));
2366    }
2367    else
2368    {
2369       conv_factor = 1.;
2370    }
2371 
2372    if (print_level > 1)
2373    {
2374       /*** compute operator and grid complexity (fill factor) here ?? ***/
2375       if (my_id == 0)
2376       {
2377          if (Solve_err_flag == 1)
2378          {
2379             hypre_printf("\n\n==============================================");
2380             hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
2381             hypre_printf("      within the allowed %d iterations\n",max_iter);
2382             hypre_printf("==============================================");
2383          }
2384          hypre_printf("\n\n Average Convergence Factor = %f \n",conv_factor);
2385          hypre_printf("                operator = %f\n",operat_cmplxty);
2386       }
2387    }
2388    return hypre_error_flag;
2389 }
2390 
2391 /* NSH solve
2392  * Simply a matvec on residual with approximate inverse
2393  * A: original matrix
2394  * f: rhs
2395  * u: solution
2396  * M: approximate inverse
2397  * ftemp, utemp: working vectors
2398 */
2399 HYPRE_Int
hypre_NSHSolveInverse(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,hypre_ParCSRMatrix * M,hypre_ParVector * ftemp,hypre_ParVector * utemp)2400 hypre_NSHSolveInverse(hypre_ParCSRMatrix *A, hypre_ParVector *f,
2401                         hypre_ParVector *u, hypre_ParCSRMatrix *M,
2402                         hypre_ParVector *ftemp, hypre_ParVector *utemp)
2403 {
2404    HYPRE_Real      alpha;
2405    HYPRE_Real      beta;
2406 
2407    /* begin */
2408    alpha = -1.0;
2409    beta = 1.0;
2410    /* r = f-Au */
2411    hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
2412    /* e = Mr */
2413    hypre_ParCSRMatrixMatvec(1.0, M, ftemp, 0.0, utemp);
2414    /* u = u + e */
2415    hypre_ParVectorAxpy(beta, utemp, u);
2416    return hypre_error_flag;
2417 }
2418