1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /******************************************************************************
9 *
10 * ILU solve routine
11 *
12 *****************************************************************************/
13
14 #include "_hypre_parcsr_ls.h"
15 #include "_hypre_utilities.hpp"
16 #include "par_ilu.h"
17
18 /*--------------------------------------------------------------------
19 * hypre_ILUSolve
20 *--------------------------------------------------------------------*/
21 HYPRE_Int
hypre_ILUSolve(void * ilu_vdata,hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u)22 hypre_ILUSolve( void *ilu_vdata,
23 hypre_ParCSRMatrix *A,
24 hypre_ParVector *f,
25 hypre_ParVector *u )
26 {
27 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
28 // HYPRE_Int i;
29
30 hypre_ParILUData *ilu_data = (hypre_ParILUData*) ilu_vdata;
31
32 #ifdef HYPRE_USING_CUDA
33 /* pointers to cusparse data, note that they are not NULL only when needed */
34 cusparseMatDescr_t matL_des = hypre_ParILUDataMatLMatrixDescription(ilu_data);
35 cusparseMatDescr_t matU_des = hypre_ParILUDataMatUMatrixDescription(ilu_data);
36 void *ilu_solve_buffer = hypre_ParILUDataILUSolveBuffer(ilu_data);//device memory
37 cusparseSolvePolicy_t ilu_solve_policy = hypre_ParILUDataILUSolvePolicy(ilu_data);
38 hypre_CSRMatrix *matALU_d = hypre_ParILUDataMatAILUDevice(ilu_data);
39 hypre_CSRMatrix *matBLU_d = hypre_ParILUDataMatBILUDevice(ilu_data);
40 //hypre_CSRMatrix *matSLU_d = hypre_ParILUDataMatSILUDevice(ilu_data);
41 hypre_CSRMatrix *matE_d = hypre_ParILUDataMatEDevice(ilu_data);
42 hypre_CSRMatrix *matF_d = hypre_ParILUDataMatFDevice(ilu_data);
43 csrsv2Info_t matAL_info = hypre_ParILUDataMatALILUSolveInfo(ilu_data);
44 csrsv2Info_t matAU_info = hypre_ParILUDataMatAUILUSolveInfo(ilu_data);
45 csrsv2Info_t matBL_info = hypre_ParILUDataMatBLILUSolveInfo(ilu_data);
46 csrsv2Info_t matBU_info = hypre_ParILUDataMatBUILUSolveInfo(ilu_data);
47 csrsv2Info_t matSL_info = hypre_ParILUDataMatSLILUSolveInfo(ilu_data);
48 csrsv2Info_t matSU_info = hypre_ParILUDataMatSUILUSolveInfo(ilu_data);
49 hypre_ParCSRMatrix *Aperm = hypre_ParILUDataAperm(ilu_data);
50 //hypre_ParCSRMatrix *R = hypre_ParILUDataR(ilu_data);
51 //hypre_ParCSRMatrix *P = hypre_ParILUDataP(ilu_data);
52 #endif
53
54 /* get matrices */
55 HYPRE_Int ilu_type = hypre_ParILUDataIluType(ilu_data);
56 HYPRE_Int *perm = hypre_ParILUDataPerm(ilu_data);
57 HYPRE_Int *qperm = hypre_ParILUDataQPerm(ilu_data);
58 hypre_ParCSRMatrix *matA = hypre_ParILUDataMatA(ilu_data);
59 hypre_ParCSRMatrix *matL = hypre_ParILUDataMatL(ilu_data);
60 HYPRE_Real *matD = hypre_ParILUDataMatD(ilu_data);
61 hypre_ParCSRMatrix *matU = hypre_ParILUDataMatU(ilu_data);
62 #ifndef HYPRE_USING_CUDA
63 hypre_ParCSRMatrix *matmL = hypre_ParILUDataMatLModified(ilu_data);
64 HYPRE_Real *matmD = hypre_ParILUDataMatDModified(ilu_data);
65 hypre_ParCSRMatrix *matmU = hypre_ParILUDataMatUModified(ilu_data);
66 #endif
67 hypre_ParCSRMatrix *matS = hypre_ParILUDataMatS(ilu_data);
68
69 HYPRE_Int iter, num_procs, my_id;
70
71 hypre_ParVector *F_array = hypre_ParILUDataF(ilu_data);
72 hypre_ParVector *U_array = hypre_ParILUDataU(ilu_data);
73
74 /* get settings */
75 HYPRE_Real tol = hypre_ParILUDataTol(ilu_data);
76 HYPRE_Int logging = hypre_ParILUDataLogging(ilu_data);
77 HYPRE_Int print_level = hypre_ParILUDataPrintLevel(ilu_data);
78 HYPRE_Int max_iter = hypre_ParILUDataMaxIter(ilu_data);
79 HYPRE_Real *norms = hypre_ParILUDataRelResNorms(ilu_data);
80 hypre_ParVector *Ftemp = hypre_ParILUDataFTemp(ilu_data);
81 hypre_ParVector *Utemp = hypre_ParILUDataUTemp(ilu_data);
82 hypre_ParVector *Xtemp = hypre_ParILUDataXTemp(ilu_data);
83 hypre_ParVector *Ytemp = hypre_ParILUDataYTemp(ilu_data);
84 HYPRE_Real *fext = hypre_ParILUDataFExt(ilu_data);
85 HYPRE_Real *uext = hypre_ParILUDataUExt(ilu_data);
86 hypre_ParVector *residual;
87
88 HYPRE_Real alpha = -1;
89 HYPRE_Real beta = 1;
90 HYPRE_Real conv_factor = 0.0;
91 HYPRE_Real resnorm = 1.0;
92 HYPRE_Real init_resnorm = 0.0;
93 HYPRE_Real rel_resnorm;
94 HYPRE_Real rhs_norm = 0.0;
95 HYPRE_Real old_resnorm;
96 HYPRE_Real ieee_check = 0.0;
97 HYPRE_Real operat_cmplxty = hypre_ParILUDataOperatorComplexity(ilu_data);
98
99 HYPRE_Int Solve_err_flag;
100 #ifdef HYPRE_USING_CUDA
101 HYPRE_Int test_opt;
102 #endif
103
104 /* problem size */
105 HYPRE_Int n = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
106 HYPRE_Int nLU = hypre_ParILUDataNLU(ilu_data);
107 HYPRE_Int *u_end = hypre_ParILUDataUEnd(ilu_data);
108
109 /* Schur system solve */
110 HYPRE_Solver schur_solver = hypre_ParILUDataSchurSolver(ilu_data);
111 HYPRE_Solver schur_precond = hypre_ParILUDataSchurPrecond(ilu_data);
112 hypre_ParVector *rhs = hypre_ParILUDataRhs(ilu_data);
113 hypre_ParVector *x = hypre_ParILUDataX(ilu_data);
114
115 /* begin */
116 HYPRE_ANNOTATE_FUNC_BEGIN;
117
118 if(logging > 1)
119 {
120 residual = hypre_ParILUDataResidual(ilu_data);
121 }
122
123 hypre_ParILUDataNumIterations(ilu_data) = 0;
124
125 hypre_MPI_Comm_size(comm, &num_procs);
126 hypre_MPI_Comm_rank(comm,&my_id);
127
128 /*-----------------------------------------------------------------------
129 * Write the solver parameters
130 *-----------------------------------------------------------------------*/
131 if (my_id == 0 && print_level > 1)
132 {
133 hypre_ILUWriteSolverParams(ilu_data);
134 }
135
136 /*-----------------------------------------------------------------------
137 * Initialize the solver error flag
138 *-----------------------------------------------------------------------*/
139
140 Solve_err_flag = 0;
141 /*-----------------------------------------------------------------------
142 * write some initial info
143 *-----------------------------------------------------------------------*/
144
145 if (my_id == 0 && print_level > 1 && tol > 0.)
146 {
147 hypre_printf("\n\n ILU SOLVER SOLUTION INFO:\n");
148 }
149
150
151 /*-----------------------------------------------------------------------
152 * Compute initial residual and print
153 *-----------------------------------------------------------------------*/
154 if (print_level > 1 || logging > 1 || tol > 0.)
155 {
156 if ( logging > 1 )
157 {
158 hypre_ParVectorCopy(f, residual );
159 if (tol > 0.0)
160 {
161 hypre_ParCSRMatrixMatvec(alpha, A, u, beta, residual );
162 }
163 resnorm = sqrt(hypre_ParVectorInnerProd( residual, residual ));
164 }
165 else
166 {
167 hypre_ParVectorCopy(f, Ftemp);
168 if (tol > 0.0)
169 {
170 hypre_ParCSRMatrixMatvec(alpha, A, u, beta, Ftemp);
171 }
172 resnorm = sqrt(hypre_ParVectorInnerProd(Ftemp, Ftemp));
173 }
174
175 /* Since it is does not diminish performance, attempt to return an error flag
176 and notify users when they supply bad input. */
177 if (resnorm != 0.)
178 {
179 ieee_check = resnorm/resnorm; /* INF -> NaN conversion */
180 }
181 if (ieee_check != ieee_check)
182 {
183 /* ...INFs or NaNs in input can make ieee_check a NaN. This test
184 for ieee_check self-equality works on all IEEE-compliant compilers/
185 machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
186 by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be
187 found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
188 if (print_level > 0)
189 {
190 hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
191 hypre_printf("ERROR -- hypre_ILUSolve: INFs and/or NaNs detected in input.\n");
192 hypre_printf("User probably placed non-numerics in supplied A, x_0, or b.\n");
193 hypre_printf("ERROR detected by Hypre ... END\n\n\n");
194 }
195 hypre_error(HYPRE_ERROR_GENERIC);
196 HYPRE_ANNOTATE_FUNC_END;
197
198 return hypre_error_flag;
199 }
200
201 init_resnorm = resnorm;
202 rhs_norm = sqrt(hypre_ParVectorInnerProd(f, f));
203 if (rhs_norm > HYPRE_REAL_EPSILON)
204 {
205 rel_resnorm = init_resnorm / rhs_norm;
206 }
207 else
208 {
209 /* rhs is zero, return a zero solution */
210 hypre_ParVectorSetConstantValues(U_array, 0.0);
211 if(logging > 0)
212 {
213 rel_resnorm = 0.0;
214 hypre_ParILUDataFinalRelResidualNorm(ilu_data) = rel_resnorm;
215 }
216 HYPRE_ANNOTATE_FUNC_END;
217
218 return hypre_error_flag;
219 }
220 }
221 else
222 {
223 rel_resnorm = 1.;
224 }
225
226 if (my_id == 0 && print_level > 1)
227 {
228 hypre_printf(" relative\n");
229 hypre_printf(" residual factor residual\n");
230 hypre_printf(" -------- ------ --------\n");
231 hypre_printf(" Initial %e %e\n",init_resnorm,
232 rel_resnorm);
233 }
234
235 matA = A;
236 U_array = u;
237 F_array = f;
238
239 /************** Main Solver Loop - always do 1 iteration ************/
240 iter = 0;
241
242 while ((rel_resnorm >= tol || iter < 1)
243 && iter < max_iter)
244 {
245
246 /* Do one solve on LUe=r */
247 switch(ilu_type){
248 case 0: case 1:
249 #ifdef HYPRE_USING_CUDA
250 /* Apply GPU-accelerated LU solve */
251 hypre_ILUSolveCusparseLU(matA, matL_des, matU_des, matBL_info, matBU_info, matBLU_d, ilu_solve_policy,
252 ilu_solve_buffer, F_array, U_array, perm, n, Utemp, Ftemp);//BJ-cusparse
253 #else
254 hypre_ILUSolveLU(matA, F_array, U_array, perm, n, matL, matD, matU, Utemp, Ftemp); //BJ
255 #endif
256 break;
257 case 10: case 11:
258 #ifdef HYPRE_USING_CUDA
259 /* Apply GPU-accelerated LU solve */
260 hypre_ILUSolveCusparseSchurGMRES(matA, F_array, U_array, perm, nLU, matS, Utemp, Ftemp, schur_solver, schur_precond, rhs, x, u_end,
261 matL_des, matU_des, matBL_info, matBU_info, matSL_info, matSU_info,
262 matBLU_d, matE_d, matF_d, ilu_solve_policy, ilu_solve_buffer);//GMRES-cusparse
263 #else
264 hypre_ILUSolveSchurGMRES(matA, F_array, U_array, perm, perm, nLU, matL, matD, matU, matS,
265 Utemp, Ftemp, schur_solver, schur_precond, rhs, x, u_end); //GMRES
266 #endif
267 break;
268 case 20: case 21:
269 hypre_ILUSolveSchurNSH(matA, F_array, U_array, perm, nLU, matL, matD, matU, matS,
270 Utemp, Ftemp, schur_solver, rhs, x, u_end); //MR+NSH
271 break;
272 case 30: case 31:
273 hypre_ILUSolveLURAS(matA, F_array, U_array, perm, matL, matD, matU, Utemp, Utemp, fext, uext); //RAS
274 break;
275 case 40: case 41:
276 hypre_ILUSolveSchurGMRES(matA, F_array, U_array, perm, qperm, nLU, matL, matD, matU, matS,
277 Utemp, Ftemp, schur_solver, schur_precond, rhs, x, u_end); //GMRES
278 break;
279 case 50:
280 #ifdef HYPRE_USING_CUDA
281 test_opt = hypre_ParILUDataTestOption(ilu_data);
282 hypre_ILUSolveRAPGMRES(matA, F_array, U_array, perm, nLU, matS, Utemp, Ftemp, Xtemp, Ytemp, schur_solver, schur_precond, rhs, x, u_end,
283 matL_des, matU_des, matAL_info, matAU_info, matBL_info, matBU_info, matSL_info, matSU_info,
284 Aperm, matALU_d, matBLU_d, matE_d, matF_d, ilu_solve_policy, ilu_solve_buffer, test_opt);//GMRES-RAP
285 #else
286 hypre_ILUSolveRAPGMRESHOST(matA, F_array, U_array, perm, nLU, matL, matD, matU, matmL, matmD, matmU, Utemp, Ftemp, Xtemp, Ytemp,
287 schur_solver, schur_precond, rhs, x, u_end);//GMRES-RAP
288 #endif
289 break;
290 default:
291 #ifdef HYPRE_USING_CUDA
292 /* Apply GPU-accelerated LU solve */
293 hypre_ILUSolveCusparseLU(matA, matL_des, matU_des, matBL_info, matBU_info, matBLU_d, ilu_solve_policy,
294 ilu_solve_buffer, F_array, U_array, perm, n, Utemp, Ftemp);//BJ-cusparse
295 #else
296 hypre_ILUSolveLU(matA, F_array, U_array, perm, n, matL, matD, matU, Utemp, Ftemp); //BJ
297 #endif
298 break;
299
300 }
301
302 /*---------------------------------------------------------------
303 * Compute residual and residual norm
304 *----------------------------------------------------------------*/
305
306 if (print_level > 1 || logging > 1 || tol > 0.)
307 {
308 old_resnorm = resnorm;
309
310 if ( logging > 1 ) {
311 hypre_ParVectorCopy(F_array, residual);
312 hypre_ParCSRMatrixMatvec(alpha, matA, U_array, beta, residual );
313 resnorm = sqrt(hypre_ParVectorInnerProd( residual, residual ));
314 }
315 else {
316 hypre_ParVectorCopy(F_array, Ftemp);
317 hypre_ParCSRMatrixMatvec(alpha, matA, U_array, beta, Ftemp);
318 resnorm = sqrt(hypre_ParVectorInnerProd(Ftemp, Ftemp));
319 }
320
321 if (old_resnorm) conv_factor = resnorm / old_resnorm;
322 else conv_factor = resnorm;
323 if (rhs_norm > HYPRE_REAL_EPSILON)
324 {
325 rel_resnorm = resnorm / rhs_norm;
326 }
327 else
328 {
329 rel_resnorm = resnorm;
330 }
331
332 norms[iter] = rel_resnorm;
333 }
334
335 ++iter;
336 hypre_ParILUDataNumIterations(ilu_data) = iter;
337 hypre_ParILUDataFinalRelResidualNorm(ilu_data) = rel_resnorm;
338
339 if (my_id == 0 && print_level > 1)
340 {
341 hypre_printf(" ILUSolve %2d %e %f %e \n", iter,
342 resnorm, conv_factor, rel_resnorm);
343 }
344 }
345
346 /* check convergence within max_iter */
347 if (iter == max_iter && tol > 0.)
348 {
349 Solve_err_flag = 1;
350 hypre_error(HYPRE_ERROR_CONV);
351 }
352
353 /*-----------------------------------------------------------------------
354 * Print closing statistics
355 * Add operator and grid complexity stats
356 *-----------------------------------------------------------------------*/
357
358 if (iter > 0 && init_resnorm)
359 {
360 conv_factor = pow((resnorm/init_resnorm),(1.0/(HYPRE_Real) iter));
361 }
362 else
363 {
364 conv_factor = 1.;
365 }
366
367 if (print_level > 1)
368 {
369 /*** compute operator and grid complexity (fill factor) here ?? ***/
370 if (my_id == 0)
371 {
372 if (Solve_err_flag == 1)
373 {
374 hypre_printf("\n\n==============================================");
375 hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
376 hypre_printf(" within the allowed %d iterations\n",max_iter);
377 hypre_printf("==============================================");
378 }
379 hypre_printf("\n\n Average Convergence Factor = %f \n",conv_factor);
380 hypre_printf(" operator = %f\n",operat_cmplxty);
381 }
382 }
383
384 HYPRE_ANNOTATE_FUNC_END;
385
386 return hypre_error_flag;
387 }
388
389 /* Schur Complement solve with GMRES on schur complement
390 * ParCSRMatrix S is already built in ilu data sturcture, here directly use S
391 * L, D and U factors only have local scope (no off-diagonal processor terms)
392 * so apart from the residual calculation (which uses A), the solves with the
393 * L and U factors are local.
394 * S is the global Schur complement
395 * schur_solver is a GMRES solver
396 * schur_precond is the ILU preconditioner for GMRES
397 * rhs and x are helper vector for solving Schur system
398 */
399
400 HYPRE_Int
hypre_ILUSolveSchurGMRES(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int * qperm,HYPRE_Int nLU,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParCSRMatrix * S,hypre_ParVector * ftemp,hypre_ParVector * utemp,HYPRE_Solver schur_solver,HYPRE_Solver schur_precond,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end)401 hypre_ILUSolveSchurGMRES(hypre_ParCSRMatrix *A, hypre_ParVector *f,
402 hypre_ParVector *u, HYPRE_Int *perm, HYPRE_Int *qperm,
403 HYPRE_Int nLU, hypre_ParCSRMatrix *L,
404 HYPRE_Real* D, hypre_ParCSRMatrix *U,
405 hypre_ParCSRMatrix *S,
406 hypre_ParVector *ftemp, hypre_ParVector *utemp,
407 HYPRE_Solver schur_solver, HYPRE_Solver schur_precond,
408 hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end)
409 {
410 /* data objects for communication */
411 // MPI_Comm comm = hypre_ParCSRMatrixComm(A);
412
413 /* data objects for L and U */
414 hypre_CSRMatrix *L_diag = hypre_ParCSRMatrixDiag(L);
415 HYPRE_Real *L_diag_data = hypre_CSRMatrixData(L_diag);
416 HYPRE_Int *L_diag_i = hypre_CSRMatrixI(L_diag);
417 HYPRE_Int *L_diag_j = hypre_CSRMatrixJ(L_diag);
418 hypre_CSRMatrix *U_diag = hypre_ParCSRMatrixDiag(U);
419 HYPRE_Real *U_diag_data = hypre_CSRMatrixData(U_diag);
420 HYPRE_Int *U_diag_i = hypre_CSRMatrixI(U_diag);
421 HYPRE_Int *U_diag_j = hypre_CSRMatrixJ(U_diag);
422 hypre_Vector *utemp_local = hypre_ParVectorLocalVector(utemp);
423 HYPRE_Real *utemp_data = hypre_VectorData(utemp_local);
424 hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp);
425 HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local);
426
427 HYPRE_Real alpha;
428 HYPRE_Real beta;
429 HYPRE_Int i, j, k1, k2, col;
430
431 /* problem size */
432 HYPRE_Int n = hypre_CSRMatrixNumRows(L_diag);
433 // HYPRE_Int m = n - nLU;
434
435 /* other data objects for computation */
436 // hypre_Vector *f_local;
437 // HYPRE_Real *f_data;
438 hypre_Vector *rhs_local;
439 HYPRE_Real *rhs_data;
440 hypre_Vector *x_local;
441 HYPRE_Real *x_data;
442
443 /* begin */
444 beta = 1.0;
445 alpha = -1.0;
446
447 /* compute residual */
448 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
449
450 /* 1st need to solve LBi*xi = fi
451 * L solve, solve xi put in u_temp upper
452 */
453 // f_local = hypre_ParVectorLocalVector(f);
454 // f_data = hypre_VectorData(f_local);
455 /* now update with L to solve */
456 for(i = 0 ; i < nLU ; i ++)
457 {
458 utemp_data[qperm[i]] = ftemp_data[perm[i]];
459 k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
460 for(j = k1 ; j < k2 ; j ++)
461 {
462 utemp_data[qperm[i]] -= L_diag_data[j] * utemp_data[qperm[L_diag_j[j]]];
463 }
464 }
465
466 /* 2nd need to compute g'i = gi - Ei*UBi^-1*xi
467 * now put g'i into the f_temp lower
468 */
469 for(i = nLU ; i < n ; i ++)
470 {
471 k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
472 for(j = k1 ; j < k2 ; j ++)
473 {
474 col = L_diag_j[j];
475 ftemp_data[perm[i]] -= L_diag_data[j] * utemp_data[qperm[col]];
476 }
477 }
478
479 /* 3rd need to solve global Schur Complement Sy = g'
480 * for now only solve the local system
481 * solve y put in u_temp lower
482 * only solve whe S is not NULL
483 */
484 if(S)
485 {
486 /*initialize solution to zero for residual equation */
487 hypre_ParVectorSetConstantValues(x, 0.0);
488 /* setup vectors for solve */
489 rhs_local = hypre_ParVectorLocalVector(rhs);
490 rhs_data = hypre_VectorData(rhs_local);
491 x_local = hypre_ParVectorLocalVector(x);
492 x_data = hypre_VectorData(x_local);
493
494 /* set rhs value */
495 for(i = nLU ; i < n ; i ++)
496 {
497 rhs_data[i-nLU] = ftemp_data[perm[i]];
498 }
499
500 /* solve */
501 HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)S,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
502
503 /* copy value back to original */
504 for(i = nLU ; i < n ; i ++)
505 {
506 utemp_data[qperm[i]] = x_data[i-nLU];
507 }
508 }
509
510 /* 4th need to compute zi = xi - LBi^-1*Fi*yi
511 * put zi in f_temp upper
512 * only do this computation when nLU < n
513 * U is unsorted, search is expensive when unnecessary
514 */
515 if(nLU < n)
516 {
517 for(i = 0 ; i < nLU ; i ++)
518 {
519 ftemp_data[perm[i]] = utemp_data[qperm[i]];
520 k1 = u_end[i] ; k2 = U_diag_i[i+1];
521 for(j = k1 ; j < k2 ; j ++)
522 {
523 col = U_diag_j[j];
524 ftemp_data[perm[i]] -= U_diag_data[j] * utemp_data[qperm[col]];
525 }
526 }
527 for(i = 0 ; i < nLU ; i ++)
528 {
529 utemp_data[qperm[i]] = ftemp_data[perm[i]];
530 }
531 }
532
533 /* 5th need to solve UBi*ui = zi */
534 /* put result in u_temp upper */
535 for(i = nLU-1 ; i >= 0 ; i --)
536 {
537 k1 = U_diag_i[i] ; k2 = u_end[i];
538 for(j = k1 ; j < k2 ; j ++)
539 {
540 col = U_diag_j[j];
541 utemp_data[qperm[i]] -= U_diag_data[j] * utemp_data[qperm[col]];
542 }
543 utemp_data[qperm[i]] *= D[i];
544 }
545
546 /* done, now everything are in u_temp, update solution */
547 hypre_ParVectorAxpy(beta, utemp, u);
548 return hypre_error_flag;
549 }
550
551 /* Newton-Schulz-Hotelling solve
552 * ParCSRMatrix S is already built in ilu data sturcture
553 * S here is the INVERSE of Schur Complement
554 * L, D and U factors only have local scope (no off-diagonal processor terms)
555 * so apart from the residual calculation (which uses A), the solves with the
556 * L and U factors are local.
557 * S is the inverse global Schur complement
558 * rhs and x are helper vector for solving Schur system
559 */
560
561 HYPRE_Int
hypre_ILUSolveSchurNSH(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParCSRMatrix * S,hypre_ParVector * ftemp,hypre_ParVector * utemp,HYPRE_Solver schur_solver,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end)562 hypre_ILUSolveSchurNSH(hypre_ParCSRMatrix *A, hypre_ParVector *f,
563 hypre_ParVector *u, HYPRE_Int *perm,
564 HYPRE_Int nLU, hypre_ParCSRMatrix *L,
565 HYPRE_Real* D, hypre_ParCSRMatrix *U,
566 hypre_ParCSRMatrix *S,
567 hypre_ParVector *ftemp, hypre_ParVector *utemp,
568 HYPRE_Solver schur_solver,
569 hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end)
570 {
571 /* data objects for communication */
572 // MPI_Comm comm = hypre_ParCSRMatrixComm(A);
573
574 /* data objects for L and U */
575 hypre_CSRMatrix *L_diag = hypre_ParCSRMatrixDiag(L);
576 HYPRE_Real *L_diag_data = hypre_CSRMatrixData(L_diag);
577 HYPRE_Int *L_diag_i = hypre_CSRMatrixI(L_diag);
578 HYPRE_Int *L_diag_j = hypre_CSRMatrixJ(L_diag);
579 hypre_CSRMatrix *U_diag = hypre_ParCSRMatrixDiag(U);
580 HYPRE_Real *U_diag_data = hypre_CSRMatrixData(U_diag);
581 HYPRE_Int *U_diag_i = hypre_CSRMatrixI(U_diag);
582 HYPRE_Int *U_diag_j = hypre_CSRMatrixJ(U_diag);
583 hypre_Vector *utemp_local = hypre_ParVectorLocalVector(utemp);
584 HYPRE_Real *utemp_data = hypre_VectorData(utemp_local);
585 hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp);
586 HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local);
587
588 HYPRE_Real alpha;
589 HYPRE_Real beta;
590 HYPRE_Int i, j, k1, k2, col;
591
592 /* problem size */
593 HYPRE_Int n = hypre_CSRMatrixNumRows(L_diag);
594 // HYPRE_Int m = n - nLU;
595
596 /* other data objects for computation */
597 // hypre_Vector *f_local;
598 // HYPRE_Real *f_data;
599 hypre_Vector *rhs_local;
600 HYPRE_Real *rhs_data;
601 hypre_Vector *x_local;
602 HYPRE_Real *x_data;
603
604 /* begin */
605 beta = 1.0;
606 alpha = -1.0;
607
608 /* compute residual */
609 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
610
611 /* 1st need to solve LBi*xi = fi
612 * L solve, solve xi put in u_temp upper
613 */
614 // f_local = hypre_ParVectorLocalVector(f);
615 // f_data = hypre_VectorData(f_local);
616 /* now update with L to solve */
617 for(i = 0 ; i < nLU ; i ++)
618 {
619 utemp_data[perm[i]] = ftemp_data[perm[i]];
620 k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
621 for(j = k1 ; j < k2 ; j ++)
622 {
623 utemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[L_diag_j[j]]];
624 }
625 }
626
627 /* 2nd need to compute g'i = gi - Ei*UBi^-1*xi
628 * now put g'i into the f_temp lower
629 */
630 for(i = nLU ; i < n ; i ++)
631 {
632 k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
633 for(j = k1 ; j < k2 ; j ++)
634 {
635 col = L_diag_j[j];
636 ftemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[col]];
637 }
638 }
639
640 /* 3rd need to solve global Schur Complement Sy = g'
641 * for now only solve the local system
642 * solve y put in u_temp lower
643 * only solve when S is not NULL
644 */
645 if(S)
646 {
647 /*initialize solution to zero for residual equation */
648 hypre_ParVectorSetConstantValues(x, 0.0);
649 /* setup vectors for solve */
650 rhs_local = hypre_ParVectorLocalVector(rhs);
651 rhs_data = hypre_VectorData(rhs_local);
652 x_local = hypre_ParVectorLocalVector(x);
653 x_data = hypre_VectorData(x_local);
654
655 /* set rhs value */
656 for(i = nLU ; i < n ; i ++)
657 {
658 rhs_data[i-nLU] = ftemp_data[perm[i]];
659 }
660
661 /* Solve Schur system with approx inverse
662 * x = S*rhs
663 */
664 hypre_NSHSolve(schur_solver,S,rhs,x);
665
666 /* copy value back to original */
667 for(i = nLU ; i < n ; i ++)
668 {
669 utemp_data[perm[i]] = x_data[i-nLU];
670 }
671 }
672
673 /* 4th need to compute zi = xi - LBi^-1*yi
674 * put zi in f_temp upper
675 * only do this computation when nLU < n
676 * U is unsorted, search is expensive when unnecessary
677 */
678 if(nLU < n)
679 {
680 for(i = 0 ; i < nLU ; i ++)
681 {
682 ftemp_data[perm[i]] = utemp_data[perm[i]];
683 k1 = u_end[i] ; k2 = U_diag_i[i+1];
684 for(j = k1 ; j < k2 ; j ++)
685 {
686 col = U_diag_j[j];
687 ftemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[col]];
688 }
689 }
690 for(i = 0 ; i < nLU ; i ++)
691 {
692 utemp_data[perm[i]] = ftemp_data[perm[i]];
693 }
694 }
695
696 /* 5th need to solve UBi*ui = zi */
697 /* put result in u_temp upper */
698 for(i = nLU-1 ; i >= 0 ; i --)
699 {
700 k1 = U_diag_i[i] ; k2 = u_end[i];
701 for(j = k1 ; j < k2 ; j ++)
702 {
703 col = U_diag_j[j];
704 utemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[col]];
705 }
706 utemp_data[perm[i]] *= D[i];
707 }
708
709 /* done, now everything are in u_temp, update solution */
710 hypre_ParVectorAxpy(beta, utemp, u);
711
712 return hypre_error_flag;
713 }
714
715 /* Incomplete LU solve
716 * L, D and U factors only have local scope (no off-diagonal processor terms)
717 * so apart from the residual calculation (which uses A), the solves with the
718 * L and U factors are local.
719 */
720
721 HYPRE_Int
hypre_ILUSolveLU(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParVector * ftemp,hypre_ParVector * utemp)722 hypre_ILUSolveLU(hypre_ParCSRMatrix *A, hypre_ParVector *f,
723 hypre_ParVector *u, HYPRE_Int *perm,
724 HYPRE_Int nLU, hypre_ParCSRMatrix *L,
725 HYPRE_Real* D, hypre_ParCSRMatrix *U,
726 hypre_ParVector *ftemp, hypre_ParVector *utemp)
727 {
728 hypre_CSRMatrix *L_diag = hypre_ParCSRMatrixDiag(L);
729 HYPRE_Real *L_diag_data = hypre_CSRMatrixData(L_diag);
730 HYPRE_Int *L_diag_i = hypre_CSRMatrixI(L_diag);
731 HYPRE_Int *L_diag_j = hypre_CSRMatrixJ(L_diag);
732
733 hypre_CSRMatrix *U_diag = hypre_ParCSRMatrixDiag(U);
734 HYPRE_Real *U_diag_data = hypre_CSRMatrixData(U_diag);
735 HYPRE_Int *U_diag_i = hypre_CSRMatrixI(U_diag);
736 HYPRE_Int *U_diag_j = hypre_CSRMatrixJ(U_diag);
737
738 hypre_Vector *utemp_local = hypre_ParVectorLocalVector(utemp);
739 HYPRE_Real *utemp_data = hypre_VectorData(utemp_local);
740
741 hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp);
742 HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local);
743
744 HYPRE_Real alpha;
745 HYPRE_Real beta;
746 HYPRE_Int i, j, k1, k2;
747
748 /* begin */
749 alpha = -1.0;
750 beta = 1.0;
751
752 /* Initialize Utemp to zero.
753 * This is necessary for correctness, when we use optimized
754 * vector operations in the case where sizeof(L, D or U) < sizeof(A)
755 */
756 //hypre_ParVectorSetConstantValues( utemp, 0.);
757 /* compute residual */
758 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
759
760 /* L solve - Forward solve */
761 /* copy rhs to account for diagonal of L (which is identity) */
762 for( i = 0; i < nLU; i++ )
763 {
764 utemp_data[perm[i]] = ftemp_data[perm[i]];
765 }
766 /* update with remaining (off-diagonal) entries of L */
767 for( i = 0; i < nLU; i++ )
768 {
769 k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
770 for(j=k1; j <k2; j++)
771 {
772 utemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[L_diag_j[j]]];
773 }
774 }
775 /*-------------------- U solve - Backward substitution */
776 for( i = nLU-1; i >= 0; i-- )
777 {
778 /* first update with the remaining (off-diagonal) entries of U */
779 k1 = U_diag_i[i] ; k2 = U_diag_i[i+1];
780 for(j=k1; j <k2; j++)
781 {
782 utemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[U_diag_j[j]]];
783 }
784 /* diagonal scaling (contribution from D. Note: D is stored as its inverse) */
785 utemp_data[perm[i]] *= D[i];
786 }
787
788 /* Update solution */
789 hypre_ParVectorAxpy(beta, utemp, u);
790
791
792 return hypre_error_flag;
793 }
794
795
796 /* Incomplete LU solve RAS
797 * L, D and U factors only have local scope (no off-diagonal processor terms)
798 * so apart from the residual calculation (which uses A), the solves with the
799 * L and U factors are local.
800 * fext and uext are tempory arrays for external data
801 */
802
803 HYPRE_Int
hypre_ILUSolveLURAS(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParVector * ftemp,hypre_ParVector * utemp,HYPRE_Real * fext,HYPRE_Real * uext)804 hypre_ILUSolveLURAS(hypre_ParCSRMatrix *A, hypre_ParVector *f,
805 hypre_ParVector *u, HYPRE_Int *perm,
806 hypre_ParCSRMatrix *L,
807 HYPRE_Real* D, hypre_ParCSRMatrix *U,
808 hypre_ParVector *ftemp, hypre_ParVector *utemp,
809 HYPRE_Real *fext, HYPRE_Real *uext)
810 {
811
812 hypre_ParCSRCommPkg *comm_pkg;
813 hypre_ParCSRCommHandle *comm_handle;
814 HYPRE_Int num_sends, begin, end;
815
816 hypre_CSRMatrix *L_diag = hypre_ParCSRMatrixDiag(L);
817 HYPRE_Real *L_diag_data = hypre_CSRMatrixData(L_diag);
818 HYPRE_Int *L_diag_i = hypre_CSRMatrixI(L_diag);
819 HYPRE_Int *L_diag_j = hypre_CSRMatrixJ(L_diag);
820
821 hypre_CSRMatrix *U_diag = hypre_ParCSRMatrixDiag(U);
822 HYPRE_Real *U_diag_data = hypre_CSRMatrixData(U_diag);
823 HYPRE_Int *U_diag_i = hypre_CSRMatrixI(U_diag);
824 HYPRE_Int *U_diag_j = hypre_CSRMatrixJ(U_diag);
825
826 HYPRE_Int n = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(A));
827 HYPRE_Int m = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
828 // HYPRE_Int buffer_size;
829 HYPRE_Int n_total = m + n;
830
831 HYPRE_Int idx;
832 HYPRE_Int jcol;
833 HYPRE_Int col;
834
835 hypre_Vector *utemp_local = hypre_ParVectorLocalVector(utemp);
836 HYPRE_Real *utemp_data = hypre_VectorData(utemp_local);
837
838 hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp);
839 HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local);
840
841 HYPRE_Real alpha;
842 HYPRE_Real beta;
843 HYPRE_Int i, j, k1, k2;
844
845 /* begin */
846 alpha = -1.0;
847 beta = 1.0;
848
849 /* prepare for communication */
850 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
851 /* setup if not yet built */
852 if(!comm_pkg)
853 {
854 hypre_MatvecCommPkgCreate(A);
855 comm_pkg = hypre_ParCSRMatrixCommPkg(A);
856 }
857
858 /* Initialize Utemp to zero.
859 * This is necessary for correctness, when we use optimized
860 * vector operations in the case where sizeof(L, D or U) < sizeof(A)
861 */
862 //hypre_ParVectorSetConstantValues( utemp, 0.);
863 /* compute residual */
864 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
865
866 /* communication to get external data */
867
868 /* get total num of send */
869 num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
870 begin = hypre_ParCSRCommPkgSendMapStart(comm_pkg,0);
871 end = hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends);
872
873 /* copy new index into send_buf */
874 for(i = begin ; i < end ; i ++)
875 {
876 /* all we need is just send out data, we don't need to worry about the
877 * permutation of offd part, actually we don't need to worry about
878 * permutation at all
879 * borrow uext as send buffer .
880 */
881 uext[i-begin] = ftemp_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,i)];
882 }
883
884 /* main communication */
885 comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, uext, fext);
886 hypre_ParCSRCommHandleDestroy(comm_handle);
887
888 /* L solve - Forward solve */
889 for( i = 0 ; i < n_total ; i ++)
890 {
891 k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
892 if( i < n )
893 {
894 /* diag part */
895 utemp_data[perm[i]] = ftemp_data[perm[i]];
896 for(j=k1; j <k2; j++)
897 {
898 col = L_diag_j[j];
899 if( col < n )
900 {
901 utemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[col]];
902 }
903 else
904 {
905 jcol = col - n;
906 utemp_data[perm[i]] -= L_diag_data[j] * uext[jcol];
907 }
908 }
909 }
910 else
911 {
912 /* offd part */
913 idx = i - n;
914 uext[idx] = fext[idx];
915 for(j=k1; j <k2; j++)
916 {
917 col = L_diag_j[j];
918 if(col < n)
919 {
920 uext[idx] -= L_diag_data[j] * utemp_data[perm[col]];
921 }
922 else
923 {
924 jcol = col - n;
925 uext[idx] -= L_diag_data[j] * uext[jcol];
926 }
927 }
928 }
929 }
930
931 /*-------------------- U solve - Backward substitution */
932 for( i = n_total-1; i >= 0; i-- )
933 {
934 /* first update with the remaining (off-diagonal) entries of U */
935 k1 = U_diag_i[i] ; k2 = U_diag_i[i+1];
936 if( i < n )
937 {
938 /* diag part */
939 for(j=k1; j <k2; j++)
940 {
941 col = U_diag_j[j];
942 if( col < n )
943 {
944 utemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[col]];
945 }
946 else
947 {
948 jcol = col - n;
949 utemp_data[perm[i]] -= U_diag_data[j] * uext[jcol];
950 }
951 }
952 /* diagonal scaling (contribution from D. Note: D is stored as its inverse) */
953 utemp_data[perm[i]] *= D[i];
954 }
955 else
956 {
957 /* 2nd part of offd */
958 idx = i - n;
959 for(j=k1; j <k2; j++)
960 {
961 col = U_diag_j[j];
962 if( col < n )
963 {
964 uext[idx] -= U_diag_data[j] * utemp_data[perm[col]];
965 }
966 else
967 {
968 jcol = col - n;
969 uext[idx] -= U_diag_data[j] * uext[jcol];
970 }
971 }
972 /* diagonal scaling (contribution from D. Note: D is stored as its inverse) */
973 uext[idx] *= D[i];
974 }
975
976 }
977 /* Update solution */
978 hypre_ParVectorAxpy(beta, utemp, u);
979
980 return hypre_error_flag;
981 }
982
983 #ifdef HYPRE_USING_CUDA
984
985 /* Permutation function (for GPU version, can just call thrust)
986 * option 00: perm integer array
987 * option 01: rperm integer array
988 * option 10: perm real array
989 * option 11: rperm real array
990 * */
991 HYPRE_Int
hypre_ILUSeqVectorPerm(void * vectori,void * vectoro,HYPRE_Int size,HYPRE_Int * perm,HYPRE_Int option)992 hypre_ILUSeqVectorPerm(void *vectori, void *vectoro, HYPRE_Int size, HYPRE_Int *perm, HYPRE_Int option)
993 {
994 cudaDeviceSynchronize();
995 HYPRE_Int i;
996 switch(option)
997 {
998 case 00:
999 {
1000 HYPRE_Int *ivectori = (HYPRE_Int *) vectori;
1001 HYPRE_Int *ivectoro = (HYPRE_Int *) vectoro;
1002 for(i = 0 ; i < size ; i ++)
1003 {
1004 ivectoro[i] = ivectori[perm[i]];
1005 }
1006 break;
1007 }
1008 case 01:
1009 {
1010 HYPRE_Int *ivectori = (HYPRE_Int *) vectori;
1011 HYPRE_Int *ivectoro = (HYPRE_Int *) vectoro;
1012 for(i = 0 ; i < size ; i ++)
1013 {
1014 ivectoro[perm[i]] = ivectori[i];
1015 }
1016 break;
1017 }
1018 case 10:
1019 {
1020 HYPRE_Real *dvectori = (HYPRE_Real *) vectori;
1021 HYPRE_Real *dvectoro = (HYPRE_Real *) vectoro;
1022 for(i = 0 ; i < size ; i ++)
1023 {
1024 dvectoro[i] = dvectori[perm[i]];
1025 }
1026 break;
1027 }
1028 case 11:
1029 {
1030 HYPRE_Real *dvectori = (HYPRE_Real *) vectori;
1031 HYPRE_Real *dvectoro = (HYPRE_Real *) vectoro;
1032 for(i = 0 ; i < size ; i ++)
1033 {
1034 dvectoro[perm[i]] = dvectori[i];
1035 }
1036 break;
1037 }
1038 default:
1039 {
1040 printf("Error option in ILUSeqVectorPerm");
1041 hypre_assert(1==0);
1042 }
1043 }
1044 return hypre_error_flag;
1045 }
1046
1047 /* Incomplete LU solve (GPU)
1048 * L, D and U factors only have local scope (no off-diagonal processor terms)
1049 * so apart from the residual calculation (which uses A), the solves with the
1050 * L and U factors are local.
1051 */
1052
1053 HYPRE_Int
hypre_ILUSolveCusparseLU(hypre_ParCSRMatrix * A,cusparseMatDescr_t matL_des,cusparseMatDescr_t matU_des,csrsv2Info_t matL_info,csrsv2Info_t matU_info,hypre_CSRMatrix * matLU_d,cusparseSolvePolicy_t ilu_solve_policy,void * ilu_solve_buffer,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int n,hypre_ParVector * ftemp,hypre_ParVector * utemp)1054 hypre_ILUSolveCusparseLU(hypre_ParCSRMatrix *A, cusparseMatDescr_t matL_des, cusparseMatDescr_t matU_des,
1055 csrsv2Info_t matL_info, csrsv2Info_t matU_info, hypre_CSRMatrix *matLU_d,
1056 cusparseSolvePolicy_t ilu_solve_policy, void *ilu_solve_buffer,
1057 hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm,
1058 HYPRE_Int n, hypre_ParVector *ftemp, hypre_ParVector *utemp)
1059 {
1060
1061 /* Only solve when we have stuffs to be solved */
1062 if(n == 0)
1063 {
1064 return hypre_error_flag;
1065 }
1066
1067 /* ILU data */
1068 HYPRE_Real *LU_data = hypre_CSRMatrixData(matLU_d);
1069 HYPRE_Int *LU_i = hypre_CSRMatrixI(matLU_d);
1070 HYPRE_Int *LU_j = hypre_CSRMatrixJ(matLU_d);
1071 HYPRE_Int nnz = LU_i[n];
1072
1073 hypre_Vector *utemp_local = hypre_ParVectorLocalVector(utemp);
1074 HYPRE_Real *utemp_data = hypre_VectorData(utemp_local);
1075
1076 hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp);
1077 HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local);
1078
1079 HYPRE_Real alpha;
1080 HYPRE_Real beta;
1081 //HYPRE_Int i, j, k1, k2;
1082
1083 HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
1084 HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
1085
1086 hypre_assert(isDoublePrecision || isSinglePrecision);
1087
1088 /* begin */
1089 alpha = -1.0;
1090 beta = 1.0;
1091
1092 cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
1093
1094 /* Initialize Utemp to zero.
1095 * This is necessary for correctness, when we use optimized
1096 * vector operations in the case where sizeof(L, D or U) < sizeof(A)
1097 */
1098 //hypre_ParVectorSetConstantValues( utemp, 0.);
1099 /* compute residual */
1100 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
1101
1102 /* apply permutation */
1103 HYPRE_THRUST_CALL(gather, perm, perm + n, ftemp_data, utemp_data);
1104
1105 if(isDoublePrecision)
1106 {
1107 /* L solve - Forward solve */
1108 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1109 n, nnz, (hypre_double *) &beta, matL_des,
1110 (hypre_double *) LU_data, LU_i, LU_j, matL_info,
1111 (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1112
1113 /* U solve - Backward substitution */
1114 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1115 n, nnz, (hypre_double *) &beta, matU_des,
1116 (hypre_double *) LU_data, LU_i, LU_j, matU_info,
1117 (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1118 }
1119 else if(isSinglePrecision)
1120 {
1121 /* L solve - Forward solve */
1122 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1123 n, nnz, (float *) &beta, matL_des,
1124 (float *) LU_data, LU_i, LU_j, matL_info,
1125 (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1126
1127 /* U solve - Backward substitution */
1128 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1129 n, nnz, (float *) &beta, matU_des,
1130 (float *) LU_data, LU_i, LU_j, matU_info,
1131 (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1132 }
1133
1134 /* apply reverse permutation */
1135 HYPRE_THRUST_CALL(scatter,utemp_data, utemp_data + n, perm, ftemp_data);
1136 /* Update solution */
1137 hypre_ParVectorAxpy(beta, ftemp, u);
1138
1139
1140 return hypre_error_flag;
1141 }
1142
1143 /* Schur Complement solve with GMRES on schur complement
1144 * ParCSRMatrix S is already built in ilu data sturcture, here directly use S
1145 * L, D and U factors only have local scope (no off-diagonal processor terms)
1146 * so apart from the residual calculation (which uses A), the solves with the
1147 * L and U factors are local.
1148 * S is the global Schur complement
1149 * schur_solver is a GMRES solver
1150 * schur_precond is the ILU preconditioner for GMRES
1151 * rhs and x are helper vector for solving Schur system
1152 */
1153
1154 HYPRE_Int
hypre_ILUSolveCusparseSchurGMRES(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * S,hypre_ParVector * ftemp,hypre_ParVector * utemp,HYPRE_Solver schur_solver,HYPRE_Solver schur_precond,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end,cusparseMatDescr_t matL_des,cusparseMatDescr_t matU_des,csrsv2Info_t matBL_info,csrsv2Info_t matBU_info,csrsv2Info_t matSL_info,csrsv2Info_t matSU_info,hypre_CSRMatrix * matBLU_d,hypre_CSRMatrix * matE_d,hypre_CSRMatrix * matF_d,cusparseSolvePolicy_t ilu_solve_policy,void * ilu_solve_buffer)1155 hypre_ILUSolveCusparseSchurGMRES(hypre_ParCSRMatrix *A, hypre_ParVector *f,
1156 hypre_ParVector *u, HYPRE_Int *perm,
1157 HYPRE_Int nLU, hypre_ParCSRMatrix *S,
1158 hypre_ParVector *ftemp, hypre_ParVector *utemp,
1159 HYPRE_Solver schur_solver, HYPRE_Solver schur_precond,
1160 hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end,
1161 cusparseMatDescr_t matL_des, cusparseMatDescr_t matU_des,
1162 csrsv2Info_t matBL_info, csrsv2Info_t matBU_info, csrsv2Info_t matSL_info, csrsv2Info_t matSU_info,
1163 hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d,
1164 cusparseSolvePolicy_t ilu_solve_policy, void *ilu_solve_buffer)
1165 {
1166 /* If we don't have S block, just do one L solve and one U solve */
1167 if(!S)
1168 {
1169 /* Just call BJ cusparse and return */
1170 return hypre_ILUSolveCusparseLU(A, matL_des, matU_des, matBL_info, matBU_info, matBLU_d, ilu_solve_policy,
1171 ilu_solve_buffer, f, u, perm, nLU, ftemp, utemp);
1172 }
1173
1174 /* data objects for communication */
1175 // MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1176
1177 /* data objects for temp vector */
1178 hypre_Vector *utemp_local = hypre_ParVectorLocalVector(utemp);
1179 HYPRE_Real *utemp_data = hypre_VectorData(utemp_local);
1180 hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp);
1181 HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local);
1182 hypre_Vector *rhs_local = hypre_ParVectorLocalVector(rhs);
1183 HYPRE_Real *rhs_data = hypre_VectorData(rhs_local);
1184 hypre_Vector *x_local = hypre_ParVectorLocalVector(x);
1185 HYPRE_Real *x_data = hypre_VectorData(x_local);
1186
1187 HYPRE_Real alpha;
1188 HYPRE_Real beta;
1189 //HYPRE_Real gamma;
1190 //HYPRE_Int i, j, k1, k2, col;
1191
1192 /* problem size */
1193 HYPRE_Int *BLU_i = NULL;
1194 HYPRE_Int *BLU_j = NULL;
1195 HYPRE_Real *BLU_data = NULL;
1196 HYPRE_Int BLU_nnz = 0;
1197 hypre_CSRMatrix *matSLU_d = hypre_ParCSRMatrixDiag(S);
1198 HYPRE_Int *SLU_i = hypre_CSRMatrixI(matSLU_d);
1199 HYPRE_Int *SLU_j = hypre_CSRMatrixJ(matSLU_d);
1200 HYPRE_Real *SLU_data = hypre_CSRMatrixData(matSLU_d);
1201 HYPRE_Int m = hypre_CSRMatrixNumRows(matSLU_d);
1202 HYPRE_Int n = nLU + m;
1203 HYPRE_Int SLU_nnz = SLU_i[m];
1204
1205 hypre_Vector *ftemp_upper = hypre_SeqVectorCreate(nLU);
1206 hypre_Vector *utemp_lower = hypre_SeqVectorCreate(m);
1207 hypre_VectorOwnsData(ftemp_upper) = 0;
1208 hypre_VectorOwnsData(utemp_lower) = 0;
1209 hypre_VectorData(ftemp_upper) = ftemp_data;
1210 hypre_VectorData(utemp_lower) = utemp_data + nLU;
1211 hypre_SeqVectorInitialize(ftemp_upper);
1212 hypre_SeqVectorInitialize(utemp_lower);
1213
1214 if( nLU > 0)
1215 {
1216 BLU_i = hypre_CSRMatrixI(matBLU_d);
1217 BLU_j = hypre_CSRMatrixJ(matBLU_d);
1218 BLU_data = hypre_CSRMatrixData(matBLU_d);
1219 BLU_nnz = BLU_i[nLU];
1220 }
1221
1222 /* begin */
1223 beta = 1.0;
1224 alpha = -1.0;
1225 //gamma = 0.0;
1226
1227 HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
1228 HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
1229
1230 hypre_assert(isDoublePrecision || isSinglePrecision);
1231
1232 cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
1233
1234 /* compute residual */
1235 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
1236
1237 /* 1st need to solve LBi*xi = fi
1238 * L solve, solve xi put in u_temp upper
1239 */
1240
1241 /* apply permutation before we can start our solve */
1242 HYPRE_THRUST_CALL(gather, perm, perm + n, ftemp_data, utemp_data);
1243
1244 if(nLU > 0)
1245 {
1246
1247 /* This solve won't touch data in utemp, thus, gi is still in utemp_lower */
1248 if(isDoublePrecision)
1249 {
1250 /* L solve - Forward solve */
1251 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1252 nLU, BLU_nnz, (hypre_double *) &beta, matL_des,
1253 (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1254 (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1255 }
1256 else if(isSinglePrecision)
1257 {
1258 /* L solve - Forward solve */
1259 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1260 nLU, BLU_nnz, (float *) &beta, matL_des,
1261 (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1262 (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1263 }
1264
1265 /* 2nd need to compute g'i = gi - Ei*UBi^{-1}*xi
1266 * Ei*UBi^{-1} is exactly the matE_d here
1267 * Now: LBi^{-1}f_i is in ftemp_upper
1268 * gi' is in utemp_lower
1269 */
1270
1271 hypre_CSRMatrixMatvec(alpha, matE_d, ftemp_upper, beta, utemp_lower);
1272
1273 }
1274
1275 /* 3rd need to solve global Schur Complement M^{-1}Sy = M^{-1}g'
1276 * for now only solve the local system
1277 * solve y put in u_temp lower
1278 * only solve whe S is not NULL
1279 */
1280
1281 /* setup vectors for solve
1282 * rhs = M^{-1}g'
1283 */
1284
1285 if(m > 0)
1286 {
1287 if(isDoublePrecision)
1288 {
1289 /* L solve */
1290 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1291 m, SLU_nnz, (hypre_double *) &beta, matL_des,
1292 (hypre_double *) SLU_data, SLU_i, SLU_j, matSL_info,
1293 (hypre_double *) utemp_data + nLU, (hypre_double *) ftemp_data + nLU, ilu_solve_policy, ilu_solve_buffer));
1294
1295 /* U solve */
1296 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1297 m, SLU_nnz, (hypre_double *) &beta, matU_des,
1298 (hypre_double *) SLU_data, SLU_i, SLU_j, matSU_info,
1299 (hypre_double *) ftemp_data + nLU, (hypre_double *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1300 }
1301 else if(isSinglePrecision)
1302 {
1303 /* L solve */
1304 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1305 m, SLU_nnz, (float *) &beta, matL_des,
1306 (float *) SLU_data, SLU_i, SLU_j, matSL_info,
1307 (float *) utemp_data + nLU, (float *) ftemp_data + nLU, ilu_solve_policy, ilu_solve_buffer));
1308
1309 /* U solve */
1310 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1311 m, SLU_nnz, (float *) &beta, matU_des,
1312 (float *) SLU_data, SLU_i, SLU_j, matSU_info,
1313 (float *) ftemp_data + nLU, (float *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1314 }
1315 }
1316
1317
1318 /* solve */
1319 /* with tricky initial guess */
1320 //hypre_Vector *tv = hypre_ParVectorLocalVector(x);
1321 //HYPRE_Real *tz = hypre_VectorData(tv);
1322 HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)schur_precond,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
1323 /* 4th need to compute zi = xi - LBi^-1*yi
1324 * put zi in f_temp upper
1325 * only do this computation when nLU < n
1326 * U is unsorted, search is expensive when unnecessary
1327 */
1328
1329 if(nLU > 0)
1330 {
1331 hypre_CSRMatrixMatvec(alpha, matF_d, x_local, beta, ftemp_upper);
1332
1333 /* 5th need to solve UBi*ui = zi */
1334 /* put result in u_temp upper */
1335
1336 if(isDoublePrecision)
1337 {
1338 /* U solve - Forward solve */
1339 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1340 nLU, BLU_nnz, (hypre_double *) &beta, matU_des,
1341 (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1342 (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1343 }
1344 else if(isSinglePrecision)
1345 {
1346 /* U solve - Forward solve */
1347 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1348 nLU, BLU_nnz, (float *) &beta, matU_des,
1349 (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1350 (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1351 }
1352 }
1353
1354 /* copy lower part solution into u_temp as well */
1355 hypre_TMemcpy(utemp_data + nLU, x_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1356
1357 /* perm back */
1358 HYPRE_THRUST_CALL(scatter,utemp_data, utemp_data + n, perm, ftemp_data);
1359
1360 /* done, now everything are in u_temp, update solution */
1361 hypre_ParVectorAxpy(beta, ftemp, u);
1362
1363 hypre_SeqVectorDestroy(ftemp_upper);
1364 hypre_SeqVectorDestroy(utemp_lower);
1365
1366 return hypre_error_flag;
1367 }
1368
1369 /* Schur Complement solve with GMRES on schur complement, RAP style
1370 * ParCSRMatrix S is already built in ilu data sturcture, here directly use S
1371 * L, D and U factors only have local scope (no off-diagonal processor terms)
1372 * so apart from the residual calculation (which uses A), the solves with the
1373 * L and U factors are local.
1374 * S is the global Schur complement
1375 * schur_solver is a GMRES solver
1376 * schur_precond is the ILU preconditioner for GMRES
1377 * rhs and x are helper vector for solving Schur system
1378 */
1379
1380 HYPRE_Int
hypre_ILUSolveRAPGMRES(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * S,hypre_ParVector * ftemp,hypre_ParVector * utemp,hypre_ParVector * xtemp,hypre_ParVector * ytemp,HYPRE_Solver schur_solver,HYPRE_Solver schur_precond,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end,cusparseMatDescr_t matL_des,cusparseMatDescr_t matU_des,csrsv2Info_t matAL_info,csrsv2Info_t matAU_info,csrsv2Info_t matBL_info,csrsv2Info_t matBU_info,csrsv2Info_t matSL_info,csrsv2Info_t matSU_info,hypre_ParCSRMatrix * Aperm,hypre_CSRMatrix * matALU_d,hypre_CSRMatrix * matBLU_d,hypre_CSRMatrix * matE_d,hypre_CSRMatrix * matF_d,cusparseSolvePolicy_t ilu_solve_policy,void * ilu_solve_buffer,HYPRE_Int test_opt)1381 hypre_ILUSolveRAPGMRES(hypre_ParCSRMatrix *A, hypre_ParVector *f,
1382 hypre_ParVector *u, HYPRE_Int *perm,
1383 HYPRE_Int nLU, hypre_ParCSRMatrix *S,
1384 hypre_ParVector *ftemp, hypre_ParVector *utemp, hypre_ParVector *xtemp, hypre_ParVector *ytemp,
1385 HYPRE_Solver schur_solver, HYPRE_Solver schur_precond,
1386 hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end,
1387 cusparseMatDescr_t matL_des, cusparseMatDescr_t matU_des,
1388 csrsv2Info_t matAL_info, csrsv2Info_t matAU_info,
1389 csrsv2Info_t matBL_info, csrsv2Info_t matBU_info,
1390 csrsv2Info_t matSL_info, csrsv2Info_t matSU_info,
1391 hypre_ParCSRMatrix *Aperm, hypre_CSRMatrix *matALU_d, hypre_CSRMatrix *matBLU_d, hypre_CSRMatrix *matE_d, hypre_CSRMatrix *matF_d,
1392 cusparseSolvePolicy_t ilu_solve_policy, void *ilu_solve_buffer, HYPRE_Int test_opt)
1393 {
1394 /* data objects for communication */
1395 // MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1396
1397 /* data objects for temp vector */
1398 hypre_Vector *utemp_local = hypre_ParVectorLocalVector(utemp);
1399 HYPRE_Real *utemp_data = hypre_VectorData(utemp_local);
1400 hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp);
1401 HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local);
1402 hypre_Vector *xtemp_local = hypre_ParVectorLocalVector(xtemp);
1403 HYPRE_Real *xtemp_data = hypre_VectorData(xtemp_local);
1404 //hypre_Vector *ytemp_local = hypre_ParVectorLocalVector(ytemp);
1405 //HYPRE_Real *ytemp_data = hypre_VectorData(ytemp_local);
1406 hypre_Vector *rhs_local = hypre_ParVectorLocalVector(rhs);
1407 HYPRE_Real *rhs_data = hypre_VectorData(rhs_local);
1408 hypre_Vector *x_local = hypre_ParVectorLocalVector(x);
1409 HYPRE_Real *x_data = hypre_VectorData(x_local);
1410
1411 //HYPRE_Int i, j, k1, k2, col;
1412
1413 /* problem size */
1414 HYPRE_Int *ALU_i = hypre_CSRMatrixI(matALU_d);
1415 HYPRE_Int *ALU_j = hypre_CSRMatrixJ(matALU_d);
1416 HYPRE_Real *ALU_data = hypre_CSRMatrixData(matALU_d);
1417 HYPRE_Int *BLU_i = hypre_CSRMatrixI(matBLU_d);
1418 HYPRE_Int *BLU_j = hypre_CSRMatrixJ(matBLU_d);
1419 HYPRE_Real *BLU_data = hypre_CSRMatrixData(matBLU_d);
1420 HYPRE_Int BLU_nnz = BLU_i[nLU];
1421 hypre_CSRMatrix *matSLU_d = hypre_ParCSRMatrixDiag(S);
1422 HYPRE_Int *SLU_i = hypre_CSRMatrixI(matSLU_d);
1423 HYPRE_Int *SLU_j = hypre_CSRMatrixJ(matSLU_d);
1424 HYPRE_Real *SLU_data = hypre_CSRMatrixData(matSLU_d);
1425 HYPRE_Int m = hypre_CSRMatrixNumRows(matSLU_d);
1426 HYPRE_Int n = nLU + m;
1427 HYPRE_Int SLU_nnz = SLU_i[m];
1428 HYPRE_Int ALU_nnz = ALU_i[n];
1429
1430 hypre_Vector *ftemp_upper = hypre_SeqVectorCreate(nLU);
1431 hypre_Vector *utemp_lower = hypre_SeqVectorCreate(m);
1432 hypre_VectorOwnsData(ftemp_upper) = 0;
1433 hypre_VectorOwnsData(utemp_lower) = 0;
1434 hypre_VectorData(ftemp_upper) = ftemp_data;
1435 hypre_VectorData(utemp_lower) = utemp_data + nLU;
1436 hypre_SeqVectorInitialize(ftemp_upper);
1437 hypre_SeqVectorInitialize(utemp_lower);
1438
1439 /* begin */
1440 HYPRE_Real one = 1.0;
1441 HYPRE_Real mone = -1.0;
1442 HYPRE_Real zero = 0.0;
1443
1444 HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
1445 HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
1446
1447 hypre_assert(isDoublePrecision || isSinglePrecision);
1448
1449 cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
1450
1451 switch(test_opt)
1452 {
1453 case 1: case 3:
1454 {
1455 /* E and F */
1456 /* compute residual */
1457 hypre_ParCSRMatrixMatvecOutOfPlace(mone, A, u, one, f, utemp);
1458
1459 /* apply permutation before we can start our solve
1460 * Au=f -> (PAQ)Q'u=Pf
1461 */
1462 HYPRE_THRUST_CALL(gather, perm, perm + n, utemp_data, ftemp_data);
1463
1464 /* A-smoothing
1465 * x = [UA\(LA\(P*f_u))] fill to xtemp
1466 */
1467 if(n > 0)
1468 {
1469 if(isDoublePrecision)
1470 {
1471 /* L solve - Forward solve */
1472 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1473 n, ALU_nnz, (hypre_double *) &one, matL_des,
1474 (hypre_double *) ALU_data, ALU_i, ALU_j, matAL_info,
1475 (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1476 /* U solve - Forward solve */
1477 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1478 n, ALU_nnz, (hypre_double *) &one, matU_des,
1479 (hypre_double *) ALU_data, ALU_i, ALU_j, matAU_info,
1480 (hypre_double *) utemp_data, (hypre_double *) xtemp_data, ilu_solve_policy, ilu_solve_buffer));
1481 }
1482 else if(isSinglePrecision)
1483 {
1484 /* L solve - Forward solve */
1485 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1486 n, ALU_nnz, (float *) &one, matL_des,
1487 (float *) ALU_data, ALU_i, ALU_j, matAL_info,
1488 (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1489 /* U solve - Forward solve */
1490 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1491 n, ALU_nnz, (float *) &one, matU_des,
1492 (float *) ALU_data, ALU_i, ALU_j, matAU_info,
1493 (float *) utemp_data, (float *) xtemp_data, ilu_solve_policy, ilu_solve_buffer));
1494 }
1495 }
1496 /* residual, we should not touch xtemp for now
1497 * r = R*(f-PAQx)
1498 */
1499 hypre_ParCSRMatrixMatvec(mone, Aperm, xtemp, one, ftemp);
1500 /* with R is complex */
1501 /* copy partial data in */
1502 hypre_TMemcpy( rhs_data, ftemp_data + nLU, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1503
1504 /* solve L^{-1} */
1505 if(nLU > 0)
1506 {
1507 if(isDoublePrecision)
1508 {
1509 /* L solve */
1510 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1511 nLU, BLU_nnz, (hypre_double *) &one, matL_des,
1512 (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1513 (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1514 }
1515 else if(isSinglePrecision)
1516 {
1517 /* L solve */
1518 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1519 nLU, BLU_nnz, (float *) &one, matL_des,
1520 (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1521 (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1522 }
1523 /* -U^{-1}L^{-1} */
1524 if(isDoublePrecision)
1525 {
1526 /* U solve */
1527 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1528 nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1529 (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1530 (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1531 }
1532 else if(isSinglePrecision)
1533 {
1534 /* U solve */
1535 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1536 nLU, BLU_nnz, (float *) &one, matU_des,
1537 (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1538 (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1539 }
1540 }
1541 /* -EU^{-1}L^{-1} */
1542 hypre_CSRMatrixMatvec(mone, matE_d, ftemp_upper, one, rhs_local);
1543
1544
1545 /* now solve S
1546 */
1547 if(S)
1548 {
1549 /* if we have a schur complement */
1550 hypre_ParVectorSetConstantValues(x, 0.0);
1551 HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)schur_precond,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
1552
1553 /* u = xtemp + P*x */
1554 /* -Fx */
1555 hypre_CSRMatrixMatvec(mone, matF_d, x_local, zero, ftemp_upper);
1556 /* -L^{-1}Fx */
1557 if(nLU > 0)
1558 {
1559 if(isDoublePrecision)
1560 {
1561 /* L solve */
1562 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1563 nLU, BLU_nnz, (hypre_double *) &one, matL_des,
1564 (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1565 (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1566 }
1567 else if(isSinglePrecision)
1568 {
1569 /* L solve */
1570 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1571 nLU, BLU_nnz, (float *) &one, matL_des,
1572 (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1573 (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1574 }
1575 /* -U{-1}L^{-1}Fx */
1576 if(isDoublePrecision)
1577 {
1578 /* U solve */
1579 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1580 nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1581 (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1582 (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1583 }
1584 else if(isSinglePrecision)
1585 {
1586 /* U solve */
1587 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1588 nLU, BLU_nnz, (float *) &one, matU_des,
1589 (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1590 (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1591 }
1592 }
1593 /* now copy data to y_lower */
1594 hypre_TMemcpy( ftemp_data + nLU, x_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1595
1596 /* correction to the residual */
1597 hypre_ParVectorAxpy(one, ftemp, xtemp);
1598
1599 }
1600 else
1601 {
1602 /* otherwise just apply triangular solves */
1603 if(m > 0)
1604 {
1605 if(isDoublePrecision)
1606 {
1607 /* L solve - Forward solve */
1608 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1609 m, SLU_nnz, (hypre_double *) &one, matL_des,
1610 (hypre_double *) SLU_data, SLU_i, SLU_j, matSL_info,
1611 (hypre_double *) rhs_data, (hypre_double *) x_data, ilu_solve_policy, ilu_solve_buffer));
1612 }
1613 else if(isSinglePrecision)
1614 {
1615 /* L solve - Forward solve */
1616 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1617 m, SLU_nnz, (float *) &one, matL_des,
1618 (float *) SLU_data, SLU_i, SLU_j, matSL_info,
1619 (float *) rhs_data, (float *) x_data, ilu_solve_policy, ilu_solve_buffer));
1620 }
1621 if(isDoublePrecision)
1622 {
1623 /* U solve - Forward solve */
1624 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1625 m, SLU_nnz, (hypre_double *) &one, matU_des,
1626 (hypre_double *) SLU_data, SLU_i, SLU_j, matSU_info,
1627 (hypre_double *) x_data, (hypre_double *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1628 }
1629 else if(isSinglePrecision)
1630 {
1631 /* U solve - Forward solve */
1632 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1633 m, SLU_nnz, (float *) &one, matU_des,
1634 (float *) SLU_data, SLU_i, SLU_j, matSU_info,
1635 (float *) x_data, (float *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1636 }
1637 }
1638
1639 /* u = xtemp + P*x */
1640 /* -Fx */
1641 hypre_CSRMatrixMatvec(mone, matF_d, rhs_local, zero, ftemp_upper);
1642 /* -L^{-1}Fx */
1643 if(nLU > 0)
1644 {
1645 if(isDoublePrecision)
1646 {
1647 /* L solve */
1648 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1649 nLU, BLU_nnz, (hypre_double *) &one, matL_des,
1650 (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1651 (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1652 }
1653 else if(isSinglePrecision)
1654 {
1655 /* L solve */
1656 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1657 nLU, BLU_nnz, (float *) &one, matL_des,
1658 (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1659 (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1660 }
1661 /* -U{-1}L^{-1}Fx */
1662 if(isDoublePrecision)
1663 {
1664 /* U solve */
1665 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1666 nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1667 (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1668 (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1669 }
1670 else if(isSinglePrecision)
1671 {
1672 /* U solve */
1673 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1674 nLU, BLU_nnz, (float *) &one, matU_des,
1675 (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1676 (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1677 }
1678 }
1679 /* now copy data to y_lower */
1680 hypre_TMemcpy( ftemp_data + nLU, rhs_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1681
1682 hypre_ParVectorAxpy(one, ftemp, xtemp);
1683
1684 }
1685
1686 /* perm back */
1687 HYPRE_THRUST_CALL(scatter,xtemp_data, xtemp_data + n, perm, ftemp_data);
1688
1689 /* done, now everything are in u_temp, update solution */
1690 hypre_ParVectorAxpy(one, ftemp, u);
1691 }
1692 break;
1693 case 0: case 2: default:
1694 {
1695 /* EU^{-1} and L^{-1}F */
1696 /* compute residual */
1697 hypre_ParCSRMatrixMatvecOutOfPlace(mone, A, u, one, f, ftemp);
1698 /* apply permutation before we can start our solve
1699 * Au=f -> (PAQ)Q'u=Pf
1700 */
1701 HYPRE_THRUST_CALL(gather, perm, perm + n, ftemp_data, utemp_data);
1702
1703 /* A-smoothing
1704 * x = [UA\(LA\(P*f_u))] fill to xtemp
1705 */
1706 if(n > 0)
1707 {
1708 if(isDoublePrecision)
1709 {
1710 /* L solve - Forward solve */
1711 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1712 n, ALU_nnz, (hypre_double *) &one, matL_des,
1713 (hypre_double *) ALU_data, ALU_i, ALU_j, matAL_info,
1714 (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1715 /* U solve - Forward solve */
1716 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1717 n, ALU_nnz, (hypre_double *) &one, matU_des,
1718 (hypre_double *) ALU_data, ALU_i, ALU_j, matAU_info,
1719 (hypre_double *) ftemp_data, (hypre_double *) xtemp_data, ilu_solve_policy, ilu_solve_buffer));
1720 }
1721 else if(isSinglePrecision)
1722 {
1723 /* L solve - Forward solve */
1724 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1725 n, ALU_nnz, (float *) &one, matL_des,
1726 (float *) ALU_data, ALU_i, ALU_j, matAL_info,
1727 (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1728 /* U solve - Forward solve */
1729 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1730 n, ALU_nnz, (float *) &one, matU_des,
1731 (float *) ALU_data, ALU_i, ALU_j, matAU_info,
1732 (float *) ftemp_data, (float *) xtemp_data, ilu_solve_policy, ilu_solve_buffer));
1733 }
1734 }
1735 /* residual, we should not touch xtemp for now
1736 * r = R*(f-PAQx)
1737 */
1738 hypre_ParCSRMatrixMatvec(mone, Aperm, xtemp, one, utemp);
1739 /* with R is complex */
1740 /* copy partial data in */
1741 hypre_TMemcpy( rhs_data, utemp_data + nLU, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1742
1743 /* solve L^{-1} */
1744 if(nLU > 0)
1745 {
1746 if(isDoublePrecision)
1747 {
1748 /* L solve */
1749 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1750 nLU, BLU_nnz, (hypre_double *) &one, matL_des,
1751 (hypre_double *) BLU_data, BLU_i, BLU_j, matBL_info,
1752 (hypre_double *) utemp_data, (hypre_double *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1753 }
1754 else if(isSinglePrecision)
1755 {
1756 /* L solve */
1757 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1758 nLU, BLU_nnz, (float *) &one, matL_des,
1759 (float *) BLU_data, BLU_i, BLU_j, matBL_info,
1760 (float *) utemp_data, (float *) ftemp_data, ilu_solve_policy, ilu_solve_buffer));
1761 }
1762 }
1763 /* -EU^{-1}L^{-1} */
1764 hypre_CSRMatrixMatvec(mone, matE_d, ftemp_upper, one, rhs_local);
1765
1766
1767 /* now solve S
1768 */
1769 if(S)
1770 {
1771 /* if we have a schur complement */
1772 hypre_ParVectorSetConstantValues(x, 0.0);
1773 HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)schur_precond,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
1774
1775 /* u = xtemp + P*x */
1776 /* -L^{-1}Fx */
1777 hypre_CSRMatrixMatvec(mone, matF_d, x_local, zero, ftemp_upper);
1778 /* -U{-1}L^{-1}Fx */
1779 if(nLU > 0)
1780 {
1781 if(isDoublePrecision)
1782 {
1783 /* U solve */
1784 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1785 nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1786 (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1787 (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1788 }
1789 else if(isSinglePrecision)
1790 {
1791 /* U solve */
1792 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1793 nLU, BLU_nnz, (float *) &one, matU_des,
1794 (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1795 (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1796 }
1797 }
1798 /* now copy data to y_lower */
1799 hypre_TMemcpy( utemp_data + nLU, x_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1800
1801 hypre_ParVectorAxpy(one, utemp, xtemp);
1802
1803 }
1804 else
1805 {
1806 /* otherwise just apply triangular solves */
1807 if(m > 0)
1808 {
1809 if(isDoublePrecision)
1810 {
1811 /* L solve - Forward solve */
1812 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1813 m, SLU_nnz, (hypre_double *) &one, matL_des,
1814 (hypre_double *) SLU_data, SLU_i, SLU_j, matSL_info,
1815 (hypre_double *) rhs_data, (hypre_double *) x_data, ilu_solve_policy, ilu_solve_buffer));
1816 }
1817 else if(isSinglePrecision)
1818 {
1819 /* L solve - Forward solve */
1820 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1821 m, SLU_nnz, (float *) &one, matL_des,
1822 (float *) SLU_data, SLU_i, SLU_j, matSL_info,
1823 (float *) rhs_data, (float *) x_data, ilu_solve_policy, ilu_solve_buffer));
1824 }
1825 if(isDoublePrecision)
1826 {
1827 /* U solve - Forward solve */
1828 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1829 m, SLU_nnz, (hypre_double *) &one, matU_des,
1830 (hypre_double *) SLU_data, SLU_i, SLU_j, matSU_info,
1831 (hypre_double *) x_data, (hypre_double *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1832 }
1833 else if(isSinglePrecision)
1834 {
1835 /* U solve - Forward solve */
1836 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1837 m, SLU_nnz, (float *) &one, matU_des,
1838 (float *) SLU_data, SLU_i, SLU_j, matSU_info,
1839 (float *) x_data, (float *) rhs_data, ilu_solve_policy, ilu_solve_buffer));
1840 }
1841 }
1842 /* u = xtemp + P*x */
1843 /* -L^{-1}Fx */
1844 hypre_CSRMatrixMatvec(mone, matF_d, rhs_local, zero, ftemp_upper);
1845 /* -U{-1}L^{-1}Fx */
1846 if(nLU > 0)
1847 {
1848 if(isDoublePrecision)
1849 {
1850 /* U solve */
1851 HYPRE_CUSPARSE_CALL(cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1852 nLU, BLU_nnz, (hypre_double *) &one, matU_des,
1853 (hypre_double *) BLU_data, BLU_i, BLU_j, matBU_info,
1854 (hypre_double *) ftemp_data, (hypre_double *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1855 }
1856 else if(isSinglePrecision)
1857 {
1858 /* U solve */
1859 HYPRE_CUSPARSE_CALL(cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1860 nLU, BLU_nnz, (float *) &one, matU_des,
1861 (float *) BLU_data, BLU_i, BLU_j, matBU_info,
1862 (float *) ftemp_data, (float *) utemp_data, ilu_solve_policy, ilu_solve_buffer));
1863 }
1864 }
1865 /* now copy data to y_lower */
1866 hypre_TMemcpy( utemp_data + nLU, rhs_data, HYPRE_Real, m, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
1867
1868 hypre_ParVectorAxpy(one, utemp, xtemp);
1869
1870 }
1871
1872 /* perm back */
1873 HYPRE_THRUST_CALL(scatter,xtemp_data, xtemp_data + n, perm, ftemp_data);
1874
1875 /* done, now everything are in u_temp, update solution */
1876 hypre_ParVectorAxpy(one, ftemp, u);
1877 }
1878 break;
1879 }
1880
1881 return hypre_error_flag;
1882 }
1883
1884 #endif
1885
1886 HYPRE_Int
hypre_ILUSolveRAPGMRESHOST(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,HYPRE_Int * perm,HYPRE_Int nLU,hypre_ParCSRMatrix * L,HYPRE_Real * D,hypre_ParCSRMatrix * U,hypre_ParCSRMatrix * mL,HYPRE_Real * mD,hypre_ParCSRMatrix * mU,hypre_ParVector * ftemp,hypre_ParVector * utemp,hypre_ParVector * xtemp,hypre_ParVector * ytemp,HYPRE_Solver schur_solver,HYPRE_Solver schur_precond,hypre_ParVector * rhs,hypre_ParVector * x,HYPRE_Int * u_end)1887 hypre_ILUSolveRAPGMRESHOST(hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u, HYPRE_Int *perm,
1888 HYPRE_Int nLU, hypre_ParCSRMatrix *L, HYPRE_Real *D, hypre_ParCSRMatrix *U,
1889 hypre_ParCSRMatrix *mL, HYPRE_Real *mD, hypre_ParCSRMatrix *mU,
1890 hypre_ParVector *ftemp, hypre_ParVector *utemp,
1891 hypre_ParVector *xtemp, hypre_ParVector *ytemp,
1892 HYPRE_Solver schur_solver, HYPRE_Solver schur_precond,
1893 hypre_ParVector *rhs, hypre_ParVector *x, HYPRE_Int *u_end)
1894 {
1895 //#pragma omp parallel
1896 // printf("threads %d\n",omp_get_num_threads());
1897 /* data objects for communication */
1898 // MPI_Comm comm = hypre_ParCSRMatrixComm(A);
1899
1900 /* data objects for L and U */
1901 hypre_CSRMatrix *L_diag = hypre_ParCSRMatrixDiag(L);
1902 HYPRE_Real *L_diag_data = hypre_CSRMatrixData(L_diag);
1903 HYPRE_Int *L_diag_i = hypre_CSRMatrixI(L_diag);
1904 HYPRE_Int *L_diag_j = hypre_CSRMatrixJ(L_diag);
1905 hypre_CSRMatrix *U_diag = hypre_ParCSRMatrixDiag(U);
1906 HYPRE_Real *U_diag_data = hypre_CSRMatrixData(U_diag);
1907 HYPRE_Int *U_diag_i = hypre_CSRMatrixI(U_diag);
1908 HYPRE_Int *U_diag_j = hypre_CSRMatrixJ(U_diag);
1909
1910 hypre_CSRMatrix *mL_diag = hypre_ParCSRMatrixDiag(mL);
1911 HYPRE_Real *mL_diag_data = hypre_CSRMatrixData(mL_diag);
1912 HYPRE_Int *mL_diag_i = hypre_CSRMatrixI(mL_diag);
1913 HYPRE_Int *mL_diag_j = hypre_CSRMatrixJ(mL_diag);
1914 hypre_CSRMatrix *mU_diag = hypre_ParCSRMatrixDiag(mU);
1915 HYPRE_Real *mU_diag_data = hypre_CSRMatrixData(mU_diag);
1916 HYPRE_Int *mU_diag_i = hypre_CSRMatrixI(mU_diag);
1917 HYPRE_Int *mU_diag_j = hypre_CSRMatrixJ(mU_diag);
1918
1919 hypre_Vector *utemp_local = hypre_ParVectorLocalVector(utemp);
1920 HYPRE_Real *utemp_data = hypre_VectorData(utemp_local);
1921 hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp);
1922 HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local);
1923 hypre_Vector *xtemp_local = NULL;
1924 HYPRE_Real *xtemp_data = NULL;
1925 hypre_Vector *ytemp_local = NULL;
1926 HYPRE_Real *ytemp_data = NULL;
1927 if(xtemp)
1928 {
1929 /* xtemp might be null when we have no Schur complement */
1930 xtemp_local = hypre_ParVectorLocalVector(xtemp);
1931 xtemp_data = hypre_VectorData(xtemp_local);
1932 ytemp_local = hypre_ParVectorLocalVector(ytemp);
1933 ytemp_data = hypre_VectorData(ytemp_local);
1934 }
1935
1936 HYPRE_Real alpha;
1937 HYPRE_Real beta;
1938 HYPRE_Int i, j, k1, k2, col;
1939
1940 /* problem size */
1941 HYPRE_Int n = hypre_CSRMatrixNumRows(L_diag);
1942 HYPRE_Int m = n - nLU;
1943
1944 /* other data objects for computation */
1945 //hypre_Vector *f_local;
1946 //HYPRE_Real *f_data;
1947 hypre_Vector *rhs_local;
1948 HYPRE_Real *rhs_data;
1949 hypre_Vector *x_local;
1950 HYPRE_Real *x_data;
1951
1952 /* begin */
1953 beta = 1.0;
1954 alpha = -1.0;
1955
1956 if(m > 0)
1957 {
1958 /* setup vectors for solve */
1959 rhs_local = hypre_ParVectorLocalVector(rhs);
1960 rhs_data = hypre_VectorData(rhs_local);
1961 x_local = hypre_ParVectorLocalVector(x);
1962 x_data = hypre_VectorData(x_local);
1963
1964 }
1965
1966 /* only support RAP with partial factorized W and Z */
1967
1968 /* compute residual */
1969 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
1970
1971 /* A-smoothing f_temp = [UA \ LA \ (f_temp[perm])] */
1972 /* permuted L solve */
1973 for(i = 0 ; i < n ; i ++)
1974 {
1975 utemp_data[i] = ftemp_data[perm[i]];
1976 k1 = L_diag_i[i] ; k2 = L_diag_i[i+1];
1977 for(j = k1 ; j < k2 ; j ++)
1978 {
1979 col = L_diag_j[j];
1980 utemp_data[i] -= L_diag_data[j] * utemp_data[col];
1981 }
1982 }
1983
1984 if(!xtemp)
1985 {
1986 /* in this case, we don't have a Schur complement */
1987 /* U solve */
1988 for(i = n-1 ; i >= 0 ; i --)
1989 {
1990 ftemp_data[perm[i]] = utemp_data[i];
1991 k1 = U_diag_i[i] ; k2 = U_diag_i[i+1];
1992 for(j = k1 ; j < k2 ; j ++)
1993 {
1994 col = U_diag_j[j];
1995 ftemp_data[perm[i]] -= U_diag_data[j] * ftemp_data[perm[col]];
1996 }
1997 ftemp_data[perm[i]] *= D[i];
1998 }
1999
2000 hypre_ParVectorAxpy(beta, ftemp, u);
2001
2002 return hypre_error_flag;
2003 }
2004
2005 /* U solve */
2006 for(i = n-1 ; i >= 0 ; i --)
2007 {
2008 xtemp_data[perm[i]] = utemp_data[i];
2009 k1 = U_diag_i[i] ; k2 = U_diag_i[i+1];
2010 for(j = k1 ; j < k2 ; j ++)
2011 {
2012 col = U_diag_j[j];
2013 xtemp_data[perm[i]] -= U_diag_data[j] * xtemp_data[perm[col]];
2014 }
2015 xtemp_data[perm[i]] *= D[i];
2016 }
2017
2018 /* coarse-grid correction */
2019 /* now f_temp is the result of A-smoothing
2020 * rhs = R*(b - Ax)
2021 * */
2022 // utemp = (ftemp - A*xtemp)
2023 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, xtemp, beta, ftemp, utemp);
2024
2025 // R = [-L21 L\inv, I]
2026 if( m > 0)
2027 {
2028 /* first is L solve */
2029 for(i = 0 ; i < nLU ; i ++)
2030 {
2031 ytemp_data[i] = utemp_data[perm[i]];
2032 k1 = mL_diag_i[i] ; k2 = mL_diag_i[i+1];
2033 for(j = k1 ; j < k2 ; j ++)
2034 {
2035 col = mL_diag_j[j];
2036 ytemp_data[i] -= mL_diag_data[j] * ytemp_data[col];
2037 }
2038 }
2039
2040 /* apply -W * ytemp on this, and take care of the I part */
2041 for(i = nLU ; i < n ; i ++)
2042 {
2043 rhs_data[i - nLU] = utemp_data[perm[i]];
2044 k1 = mL_diag_i[i] ; k2 = u_end[i];
2045 for(j = k1 ; j < k2 ; j ++)
2046 {
2047 col = mL_diag_j[j];
2048 rhs_data[i - nLU] -= mL_diag_data[j] * ytemp_data[col];
2049 }
2050 }
2051 }
2052
2053 /* now the rhs is ready */
2054 hypre_SeqVectorSetConstantValues(x_local, 0.0);
2055 HYPRE_GMRESSolve(schur_solver,(HYPRE_Matrix)schur_precond,(HYPRE_Vector)rhs,(HYPRE_Vector)x);
2056
2057 if(m > 0)
2058 {
2059 /*
2060 for(i = 0 ; i < m ; i ++)
2061 {
2062 x_data[i] = rhs_data[i];
2063 k1 = u_end[i+nLU] ; k2 = mL_diag_i[i+nLU+1];
2064 for(j = k1 ; j < k2 ; j ++)
2065 {
2066 col = mL_diag_j[j];
2067 x_data[i] -= mL_diag_data[j] * x_data[col-nLU];
2068 }
2069 }
2070
2071 for(i = m-1 ; i >= 0 ; i --)
2072 {
2073 rhs_data[i] = x_data[i];
2074 k1 = mU_diag_i[i+nLU] ; k2 = mU_diag_i[i+1+nLU];
2075 for(j = k1 ; j < k2 ; j ++)
2076 {
2077 col = mU_diag_j[j];
2078 rhs_data[i] -= mU_diag_data[j] * rhs_data[col-nLU];
2079 }
2080 rhs_data[i] *= mD[i];
2081 }
2082 */
2083
2084 /* after solve, update x = x + Pv
2085 * that is, xtemp = xtemp + P*x
2086 */
2087 /* first compute P*x
2088 * P = [ -U\inv U_12 ]
2089 * [ I ]
2090 */
2091 /* matvec */
2092 for(i = 0 ; i < nLU ; i ++)
2093 {
2094 ytemp_data[i] = 0.0;
2095 k1 = u_end[i] ; k2 = mU_diag_i[i+1];
2096 for(j = k1 ; j < k2 ; j ++)
2097 {
2098 col = mU_diag_j[j];
2099 ytemp_data[i] -= mU_diag_data[j] * x_data[col-nLU];
2100 }
2101 }
2102 /* U solve */
2103 for(i = nLU-1 ; i >= 0 ; i --)
2104 {
2105 ftemp_data[perm[i]] = ytemp_data[i];
2106 k1 = mU_diag_i[i] ; k2 = u_end[i];
2107 for(j = k1 ; j < k2 ; j ++)
2108 {
2109 col = mU_diag_j[j];
2110 ftemp_data[perm[i]] -= mU_diag_data[j] * ftemp_data[perm[col]];
2111 }
2112 ftemp_data[perm[i]] *= mD[i];
2113 }
2114
2115 /* update with I */
2116 for(i = nLU ; i < n ; i ++)
2117 {
2118 ftemp_data[perm[i]] = x_data[i-nLU];
2119 }
2120 hypre_ParVectorAxpy(beta, ftemp, u);
2121 }
2122
2123 hypre_ParVectorAxpy(beta, xtemp, u);
2124
2125 return hypre_error_flag;
2126 }
2127
2128 /* solve functions for NSH */
2129
2130 /*--------------------------------------------------------------------
2131 * hypre_NSHSolve
2132 *--------------------------------------------------------------------*/
2133 HYPRE_Int
hypre_NSHSolve(void * nsh_vdata,hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u)2134 hypre_NSHSolve( void *nsh_vdata,
2135 hypre_ParCSRMatrix *A,
2136 hypre_ParVector *f,
2137 hypre_ParVector *u )
2138 {
2139 MPI_Comm comm = hypre_ParCSRMatrixComm(A);
2140 // HYPRE_Int i;
2141
2142 hypre_ParNSHData *nsh_data = (hypre_ParNSHData*) nsh_vdata;
2143
2144 /* get matrices */
2145 hypre_ParCSRMatrix *matA = hypre_ParNSHDataMatA(nsh_data);
2146 hypre_ParCSRMatrix *matM = hypre_ParNSHDataMatM(nsh_data);
2147
2148 HYPRE_Int iter, num_procs, my_id;
2149
2150 hypre_ParVector *F_array = hypre_ParNSHDataF(nsh_data);
2151 hypre_ParVector *U_array = hypre_ParNSHDataU(nsh_data);
2152
2153 /* get settings */
2154 HYPRE_Real tol = hypre_ParNSHDataTol(nsh_data);
2155 HYPRE_Int logging = hypre_ParNSHDataLogging(nsh_data);
2156 HYPRE_Int print_level = hypre_ParNSHDataPrintLevel(nsh_data);
2157 HYPRE_Int max_iter = hypre_ParNSHDataMaxIter(nsh_data);
2158 HYPRE_Real *norms = hypre_ParNSHDataRelResNorms(nsh_data);
2159 hypre_ParVector *Ftemp = hypre_ParNSHDataFTemp(nsh_data);
2160 hypre_ParVector *Utemp = hypre_ParNSHDataUTemp(nsh_data);
2161 hypre_ParVector *residual;
2162
2163 HYPRE_Real alpha = -1.0;
2164 HYPRE_Real beta = 1.0;
2165 HYPRE_Real conv_factor = 0.0;
2166 HYPRE_Real resnorm = 1.0;
2167 HYPRE_Real init_resnorm = 0.0;
2168 HYPRE_Real rel_resnorm;
2169 HYPRE_Real rhs_norm = 0.0;
2170 HYPRE_Real old_resnorm;
2171 HYPRE_Real ieee_check = 0.;
2172 HYPRE_Real operat_cmplxty = hypre_ParNSHDataOperatorComplexity(nsh_data);
2173
2174 HYPRE_Int Solve_err_flag;
2175
2176 /* problem size */
2177 // HYPRE_Int n = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
2178
2179 /* begin */
2180 if(logging > 1)
2181 {
2182 residual = hypre_ParNSHDataResidual(nsh_data);
2183 }
2184
2185 hypre_ParNSHDataNumIterations(nsh_data) = 0;
2186
2187 hypre_MPI_Comm_size(comm, &num_procs);
2188 hypre_MPI_Comm_rank(comm,&my_id);
2189
2190 /*-----------------------------------------------------------------------
2191 * Write the solver parameters
2192 *-----------------------------------------------------------------------*/
2193 if (my_id == 0 && print_level > 1)
2194 {
2195 hypre_NSHWriteSolverParams(nsh_data);
2196 }
2197
2198 /*-----------------------------------------------------------------------
2199 * Initialize the solver error flag
2200 *-----------------------------------------------------------------------*/
2201
2202 Solve_err_flag = 0;
2203 /*-----------------------------------------------------------------------
2204 * write some initial info
2205 *-----------------------------------------------------------------------*/
2206
2207 if (my_id == 0 && print_level > 1 && tol > 0.)
2208 {
2209 hypre_printf("\n\n Newton–Schulz–Hotelling SOLVER SOLUTION INFO:\n");
2210 }
2211
2212
2213 /*-----------------------------------------------------------------------
2214 * Compute initial residual and print
2215 *-----------------------------------------------------------------------*/
2216 if (print_level > 1 || logging > 1 || tol > 0.)
2217 {
2218 if ( logging > 1 )
2219 {
2220 hypre_ParVectorCopy(f, residual );
2221 if (tol > 0.0)
2222 {
2223 hypre_ParCSRMatrixMatvec(alpha, A, u, beta, residual );
2224 }
2225 resnorm = sqrt(hypre_ParVectorInnerProd( residual, residual ));
2226 }
2227 else
2228 {
2229 hypre_ParVectorCopy(f, Ftemp);
2230 if (tol > 0.0)
2231 {
2232 hypre_ParCSRMatrixMatvec(alpha, A, u, beta, Ftemp);
2233 }
2234 resnorm = sqrt(hypre_ParVectorInnerProd(Ftemp, Ftemp));
2235 }
2236
2237 /* Since it is does not diminish performance, attempt to return an error flag
2238 and notify users when they supply bad input. */
2239 if (resnorm != 0.)
2240 {
2241 ieee_check = resnorm/resnorm; /* INF -> NaN conversion */
2242 }
2243 if (ieee_check != ieee_check)
2244 {
2245 /* ...INFs or NaNs in input can make ieee_check a NaN. This test
2246 for ieee_check self-equality works on all IEEE-compliant compilers/
2247 machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
2248 by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be
2249 found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
2250 if (print_level > 0)
2251 {
2252 hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
2253 hypre_printf("ERROR -- hypre_NSHSolve: INFs and/or NaNs detected in input.\n");
2254 hypre_printf("User probably placed non-numerics in supplied A, x_0, or b.\n");
2255 hypre_printf("ERROR detected by Hypre ... END\n\n\n");
2256 }
2257 hypre_error(HYPRE_ERROR_GENERIC);
2258 return hypre_error_flag;
2259 }
2260
2261 init_resnorm = resnorm;
2262 rhs_norm = sqrt(hypre_ParVectorInnerProd(f, f));
2263 if (rhs_norm > HYPRE_REAL_EPSILON)
2264 {
2265 rel_resnorm = init_resnorm / rhs_norm;
2266 }
2267 else
2268 {
2269 /* rhs is zero, return a zero solution */
2270 hypre_ParVectorSetConstantValues(U_array, 0.0);
2271 if(logging > 0)
2272 {
2273 rel_resnorm = 0.0;
2274 hypre_ParNSHDataFinalRelResidualNorm(nsh_data) = rel_resnorm;
2275 }
2276 return hypre_error_flag;
2277 }
2278 }
2279 else
2280 {
2281 rel_resnorm = 1.;
2282 }
2283
2284 if (my_id == 0 && print_level > 1)
2285 {
2286 hypre_printf(" relative\n");
2287 hypre_printf(" residual factor residual\n");
2288 hypre_printf(" -------- ------ --------\n");
2289 hypre_printf(" Initial %e %e\n",init_resnorm,
2290 rel_resnorm);
2291 }
2292
2293 matA = A;
2294 U_array = u;
2295 F_array = f;
2296
2297 /************** Main Solver Loop - always do 1 iteration ************/
2298 iter = 0;
2299
2300 while ((rel_resnorm >= tol || iter < 1)
2301 && iter < max_iter)
2302 {
2303
2304 /* Do one solve on e = Mr */
2305 hypre_NSHSolveInverse(matA, f, u, matM, Utemp, Ftemp);
2306
2307 /*---------------------------------------------------------------
2308 * Compute residual and residual norm
2309 *----------------------------------------------------------------*/
2310
2311 if (print_level > 1 || logging > 1 || tol > 0.)
2312 {
2313 old_resnorm = resnorm;
2314
2315 if ( logging > 1 ) {
2316 hypre_ParVectorCopy(F_array, residual);
2317 hypre_ParCSRMatrixMatvec(alpha, matA, U_array, beta, residual );
2318 resnorm = sqrt(hypre_ParVectorInnerProd( residual, residual ));
2319 }
2320 else {
2321 hypre_ParVectorCopy(F_array, Ftemp);
2322 hypre_ParCSRMatrixMatvec(alpha, matA, U_array, beta, Ftemp);
2323 resnorm = sqrt(hypre_ParVectorInnerProd(Ftemp, Ftemp));
2324 }
2325
2326 if (old_resnorm) conv_factor = resnorm / old_resnorm;
2327 else conv_factor = resnorm;
2328 if (rhs_norm > HYPRE_REAL_EPSILON)
2329 {
2330 rel_resnorm = resnorm / rhs_norm;
2331 }
2332 else
2333 {
2334 rel_resnorm = resnorm;
2335 }
2336
2337 norms[iter] = rel_resnorm;
2338 }
2339
2340 ++iter;
2341 hypre_ParNSHDataNumIterations(nsh_data) = iter;
2342 hypre_ParNSHDataFinalRelResidualNorm(nsh_data) = rel_resnorm;
2343
2344 if (my_id == 0 && print_level > 1)
2345 {
2346 hypre_printf(" NSHSolve %2d %e %f %e \n", iter,
2347 resnorm, conv_factor, rel_resnorm);
2348 }
2349 }
2350
2351 /* check convergence within max_iter */
2352 if (iter == max_iter && tol > 0.)
2353 {
2354 Solve_err_flag = 1;
2355 hypre_error(HYPRE_ERROR_CONV);
2356 }
2357
2358 /*-----------------------------------------------------------------------
2359 * Print closing statistics
2360 * Add operator and grid complexity stats
2361 *-----------------------------------------------------------------------*/
2362
2363 if (iter > 0 && init_resnorm)
2364 {
2365 conv_factor = pow((resnorm/init_resnorm),(1.0/(HYPRE_Real) iter));
2366 }
2367 else
2368 {
2369 conv_factor = 1.;
2370 }
2371
2372 if (print_level > 1)
2373 {
2374 /*** compute operator and grid complexity (fill factor) here ?? ***/
2375 if (my_id == 0)
2376 {
2377 if (Solve_err_flag == 1)
2378 {
2379 hypre_printf("\n\n==============================================");
2380 hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
2381 hypre_printf(" within the allowed %d iterations\n",max_iter);
2382 hypre_printf("==============================================");
2383 }
2384 hypre_printf("\n\n Average Convergence Factor = %f \n",conv_factor);
2385 hypre_printf(" operator = %f\n",operat_cmplxty);
2386 }
2387 }
2388 return hypre_error_flag;
2389 }
2390
2391 /* NSH solve
2392 * Simply a matvec on residual with approximate inverse
2393 * A: original matrix
2394 * f: rhs
2395 * u: solution
2396 * M: approximate inverse
2397 * ftemp, utemp: working vectors
2398 */
2399 HYPRE_Int
hypre_NSHSolveInverse(hypre_ParCSRMatrix * A,hypre_ParVector * f,hypre_ParVector * u,hypre_ParCSRMatrix * M,hypre_ParVector * ftemp,hypre_ParVector * utemp)2400 hypre_NSHSolveInverse(hypre_ParCSRMatrix *A, hypre_ParVector *f,
2401 hypre_ParVector *u, hypre_ParCSRMatrix *M,
2402 hypre_ParVector *ftemp, hypre_ParVector *utemp)
2403 {
2404 HYPRE_Real alpha;
2405 HYPRE_Real beta;
2406
2407 /* begin */
2408 alpha = -1.0;
2409 beta = 1.0;
2410 /* r = f-Au */
2411 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A, u, beta, f, ftemp);
2412 /* e = Mr */
2413 hypre_ParCSRMatrixMatvec(1.0, M, ftemp, 0.0, utemp);
2414 /* u = u + e */
2415 hypre_ParVectorAxpy(beta, utemp, u);
2416 return hypre_error_flag;
2417 }
2418