1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_struct_ls.h"
9 #include "_hypre_struct_mv.hpp"
10 #include "red_black_gs.h"
11
12 #ifndef hypre_abs
13 #define hypre_abs(a) (((a)>0) ? (a) : -(a))
14 #endif
15
16 /*--------------------------------------------------------------------------
17 *--------------------------------------------------------------------------*/
18
19 HYPRE_Int
hypre_RedBlackConstantCoefGS(void * relax_vdata,hypre_StructMatrix * A,hypre_StructVector * b,hypre_StructVector * x)20 hypre_RedBlackConstantCoefGS( void *relax_vdata,
21 hypre_StructMatrix *A,
22 hypre_StructVector *b,
23 hypre_StructVector *x )
24 {
25 hypre_RedBlackGSData *relax_data = (hypre_RedBlackGSData *)relax_vdata;
26
27 HYPRE_Int max_iter = (relax_data -> max_iter);
28 HYPRE_Int zero_guess = (relax_data -> zero_guess);
29 HYPRE_Int rb_start = (relax_data -> rb_start);
30 HYPRE_Int diag_rank = (relax_data -> diag_rank);
31 hypre_ComputePkg *compute_pkg = (relax_data -> compute_pkg);
32 HYPRE_Int ndim = hypre_StructMatrixNDim(A);
33
34 hypre_CommHandle *comm_handle;
35
36 hypre_BoxArrayArray *compute_box_aa;
37 hypre_BoxArray *compute_box_a;
38 hypre_Box *compute_box;
39
40 hypre_Box *A_dbox;
41 hypre_Box *b_dbox;
42 hypre_Box *x_dbox;
43
44 HYPRE_Int Ai, Astart, Ani, Anj;
45 HYPRE_Int bstart, bni, bnj;
46 HYPRE_Int xstart, xni, xnj;
47 HYPRE_Int xoff0, xoff1, xoff2, xoff3, xoff4, xoff5;
48
49 HYPRE_Real *Ap;
50 HYPRE_Real *App;
51 HYPRE_Real *bp;
52 HYPRE_Real *xp;
53
54 /* constant coefficient */
55 HYPRE_Int constant_coeff= hypre_StructMatrixConstantCoefficient(A);
56 HYPRE_Real App0, App1, App2, App3, App4, App5, AApd;
57
58 hypre_IndexRef start;
59 hypre_Index loop_size;
60
61 hypre_StructStencil *stencil;
62 hypre_Index *stencil_shape;
63 HYPRE_Int stencil_size;
64 HYPRE_Int offd[6];
65
66 HYPRE_Int iter, rb, redblack, d;
67 HYPRE_Int compute_i, i, j;
68 HYPRE_Int ni, nj, nk;
69
70 /*----------------------------------------------------------
71 * Initialize some things and deal with special cases
72 *----------------------------------------------------------*/
73
74 hypre_BeginTiming(relax_data -> time_index);
75
76 hypre_StructMatrixDestroy(relax_data -> A);
77 hypre_StructVectorDestroy(relax_data -> b);
78 hypre_StructVectorDestroy(relax_data -> x);
79 (relax_data -> A) = hypre_StructMatrixRef(A);
80 (relax_data -> x) = hypre_StructVectorRef(x);
81 (relax_data -> b) = hypre_StructVectorRef(b);
82
83 (relax_data -> num_iterations) = 0;
84
85 /* if max_iter is zero, return */
86 if (max_iter == 0)
87 {
88 /* if using a zero initial guess, return zero */
89 if (zero_guess)
90 {
91 hypre_StructVectorSetConstantValues(x, 0.0);
92 }
93
94 hypre_EndTiming(relax_data -> time_index);
95 return hypre_error_flag;
96 }
97 else
98 {
99 stencil = hypre_StructMatrixStencil(A);
100 stencil_shape = hypre_StructStencilShape(stencil);
101 stencil_size = hypre_StructStencilSize(stencil);
102
103 /* get off-diag entry ranks ready */
104 i = 0;
105 for (j = 0; j < stencil_size; j++)
106 {
107 if (j != diag_rank)
108 {
109 offd[i] = j;
110 i++;
111 }
112 }
113 }
114
115 hypre_StructVectorClearBoundGhostValues(x, 0);
116
117 /*----------------------------------------------------------
118 * Do zero_guess iteration
119 *----------------------------------------------------------*/
120
121 rb = rb_start;
122 iter = 0;
123
124 if (zero_guess)
125 {
126 for (compute_i = 0; compute_i < 2; compute_i++)
127 {
128 switch(compute_i)
129 {
130 case 0:
131 {
132 compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
133 }
134 break;
135
136 case 1:
137 {
138 compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
139 }
140 break;
141 }
142
143 hypre_ForBoxArrayI(i, compute_box_aa)
144 {
145 compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
146
147 A_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
148 b_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
149 x_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
150
151 Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
152 bp = hypre_StructVectorBoxData(b, i);
153 xp = hypre_StructVectorBoxData(x, i);
154
155 hypre_ForBoxI(j, compute_box_a)
156 {
157 compute_box = hypre_BoxArrayBox(compute_box_a, j);
158
159 start = hypre_BoxIMin(compute_box);
160 hypre_BoxGetSize(compute_box, loop_size);
161
162 /* Are we relaxing index start or start+(1,0,0)? */
163 redblack = rb;
164 for (d = 0; d < ndim; d++)
165 {
166 redblack += hypre_IndexD(start, d);
167 }
168 redblack = hypre_abs(redblack) % 2;
169
170 bstart = hypre_BoxIndexRank(b_dbox, start);
171 xstart = hypre_BoxIndexRank(x_dbox, start);
172 ni = hypre_IndexX(loop_size);
173 nj = hypre_IndexY(loop_size);
174 nk = hypre_IndexZ(loop_size);
175 bni = hypre_BoxSizeX(b_dbox);
176 xni = hypre_BoxSizeX(x_dbox);
177 bnj = hypre_BoxSizeY(b_dbox);
178 xnj = hypre_BoxSizeY(x_dbox);
179 if (ndim < 3)
180 {
181 nk = 1;
182 if (ndim < 2)
183 {
184 nj = 1;
185 }
186 }
187
188 if (constant_coeff == 1)
189 {
190 Ai= hypre_CCBoxIndexRank(A_dbox, start);
191 AApd= 1.0/Ap[Ai];
192
193 hypre_RedBlackLoopInit();
194
195 #define DEVICE_VAR is_device_ptr(xp,bp)
196 hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,
197 bstart,bni,bnj,bi,
198 xstart,xni,xnj,xi);
199 {
200 xp[xi] = bp[bi]*AApd;
201 }
202 hypre_RedBlackConstantcoefLoopEnd();
203 #undef DEVICE_VAR
204 }
205
206 else /* variable coefficient diag */
207 {
208 Astart = hypre_BoxIndexRank(A_dbox, start);
209 Ani = hypre_BoxSizeX(A_dbox);
210 Anj = hypre_BoxSizeY(A_dbox);
211
212 hypre_RedBlackLoopInit();
213 #define DEVICE_VAR is_device_ptr(xp,bp,Ap)
214 hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
215 Astart,Ani,Anj,Ai,
216 bstart,bni,bnj,bi,
217 xstart,xni,xnj,xi);
218 {
219 xp[xi] = bp[bi] / Ap[Ai];
220 }
221 hypre_RedBlackLoopEnd();
222 #undef DEVICE_VAR
223 }
224
225 }
226 }
227 }
228
229 rb = (rb + 1) % 2;
230 iter++;
231 }
232
233 /*----------------------------------------------------------
234 * Do regular iterations
235 *----------------------------------------------------------*/
236
237 while (iter < 2*max_iter)
238 {
239 for (compute_i = 0; compute_i < 2; compute_i++)
240 {
241 switch(compute_i)
242 {
243 case 0:
244 {
245 xp = hypre_StructVectorData(x);
246 hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
247 compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
248 }
249 break;
250
251 case 1:
252 {
253 hypre_FinalizeIndtComputations(comm_handle);
254 compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
255 }
256 break;
257 }
258
259 hypre_ForBoxArrayI(i, compute_box_aa)
260 {
261 compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
262
263 A_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
264 b_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
265 x_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
266
267 Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
268 bp = hypre_StructVectorBoxData(b, i);
269 xp = hypre_StructVectorBoxData(x, i);
270
271 hypre_ForBoxI(j, compute_box_a)
272 {
273 compute_box = hypre_BoxArrayBox(compute_box_a, j);
274
275 start = hypre_BoxIMin(compute_box);
276 hypre_BoxGetSize(compute_box, loop_size);
277
278 /* Are we relaxing index start or start+(1,0,0)? */
279 redblack = rb;
280 for (d = 0; d < ndim; d++)
281 {
282 redblack += hypre_IndexD(start, d);
283 }
284 redblack = hypre_abs(redblack) % 2;
285
286 bstart = hypre_BoxIndexRank(b_dbox, start);
287 xstart = hypre_BoxIndexRank(x_dbox, start);
288 ni = hypre_IndexX(loop_size);
289 nj = hypre_IndexY(loop_size);
290 nk = hypre_IndexZ(loop_size);
291 bni= hypre_BoxSizeX(b_dbox);
292 xni= hypre_BoxSizeX(x_dbox);
293 bnj= hypre_BoxSizeY(b_dbox);
294 xnj= hypre_BoxSizeY(x_dbox);
295 Ai = hypre_CCBoxIndexRank(A_dbox, start);
296 if (ndim < 3)
297 {
298 nk = 1;
299 if (ndim < 2)
300 {
301 nj = 1;
302 }
303 }
304
305 switch(stencil_size)
306 {
307 case 7:
308 App = hypre_StructMatrixBoxData(A, i, offd[5]);
309 App5= App[Ai];
310 App = hypre_StructMatrixBoxData(A, i, offd[4]);
311 App4= App[Ai];
312 xoff5 = hypre_BoxOffsetDistance(
313 x_dbox, stencil_shape[offd[5]]);
314 xoff4 = hypre_BoxOffsetDistance(
315 x_dbox, stencil_shape[offd[4]]);
316
317 case 5:
318 App = hypre_StructMatrixBoxData(A, i, offd[3]);
319 App3= App[Ai];
320 App = hypre_StructMatrixBoxData(A, i, offd[2]);
321 App2= App[Ai];
322 xoff3 = hypre_BoxOffsetDistance(
323 x_dbox, stencil_shape[offd[3]]);
324 xoff2 = hypre_BoxOffsetDistance(
325 x_dbox, stencil_shape[offd[2]]);
326
327 case 3:
328 App = hypre_StructMatrixBoxData(A, i, offd[1]);
329 App1= App[Ai];
330 App = hypre_StructMatrixBoxData(A, i, offd[0]);
331 App0= App[Ai];
332 xoff1 = hypre_BoxOffsetDistance(
333 x_dbox, stencil_shape[offd[1]]);
334 xoff0 = hypre_BoxOffsetDistance(
335 x_dbox, stencil_shape[offd[0]]);
336 break;
337 }
338
339 if (constant_coeff == 1)
340 {
341 AApd = 1/Ap[Ai];
342
343 switch(stencil_size)
344 {
345 case 7:
346 hypre_RedBlackLoopInit();
347 #define DEVICE_VAR is_device_ptr(xp,bp)
348 hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,
349 bstart,bni,bnj,bi,
350 xstart,xni,xnj,xi);
351 {
352 xp[xi] =
353 (bp[bi] -
354 App0*xp[xi + xoff0] -
355 App1*xp[xi + xoff1] -
356 App2*xp[xi + xoff2] -
357 App3*xp[xi + xoff3] -
358 App4*xp[xi + xoff4] -
359 App5*xp[xi + xoff5])*AApd;
360 }
361 hypre_RedBlackConstantcoefLoopEnd();
362 #undef DEVICE_VAR
363
364 break;
365
366 case 5:
367 hypre_RedBlackLoopInit();
368 #define DEVICE_VAR is_device_ptr(xp,bp)
369 hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,
370 bstart,bni,bnj,bi,
371 xstart,xni,xnj,xi);
372 {
373 xp[xi] =
374 (bp[bi] -
375 App0*xp[xi + xoff0] -
376 App1*xp[xi + xoff1] -
377 App2*xp[xi + xoff2] -
378 App3*xp[xi + xoff3])*AApd;
379 }
380 hypre_RedBlackConstantcoefLoopEnd();
381 #undef DEVICE_VAR
382 break;
383
384 case 3:
385 hypre_RedBlackLoopInit();
386 #define DEVICE_VAR is_device_ptr(xp,bp)
387 hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,
388 bstart,bni,bnj,bi,
389 xstart,xni,xnj,xi);
390 {
391 xp[xi] =
392 (bp[bi] -
393 App0*xp[xi + xoff0] -
394 App1*xp[xi + xoff1])*AApd;
395 }
396 hypre_RedBlackConstantcoefLoopEnd();
397 #undef DEVICE_VAR
398 break;
399 }
400
401 } /* if (constant_coeff == 1) */
402
403 else /* variable diagonal */
404 {
405 Astart = hypre_BoxIndexRank(A_dbox, start);
406 Ani = hypre_BoxSizeX(A_dbox);
407 Anj = hypre_BoxSizeY(A_dbox);
408
409 switch(stencil_size)
410 {
411 case 7:
412 hypre_RedBlackLoopInit();
413 #define DEVICE_VAR is_device_ptr(xp,bp,Ap)
414 hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
415 Astart,Ani,Anj,Ai,
416 bstart,bni,bnj,bi,
417 xstart,xni,xnj,xi);
418 {
419 xp[xi] =
420 (bp[bi] -
421 App0*xp[xi + xoff0] -
422 App1*xp[xi + xoff1] -
423 App2*xp[xi + xoff2] -
424 App3*xp[xi + xoff3] -
425 App4*xp[xi + xoff4] -
426 App5*xp[xi + xoff5]) / Ap[Ai];
427 }
428 hypre_RedBlackLoopEnd();
429 #undef DEVICE_VAR
430 break;
431
432 case 5:
433 hypre_RedBlackLoopInit();
434 #define DEVICE_VAR is_device_ptr(xp,bp,Ap)
435 hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
436 Astart,Ani,Anj,Ai,
437 bstart,bni,bnj,bi,
438 xstart,xni,xnj,xi);
439 {
440 xp[xi] =
441 (bp[bi] -
442 App0*xp[xi + xoff0] -
443 App1*xp[xi + xoff1] -
444 App2*xp[xi + xoff2] -
445 App3*xp[xi + xoff3]) / Ap[Ai];
446 }
447 hypre_RedBlackLoopEnd();
448 #undef DEVICE_VAR
449 break;
450
451 case 3:
452 hypre_RedBlackLoopInit();
453 #define DEVICE_VAR is_device_ptr(xp,bp,Ap)
454 hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
455 Astart,Ani,Anj,Ai,
456 bstart,bni,bnj,bi,
457 xstart,xni,xnj,xi);
458 {
459 xp[xi] =
460 (bp[bi] -
461 App0*xp[xi + xoff0] -
462 App1*xp[xi + xoff1]) / Ap[Ai];
463 }
464 hypre_RedBlackLoopEnd();
465 #undef DEVICE_VAR
466 break;
467
468 } /* switch(stencil_size) */
469 } /* else */
470 }
471 }
472 }
473
474 rb = (rb + 1) % 2;
475 iter++;
476 }
477
478 (relax_data -> num_iterations) = iter / 2;
479
480 /*-----------------------------------------------------------------------
481 * Return
482 *-----------------------------------------------------------------------*/
483
484 hypre_IncFLOPCount(relax_data -> flops);
485 hypre_EndTiming(relax_data -> time_index);
486
487 return hypre_error_flag;
488 }
489
490
491