1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 #include "_hypre_struct_ls.h"
9 #include "_hypre_struct_mv.hpp"
10 #include "red_black_gs.h"
11 
12 #ifndef hypre_abs
13 #define hypre_abs(a)  (((a)>0) ? (a) : -(a))
14 #endif
15 
16 /*--------------------------------------------------------------------------
17  *--------------------------------------------------------------------------*/
18 
19 HYPRE_Int
hypre_RedBlackConstantCoefGS(void * relax_vdata,hypre_StructMatrix * A,hypre_StructVector * b,hypre_StructVector * x)20 hypre_RedBlackConstantCoefGS( void               *relax_vdata,
21                               hypre_StructMatrix *A,
22                               hypre_StructVector *b,
23                               hypre_StructVector *x )
24 {
25    hypre_RedBlackGSData  *relax_data = (hypre_RedBlackGSData  *)relax_vdata;
26 
27    HYPRE_Int              max_iter    = (relax_data -> max_iter);
28    HYPRE_Int              zero_guess  = (relax_data -> zero_guess);
29    HYPRE_Int              rb_start    = (relax_data -> rb_start);
30    HYPRE_Int              diag_rank   = (relax_data -> diag_rank);
31    hypre_ComputePkg      *compute_pkg = (relax_data -> compute_pkg);
32    HYPRE_Int              ndim = hypre_StructMatrixNDim(A);
33 
34    hypre_CommHandle      *comm_handle;
35 
36    hypre_BoxArrayArray   *compute_box_aa;
37    hypre_BoxArray        *compute_box_a;
38    hypre_Box             *compute_box;
39 
40    hypre_Box             *A_dbox;
41    hypre_Box             *b_dbox;
42    hypre_Box             *x_dbox;
43 
44    HYPRE_Int              Ai, Astart, Ani, Anj;
45    HYPRE_Int              bstart, bni, bnj;
46    HYPRE_Int              xstart, xni, xnj;
47    HYPRE_Int              xoff0, xoff1, xoff2, xoff3, xoff4, xoff5;
48 
49    HYPRE_Real            *Ap;
50    HYPRE_Real            *App;
51    HYPRE_Real            *bp;
52    HYPRE_Real            *xp;
53 
54    /* constant coefficient */
55    HYPRE_Int              constant_coeff= hypre_StructMatrixConstantCoefficient(A);
56    HYPRE_Real             App0, App1, App2, App3, App4, App5, AApd;
57 
58    hypre_IndexRef         start;
59    hypre_Index            loop_size;
60 
61    hypre_StructStencil   *stencil;
62    hypre_Index           *stencil_shape;
63    HYPRE_Int              stencil_size;
64    HYPRE_Int              offd[6];
65 
66    HYPRE_Int              iter, rb, redblack, d;
67    HYPRE_Int              compute_i, i, j;
68    HYPRE_Int              ni, nj, nk;
69 
70    /*----------------------------------------------------------
71     * Initialize some things and deal with special cases
72     *----------------------------------------------------------*/
73 
74    hypre_BeginTiming(relax_data -> time_index);
75 
76    hypre_StructMatrixDestroy(relax_data -> A);
77    hypre_StructVectorDestroy(relax_data -> b);
78    hypre_StructVectorDestroy(relax_data -> x);
79    (relax_data -> A) = hypre_StructMatrixRef(A);
80    (relax_data -> x) = hypre_StructVectorRef(x);
81    (relax_data -> b) = hypre_StructVectorRef(b);
82 
83    (relax_data -> num_iterations) = 0;
84 
85    /* if max_iter is zero, return */
86    if (max_iter == 0)
87    {
88       /* if using a zero initial guess, return zero */
89       if (zero_guess)
90       {
91          hypre_StructVectorSetConstantValues(x, 0.0);
92       }
93 
94       hypre_EndTiming(relax_data -> time_index);
95       return hypre_error_flag;
96    }
97    else
98    {
99       stencil       = hypre_StructMatrixStencil(A);
100       stencil_shape = hypre_StructStencilShape(stencil);
101       stencil_size  = hypre_StructStencilSize(stencil);
102 
103       /* get off-diag entry ranks ready */
104       i = 0;
105       for (j = 0; j < stencil_size; j++)
106       {
107          if (j != diag_rank)
108          {
109             offd[i] = j;
110             i++;
111          }
112       }
113    }
114 
115    hypre_StructVectorClearBoundGhostValues(x, 0);
116 
117    /*----------------------------------------------------------
118     * Do zero_guess iteration
119     *----------------------------------------------------------*/
120 
121    rb = rb_start;
122    iter = 0;
123 
124    if (zero_guess)
125    {
126       for (compute_i = 0; compute_i < 2; compute_i++)
127       {
128          switch(compute_i)
129          {
130             case 0:
131             {
132                compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
133             }
134             break;
135 
136             case 1:
137             {
138                compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
139             }
140             break;
141          }
142 
143          hypre_ForBoxArrayI(i, compute_box_aa)
144          {
145             compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
146 
147             A_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
148             b_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
149             x_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
150 
151             Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
152             bp = hypre_StructVectorBoxData(b, i);
153             xp = hypre_StructVectorBoxData(x, i);
154 
155             hypre_ForBoxI(j, compute_box_a)
156             {
157                compute_box = hypre_BoxArrayBox(compute_box_a, j);
158 
159                start  = hypre_BoxIMin(compute_box);
160                hypre_BoxGetSize(compute_box, loop_size);
161 
162                /* Are we relaxing index start or start+(1,0,0)? */
163                redblack = rb;
164                for (d = 0; d < ndim; d++)
165                {
166                   redblack += hypre_IndexD(start, d);
167                }
168                redblack = hypre_abs(redblack) % 2;
169 
170                bstart = hypre_BoxIndexRank(b_dbox, start);
171                xstart = hypre_BoxIndexRank(x_dbox, start);
172                ni = hypre_IndexX(loop_size);
173                nj = hypre_IndexY(loop_size);
174                nk = hypre_IndexZ(loop_size);
175                bni = hypre_BoxSizeX(b_dbox);
176                xni = hypre_BoxSizeX(x_dbox);
177                bnj = hypre_BoxSizeY(b_dbox);
178                xnj = hypre_BoxSizeY(x_dbox);
179                if (ndim < 3)
180                {
181                   nk = 1;
182                   if (ndim < 2)
183                   {
184                      nj = 1;
185                   }
186                }
187 
188                if (constant_coeff == 1)
189                {
190                   Ai= hypre_CCBoxIndexRank(A_dbox, start);
191                   AApd= 1.0/Ap[Ai];
192 
193                   hypre_RedBlackLoopInit();
194 
195 #define DEVICE_VAR is_device_ptr(xp,bp)
196                   hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,
197                                                       bstart,bni,bnj,bi,
198                                                       xstart,xni,xnj,xi);
199                   {
200                      xp[xi] = bp[bi]*AApd;
201                   }
202                   hypre_RedBlackConstantcoefLoopEnd();
203 #undef DEVICE_VAR
204                }
205 
206                else      /* variable coefficient diag */
207                {
208                   Astart = hypre_BoxIndexRank(A_dbox, start);
209                   Ani = hypre_BoxSizeX(A_dbox);
210                   Anj = hypre_BoxSizeY(A_dbox);
211 
212                   hypre_RedBlackLoopInit();
213 #define DEVICE_VAR is_device_ptr(xp,bp,Ap)
214                   hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
215                                           Astart,Ani,Anj,Ai,
216                                           bstart,bni,bnj,bi,
217                                           xstart,xni,xnj,xi);
218                   {
219                      xp[xi] = bp[bi] / Ap[Ai];
220                   }
221                   hypre_RedBlackLoopEnd();
222 #undef DEVICE_VAR
223                }
224 
225             }
226          }
227       }
228 
229       rb = (rb + 1) % 2;
230       iter++;
231    }
232 
233    /*----------------------------------------------------------
234     * Do regular iterations
235     *----------------------------------------------------------*/
236 
237    while (iter < 2*max_iter)
238    {
239       for (compute_i = 0; compute_i < 2; compute_i++)
240       {
241          switch(compute_i)
242          {
243             case 0:
244             {
245                xp = hypre_StructVectorData(x);
246                hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
247                compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
248             }
249             break;
250 
251             case 1:
252             {
253                hypre_FinalizeIndtComputations(comm_handle);
254                compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
255             }
256             break;
257          }
258 
259          hypre_ForBoxArrayI(i, compute_box_aa)
260          {
261             compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
262 
263             A_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
264             b_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
265             x_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
266 
267             Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
268             bp = hypre_StructVectorBoxData(b, i);
269             xp = hypre_StructVectorBoxData(x, i);
270 
271             hypre_ForBoxI(j, compute_box_a)
272             {
273                compute_box = hypre_BoxArrayBox(compute_box_a, j);
274 
275                start  = hypre_BoxIMin(compute_box);
276                hypre_BoxGetSize(compute_box, loop_size);
277 
278                /* Are we relaxing index start or start+(1,0,0)? */
279                redblack = rb;
280                for (d = 0; d < ndim; d++)
281                {
282                   redblack += hypre_IndexD(start, d);
283                }
284                redblack = hypre_abs(redblack) % 2;
285 
286                bstart = hypre_BoxIndexRank(b_dbox, start);
287                xstart = hypre_BoxIndexRank(x_dbox, start);
288                ni = hypre_IndexX(loop_size);
289                nj = hypre_IndexY(loop_size);
290                nk = hypre_IndexZ(loop_size);
291                bni= hypre_BoxSizeX(b_dbox);
292                xni= hypre_BoxSizeX(x_dbox);
293                bnj= hypre_BoxSizeY(b_dbox);
294                xnj= hypre_BoxSizeY(x_dbox);
295                Ai = hypre_CCBoxIndexRank(A_dbox, start);
296                if (ndim < 3)
297                {
298                   nk = 1;
299                   if (ndim < 2)
300                   {
301                      nj = 1;
302                   }
303                }
304 
305                switch(stencil_size)
306                {
307                   case 7:
308                      App = hypre_StructMatrixBoxData(A, i, offd[5]);
309                      App5= App[Ai];
310                      App = hypre_StructMatrixBoxData(A, i, offd[4]);
311                      App4= App[Ai];
312                      xoff5 = hypre_BoxOffsetDistance(
313                         x_dbox, stencil_shape[offd[5]]);
314                      xoff4 = hypre_BoxOffsetDistance(
315                         x_dbox, stencil_shape[offd[4]]);
316 
317                   case 5:
318                      App = hypre_StructMatrixBoxData(A, i, offd[3]);
319                      App3= App[Ai];
320                      App = hypre_StructMatrixBoxData(A, i, offd[2]);
321                      App2= App[Ai];
322                      xoff3 = hypre_BoxOffsetDistance(
323                         x_dbox, stencil_shape[offd[3]]);
324                      xoff2 = hypre_BoxOffsetDistance(
325                         x_dbox, stencil_shape[offd[2]]);
326 
327                   case 3:
328                      App = hypre_StructMatrixBoxData(A, i, offd[1]);
329                      App1= App[Ai];
330                      App = hypre_StructMatrixBoxData(A, i, offd[0]);
331                      App0= App[Ai];
332                      xoff1 = hypre_BoxOffsetDistance(
333                         x_dbox, stencil_shape[offd[1]]);
334                      xoff0 = hypre_BoxOffsetDistance(
335                         x_dbox, stencil_shape[offd[0]]);
336                      break;
337                }
338 
339                if (constant_coeff == 1)
340                {
341                   AApd = 1/Ap[Ai];
342 
343                   switch(stencil_size)
344                   {
345                      case 7:
346                         hypre_RedBlackLoopInit();
347 #define DEVICE_VAR is_device_ptr(xp,bp)
348                         hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,
349                                                             bstart,bni,bnj,bi,
350                                                             xstart,xni,xnj,xi);
351                         {
352                            xp[xi] =
353                               (bp[bi] -
354                                App0*xp[xi + xoff0] -
355                                App1*xp[xi + xoff1] -
356                                App2*xp[xi + xoff2] -
357                                App3*xp[xi + xoff3] -
358                                App4*xp[xi + xoff4] -
359                                App5*xp[xi + xoff5])*AApd;
360                         }
361                         hypre_RedBlackConstantcoefLoopEnd();
362 #undef DEVICE_VAR
363 
364                         break;
365 
366                      case 5:
367                         hypre_RedBlackLoopInit();
368 #define DEVICE_VAR is_device_ptr(xp,bp)
369                         hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,
370                                                             bstart,bni,bnj,bi,
371                                                             xstart,xni,xnj,xi);
372                         {
373                            xp[xi] =
374                               (bp[bi] -
375                                App0*xp[xi + xoff0] -
376                                App1*xp[xi + xoff1] -
377                                App2*xp[xi + xoff2] -
378                                App3*xp[xi + xoff3])*AApd;
379                         }
380                         hypre_RedBlackConstantcoefLoopEnd();
381 #undef DEVICE_VAR
382                         break;
383 
384                      case 3:
385                         hypre_RedBlackLoopInit();
386 #define DEVICE_VAR is_device_ptr(xp,bp)
387                         hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,
388                                                             bstart,bni,bnj,bi,
389                                                             xstart,xni,xnj,xi);
390                         {
391                            xp[xi] =
392                               (bp[bi] -
393                                App0*xp[xi + xoff0] -
394                                App1*xp[xi + xoff1])*AApd;
395                         }
396                         hypre_RedBlackConstantcoefLoopEnd();
397 #undef DEVICE_VAR
398                         break;
399                   }
400 
401                }  /* if (constant_coeff == 1) */
402 
403                else /* variable diagonal */
404                {
405                   Astart = hypre_BoxIndexRank(A_dbox, start);
406                   Ani = hypre_BoxSizeX(A_dbox);
407                   Anj = hypre_BoxSizeY(A_dbox);
408 
409                   switch(stencil_size)
410                   {
411                      case 7:
412                         hypre_RedBlackLoopInit();
413 #define DEVICE_VAR is_device_ptr(xp,bp,Ap)
414                         hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
415                                                 Astart,Ani,Anj,Ai,
416                                                 bstart,bni,bnj,bi,
417                                                 xstart,xni,xnj,xi);
418                         {
419                            xp[xi] =
420                               (bp[bi] -
421                                App0*xp[xi + xoff0] -
422                                App1*xp[xi + xoff1] -
423                                App2*xp[xi + xoff2] -
424                                App3*xp[xi + xoff3] -
425                                App4*xp[xi + xoff4] -
426                                App5*xp[xi + xoff5]) / Ap[Ai];
427                         }
428                         hypre_RedBlackLoopEnd();
429 #undef DEVICE_VAR
430                         break;
431 
432                      case 5:
433                         hypre_RedBlackLoopInit();
434 #define DEVICE_VAR is_device_ptr(xp,bp,Ap)
435                         hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
436                                                 Astart,Ani,Anj,Ai,
437                                                 bstart,bni,bnj,bi,
438                                                 xstart,xni,xnj,xi);
439                         {
440                            xp[xi] =
441                               (bp[bi] -
442                                App0*xp[xi + xoff0] -
443                                App1*xp[xi + xoff1] -
444                                App2*xp[xi + xoff2] -
445                                App3*xp[xi + xoff3]) / Ap[Ai];
446                         }
447                         hypre_RedBlackLoopEnd();
448 #undef DEVICE_VAR
449                         break;
450 
451                      case 3:
452                         hypre_RedBlackLoopInit();
453 #define DEVICE_VAR is_device_ptr(xp,bp,Ap)
454                         hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
455                                                 Astart,Ani,Anj,Ai,
456                                                 bstart,bni,bnj,bi,
457                                                 xstart,xni,xnj,xi);
458                         {
459                            xp[xi] =
460                               (bp[bi] -
461                                App0*xp[xi + xoff0] -
462                                App1*xp[xi + xoff1]) / Ap[Ai];
463                         }
464                         hypre_RedBlackLoopEnd();
465 #undef DEVICE_VAR
466                         break;
467 
468                   }  /* switch(stencil_size) */
469                }     /* else */
470             }
471          }
472       }
473 
474       rb = (rb + 1) % 2;
475       iter++;
476    }
477 
478    (relax_data -> num_iterations) = iter / 2;
479 
480    /*-----------------------------------------------------------------------
481     * Return
482     *-----------------------------------------------------------------------*/
483 
484    hypre_IncFLOPCount(relax_data -> flops);
485    hypre_EndTiming(relax_data -> time_index);
486 
487    return hypre_error_flag;
488 }
489 
490 
491