1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /******************************************************************************
9  *
10  * This routine assumes a 3-pt (1D), 5-pt (2D), or 7-pt (3D) stencil.
11  *
12  *****************************************************************************/
13 
14 #include "_hypre_struct_ls.h"
15 #include "_hypre_struct_mv.hpp"
16 #include "red_black_gs.h"
17 
18 /*--------------------------------------------------------------------------
19  *--------------------------------------------------------------------------*/
20 
21 void *
hypre_RedBlackGSCreate(MPI_Comm comm)22 hypre_RedBlackGSCreate( MPI_Comm  comm )
23 {
24    hypre_RedBlackGSData *relax_data;
25 
26    relax_data = hypre_CTAlloc(hypre_RedBlackGSData,  1, HYPRE_MEMORY_HOST);
27 
28    (relax_data -> comm)       = comm;
29    (relax_data -> time_index) = hypre_InitializeTiming("RedBlackGS");
30 
31    /* set defaults */
32    (relax_data -> tol)         = 1.0e-06;
33    (relax_data -> max_iter)    = 1000;
34    (relax_data -> rel_change)  = 0;
35    (relax_data -> zero_guess)  = 0;
36    (relax_data -> rb_start)    = 1;
37    (relax_data -> flops)       = 0;
38    (relax_data -> A)           = NULL;
39    (relax_data -> b)           = NULL;
40    (relax_data -> x)           = NULL;
41    (relax_data -> compute_pkg) = NULL;
42 
43    return (void *) relax_data;
44 }
45 
46 /*--------------------------------------------------------------------------
47  *--------------------------------------------------------------------------*/
48 
49 HYPRE_Int
hypre_RedBlackGSDestroy(void * relax_vdata)50 hypre_RedBlackGSDestroy( void *relax_vdata )
51 {
52    hypre_RedBlackGSData *relax_data = (hypre_RedBlackGSData *)relax_vdata;
53 
54    if (relax_data)
55    {
56       hypre_StructMatrixDestroy(relax_data -> A);
57       hypre_StructVectorDestroy(relax_data -> b);
58       hypre_StructVectorDestroy(relax_data -> x);
59       hypre_ComputePkgDestroy(relax_data -> compute_pkg);
60 
61       hypre_FinalizeTiming(relax_data -> time_index);
62       hypre_TFree(relax_data, HYPRE_MEMORY_HOST);
63    }
64 
65    return hypre_error_flag;
66 }
67 
68 /*--------------------------------------------------------------------------
69  *--------------------------------------------------------------------------*/
70 
71 HYPRE_Int
hypre_RedBlackGSSetup(void * relax_vdata,hypre_StructMatrix * A,hypre_StructVector * b,hypre_StructVector * x)72 hypre_RedBlackGSSetup( void               *relax_vdata,
73                        hypre_StructMatrix *A,
74                        hypre_StructVector *b,
75                        hypre_StructVector *x )
76 {
77    hypre_RedBlackGSData  *relax_data = (hypre_RedBlackGSData *)relax_vdata;
78 
79    HYPRE_Int              diag_rank;
80    hypre_ComputePkg      *compute_pkg;
81 
82    hypre_StructGrid      *grid;
83    hypre_StructStencil   *stencil;
84    hypre_Index            diag_index;
85    hypre_ComputeInfo     *compute_info;
86 
87    /*----------------------------------------------------------
88     * Find the matrix diagonal
89     *----------------------------------------------------------*/
90 
91    grid    = hypre_StructMatrixGrid(A);
92    stencil = hypre_StructMatrixStencil(A);
93 
94    hypre_SetIndex3(diag_index, 0, 0, 0);
95    diag_rank = hypre_StructStencilElementRank(stencil, diag_index);
96 
97    /*----------------------------------------------------------
98     * Set up the compute packages
99     *----------------------------------------------------------*/
100 
101    hypre_CreateComputeInfo(grid, stencil, &compute_info);
102    hypre_ComputePkgCreate(compute_info, hypre_StructVectorDataSpace(x), 1,
103                           grid, &compute_pkg);
104 
105    /*----------------------------------------------------------
106     * Set up the relax data structure
107     *----------------------------------------------------------*/
108 
109    (relax_data -> A) = hypre_StructMatrixRef(A);
110    (relax_data -> x) = hypre_StructVectorRef(x);
111    (relax_data -> b) = hypre_StructVectorRef(b);
112    (relax_data -> diag_rank) = diag_rank;
113    (relax_data -> compute_pkg) = compute_pkg;
114 
115    return hypre_error_flag;
116 }
117 
118 /*--------------------------------------------------------------------------
119  *--------------------------------------------------------------------------*/
120 
121 HYPRE_Int
hypre_RedBlackGS(void * relax_vdata,hypre_StructMatrix * A,hypre_StructVector * b,hypre_StructVector * x)122 hypre_RedBlackGS( void               *relax_vdata,
123                   hypre_StructMatrix *A,
124                   hypre_StructVector *b,
125                   hypre_StructVector *x )
126 {
127    hypre_RedBlackGSData  *relax_data = (hypre_RedBlackGSData *)relax_vdata;
128 
129    HYPRE_Int              max_iter    = (relax_data -> max_iter);
130    HYPRE_Int              zero_guess  = (relax_data -> zero_guess);
131    HYPRE_Int              rb_start    = (relax_data -> rb_start);
132    HYPRE_Int              diag_rank   = (relax_data -> diag_rank);
133    hypre_ComputePkg      *compute_pkg = (relax_data -> compute_pkg);
134    HYPRE_Int              ndim = hypre_StructMatrixNDim(A);
135 
136    hypre_CommHandle      *comm_handle;
137 
138    hypre_BoxArrayArray   *compute_box_aa;
139    hypre_BoxArray        *compute_box_a;
140    hypre_Box             *compute_box;
141 
142    hypre_Box             *A_dbox;
143    hypre_Box             *b_dbox;
144    hypre_Box             *x_dbox;
145 
146    HYPRE_Int              Astart, Ani, Anj;
147    HYPRE_Int              bstart, bni, bnj;
148    HYPRE_Int              xstart, xni, xnj;
149    HYPRE_Int              xoff0, xoff1, xoff2, xoff3, xoff4, xoff5;
150 
151    HYPRE_Real            *Ap;
152    HYPRE_Real            *Ap0, *Ap1, *Ap2, *Ap3, *Ap4, *Ap5;
153    HYPRE_Real            *bp;
154    HYPRE_Real            *xp;
155 
156    hypre_IndexRef         start;
157    hypre_Index            loop_size;
158 
159    hypre_StructStencil   *stencil;
160    hypre_Index           *stencil_shape;
161    HYPRE_Int              stencil_size;
162    HYPRE_Int              offd[6];
163 
164    HYPRE_Int              iter, rb, redblack, d;
165    HYPRE_Int              compute_i, i, j;
166    HYPRE_Int              ni, nj, nk;
167 
168    /*----------------------------------------------------------
169     * Initialize some things and deal with special cases
170     *----------------------------------------------------------*/
171 
172    hypre_BeginTiming(relax_data -> time_index);
173 
174    hypre_StructMatrixDestroy(relax_data -> A);
175    hypre_StructVectorDestroy(relax_data -> b);
176    hypre_StructVectorDestroy(relax_data -> x);
177    (relax_data -> A) = hypre_StructMatrixRef(A);
178    (relax_data -> x) = hypre_StructVectorRef(x);
179    (relax_data -> b) = hypre_StructVectorRef(b);
180 
181    (relax_data -> num_iterations) = 0;
182 
183    /* if max_iter is zero, return */
184    if (max_iter == 0)
185    {
186       /* if using a zero initial guess, return zero */
187       if (zero_guess)
188       {
189          hypre_StructVectorSetConstantValues(x, 0.0);
190       }
191 
192       hypre_EndTiming(relax_data -> time_index);
193       return hypre_error_flag;
194    }
195    else
196    {
197       stencil       = hypre_StructMatrixStencil(A);
198       stencil_shape = hypre_StructStencilShape(stencil);
199       stencil_size  = hypre_StructStencilSize(stencil);
200 
201       /* get off-diag entry ranks ready */
202       i = 0;
203       for (j = 0; j < stencil_size; j++)
204       {
205          if (j != diag_rank)
206          {
207             offd[i] = j;
208             i++;
209          }
210       }
211    }
212 
213    /*----------------------------------------------------------
214     * Do zero_guess iteration
215     *----------------------------------------------------------*/
216 
217    rb = rb_start;
218    iter = 0;
219 
220    if (zero_guess)
221    {
222       for (compute_i = 0; compute_i < 2; compute_i++)
223       {
224          switch(compute_i)
225          {
226             case 0:
227             {
228                compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
229             }
230             break;
231 
232             case 1:
233             {
234                compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
235             }
236             break;
237          }
238 
239          hypre_ForBoxArrayI(i, compute_box_aa)
240          {
241             compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
242 
243             A_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
244             b_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
245             x_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
246 
247             Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
248             bp = hypre_StructVectorBoxData(b, i);
249             xp = hypre_StructVectorBoxData(x, i);
250 
251             hypre_ForBoxI(j, compute_box_a)
252             {
253                compute_box = hypre_BoxArrayBox(compute_box_a, j);
254 
255                start  = hypre_BoxIMin(compute_box);
256                hypre_BoxGetSize(compute_box, loop_size);
257 
258                /* Are we relaxing index start or start+(1,0,0)? */
259                redblack = rb;
260                for (d = 0; d < ndim; d++)
261                {
262                   redblack += hypre_IndexD(start, d);
263                }
264                redblack = hypre_abs(redblack) % 2;
265 
266                Astart = hypre_BoxIndexRank(A_dbox, start);
267                bstart = hypre_BoxIndexRank(b_dbox, start);
268                xstart = hypre_BoxIndexRank(x_dbox, start);
269                ni = hypre_IndexX(loop_size);
270                nj = hypre_IndexY(loop_size);
271                nk = hypre_IndexZ(loop_size);
272                Ani = hypre_BoxSizeX(A_dbox);
273                bni = hypre_BoxSizeX(b_dbox);
274                xni = hypre_BoxSizeX(x_dbox);
275                Anj = hypre_BoxSizeY(A_dbox);
276                bnj = hypre_BoxSizeY(b_dbox);
277                xnj = hypre_BoxSizeY(x_dbox);
278                if (ndim < 3)
279                {
280                   nk = 1;
281                   if (ndim < 2)
282                   {
283                      nj = 1;
284                   }
285                }
286 
287                hypre_RedBlackLoopInit();
288 #define DEVICE_VAR is_device_ptr(xp,bp,Ap)
289                hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
290                                        Astart,Ani,Anj,Ai,
291                                        bstart,bni,bnj,bi,
292                                        xstart,xni,xnj,xi);
293                {
294                   xp[xi] = bp[bi] / Ap[Ai];
295                }
296                hypre_RedBlackLoopEnd();
297 #undef DEVICE_VAR
298             }
299          }
300       }
301 
302       rb = (rb + 1) % 2;
303       iter++;
304    }
305 
306    /*----------------------------------------------------------
307     * Do regular iterations
308     *----------------------------------------------------------*/
309 
310    while (iter < 2*max_iter)
311    {
312       for (compute_i = 0; compute_i < 2; compute_i++)
313       {
314          switch(compute_i)
315          {
316             case 0:
317             {
318                xp = hypre_StructVectorData(x);
319                hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
320                compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
321             }
322             break;
323 
324             case 1:
325             {
326                hypre_FinalizeIndtComputations(comm_handle);
327                compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
328             }
329             break;
330          }
331 
332          hypre_ForBoxArrayI(i, compute_box_aa)
333          {
334             compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
335 
336             A_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
337             b_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
338             x_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
339 
340             Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
341             bp = hypre_StructVectorBoxData(b, i);
342             xp = hypre_StructVectorBoxData(x, i);
343 
344             hypre_ForBoxI(j, compute_box_a)
345             {
346                compute_box = hypre_BoxArrayBox(compute_box_a, j);
347 
348                start  = hypre_BoxIMin(compute_box);
349                hypre_BoxGetSize(compute_box, loop_size);
350 
351                /* Are we relaxing index start or start+(1,0,0)? */
352                redblack = rb;
353                for (d = 0; d < ndim; d++)
354                {
355                   redblack += hypre_IndexD(start, d);
356                }
357                redblack = hypre_abs(redblack) % 2;
358 
359                Astart = hypre_BoxIndexRank(A_dbox, start);
360                bstart = hypre_BoxIndexRank(b_dbox, start);
361                xstart = hypre_BoxIndexRank(x_dbox, start);
362                ni = hypre_IndexX(loop_size);
363                nj = hypre_IndexY(loop_size);
364                nk = hypre_IndexZ(loop_size);
365                Ani = hypre_BoxSizeX(A_dbox);
366                bni = hypre_BoxSizeX(b_dbox);
367                xni = hypre_BoxSizeX(x_dbox);
368                Anj = hypre_BoxSizeY(A_dbox);
369                bnj = hypre_BoxSizeY(b_dbox);
370                xnj = hypre_BoxSizeY(x_dbox);
371                if (ndim < 3)
372                {
373                   nk = 1;
374                   if (ndim < 2)
375                   {
376                      nj = 1;
377                   }
378                }
379 
380                switch(stencil_size)
381                {
382                   case 7:
383                      Ap5 = hypre_StructMatrixBoxData(A, i, offd[5]);
384                      Ap4 = hypre_StructMatrixBoxData(A, i, offd[4]);
385                      xoff5 = hypre_BoxOffsetDistance(
386                         x_dbox, stencil_shape[offd[5]]);
387                      xoff4 = hypre_BoxOffsetDistance(
388                         x_dbox, stencil_shape[offd[4]]);
389 
390                   case 5:
391                      Ap3 = hypre_StructMatrixBoxData(A, i, offd[3]);
392                      Ap2 = hypre_StructMatrixBoxData(A, i, offd[2]);
393                      xoff3 = hypre_BoxOffsetDistance(
394                         x_dbox, stencil_shape[offd[3]]);
395                      xoff2 = hypre_BoxOffsetDistance(
396                         x_dbox, stencil_shape[offd[2]]);
397 
398                   case 3:
399                      Ap1 = hypre_StructMatrixBoxData(A, i, offd[1]);
400                      Ap0 = hypre_StructMatrixBoxData(A, i, offd[0]);
401                      xoff1 = hypre_BoxOffsetDistance(
402                         x_dbox, stencil_shape[offd[1]]);
403                      xoff0 = hypre_BoxOffsetDistance(
404                         x_dbox, stencil_shape[offd[0]]);
405                      break;
406                }
407 
408                switch(stencil_size)
409                {
410                   case 7:
411                      hypre_RedBlackLoopInit();
412 #define DEVICE_VAR is_device_ptr(xp,bp,Ap0,Ap1,Ap2,Ap3,Ap4,Ap5,Ap)
413                      hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
414                                              Astart,Ani,Anj,Ai,
415                                              bstart,bni,bnj,bi,
416                                              xstart,xni,xnj,xi);
417                      {
418                         xp[xi] =
419                            (bp[bi] -
420                             Ap0[Ai] * xp[xi + xoff0] -
421                             Ap1[Ai] * xp[xi + xoff1] -
422                             Ap2[Ai] * xp[xi + xoff2] -
423                             Ap3[Ai] * xp[xi + xoff3] -
424                             Ap4[Ai] * xp[xi + xoff4] -
425                             Ap5[Ai] * xp[xi + xoff5]) / Ap[Ai];
426                      }
427                      hypre_RedBlackLoopEnd();
428 #undef DEVICE_VAR
429                      break;
430 
431                   case 5:
432                      hypre_RedBlackLoopInit();
433 #define DEVICE_VAR is_device_ptr(xp,bp,Ap0,Ap1,Ap2,Ap3,Ap)
434                      hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
435                                              Astart,Ani,Anj,Ai,
436                                              bstart,bni,bnj,bi,
437                                              xstart,xni,xnj,xi);
438                      {
439                         xp[xi] =
440                            (bp[bi] -
441                             Ap0[Ai] * xp[xi + xoff0] -
442                             Ap1[Ai] * xp[xi + xoff1] -
443                             Ap2[Ai] * xp[xi + xoff2] -
444                             Ap3[Ai] * xp[xi + xoff3]) / Ap[Ai];
445                      }
446                      hypre_RedBlackLoopEnd();
447 #undef DEVICE_VAR
448                      break;
449 
450                   case 3:
451                      hypre_RedBlackLoopInit();
452 #define DEVICE_VAR is_device_ptr(xp,bp,Ap0,Ap1,Ap)
453                      hypre_RedBlackLoopBegin(ni,nj,nk,redblack,
454                                              Astart,Ani,Anj,Ai,
455                                              bstart,bni,bnj,bi,
456                                              xstart,xni,xnj,xi);
457                      {
458                         xp[xi] =
459                            (bp[bi] -
460                             Ap0[Ai] * xp[xi + xoff0] -
461                             Ap1[Ai] * xp[xi + xoff1]) / Ap[Ai];
462                      }
463                      hypre_RedBlackLoopEnd();
464 #undef DEVICE_VAR
465 
466                      break;
467                }
468             }
469          }
470       }
471 
472       rb = (rb + 1) % 2;
473       iter++;
474    }
475 
476    (relax_data -> num_iterations) = iter / 2;
477 
478    /*-----------------------------------------------------------------------
479     * Return
480     *-----------------------------------------------------------------------*/
481 
482    hypre_IncFLOPCount(relax_data -> flops);
483    hypre_EndTiming(relax_data -> time_index);
484 
485    return hypre_error_flag;
486 }
487 
488 /*--------------------------------------------------------------------------
489  *--------------------------------------------------------------------------*/
490 
491 HYPRE_Int
hypre_RedBlackGSSetTol(void * relax_vdata,HYPRE_Real tol)492 hypre_RedBlackGSSetTol( void   *relax_vdata,
493                         HYPRE_Real  tol )
494 {
495    hypre_RedBlackGSData *relax_data = (hypre_RedBlackGSData *)relax_vdata;
496 
497    (relax_data -> tol) = tol;
498 
499    return hypre_error_flag;
500 }
501 
502 /*--------------------------------------------------------------------------
503  *--------------------------------------------------------------------------*/
504 
505 HYPRE_Int
hypre_RedBlackGSSetMaxIter(void * relax_vdata,HYPRE_Int max_iter)506 hypre_RedBlackGSSetMaxIter( void *relax_vdata,
507                             HYPRE_Int   max_iter )
508 {
509    hypre_RedBlackGSData *relax_data = (hypre_RedBlackGSData *)relax_vdata;
510 
511    (relax_data -> max_iter) = max_iter;
512 
513    return hypre_error_flag;
514 }
515 
516 /*--------------------------------------------------------------------------
517  *--------------------------------------------------------------------------*/
518 
519 HYPRE_Int
hypre_RedBlackGSSetZeroGuess(void * relax_vdata,HYPRE_Int zero_guess)520 hypre_RedBlackGSSetZeroGuess( void *relax_vdata,
521                               HYPRE_Int   zero_guess )
522 {
523    hypre_RedBlackGSData *relax_data = (hypre_RedBlackGSData *)relax_vdata;
524 
525    (relax_data -> zero_guess) = zero_guess;
526 
527    return hypre_error_flag;
528 }
529 
530 /*--------------------------------------------------------------------------
531  *--------------------------------------------------------------------------*/
532 
533 HYPRE_Int
hypre_RedBlackGSSetStartRed(void * relax_vdata)534 hypre_RedBlackGSSetStartRed( void *relax_vdata )
535 {
536    hypre_RedBlackGSData *relax_data = (hypre_RedBlackGSData *)relax_vdata;
537 
538    (relax_data -> rb_start) = 1;
539 
540    return hypre_error_flag;
541 }
542 
543 /*--------------------------------------------------------------------------
544  *--------------------------------------------------------------------------*/
545 
546 HYPRE_Int
hypre_RedBlackGSSetStartBlack(void * relax_vdata)547 hypre_RedBlackGSSetStartBlack( void *relax_vdata )
548 {
549    hypre_RedBlackGSData *relax_data = (hypre_RedBlackGSData *)relax_vdata;
550 
551    (relax_data -> rb_start) = 0;
552 
553    return hypre_error_flag;
554 }
555 
556