1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /*--------------------------------------------------------------------------
9  * hypre_RedBlackGSData data structure
10  *--------------------------------------------------------------------------*/
11 
12 typedef struct
13 {
14    MPI_Comm                comm;
15 
16    HYPRE_Real              tol;                /* not yet used */
17    HYPRE_Int               max_iter;
18    HYPRE_Int               rel_change;         /* not yet used */
19    HYPRE_Int               zero_guess;
20    HYPRE_Int               rb_start;
21 
22    hypre_StructMatrix     *A;
23    hypre_StructVector     *b;
24    hypre_StructVector     *x;
25 
26    HYPRE_Int               diag_rank;
27 
28    hypre_ComputePkg       *compute_pkg;
29 
30    /* log info (always logged) */
31    HYPRE_Int               num_iterations;
32    HYPRE_Int               time_index;
33    HYPRE_Int               flops;
34 
35 } hypre_RedBlackGSData;
36 
37 #ifdef HYPRE_USING_RAJA
38 
/*--------------------------------------------------------------------------
 * RAJA red/black loop over an ni x nj x nk box.
 *
 * hypre_RedBlackLoopInit() expands to nothing for this backend.
 *
 * hypre_RedBlackLoopBegin(...) { body } hypre_RedBlackLoopEnd() runs `body`
 * for the points (ii,jj,kk) of the box whose parity satisfies
 * ii % 2 == (kk + jj + redblack) % 2.
 *
 *   ni, nj, nk            box extents
 *   redblack              parity selector
 *   Astart, Ani, Anj      base offset and strides for the A index
 *   bstart, bni, bnj      same for the b index
 *   xstart, xni, xnj      same for the x index
 *   Ai, bi, xi            identifiers the macro declares inside the loop and
 *                         sets to  start + kk*nj_stride*ni_stride
 *                                        + jj*ni_stride + ii
 *
 * The iteration space is flattened to nk*nj*((ni+1)/2) indices.  Each index
 * is decoded with kk varying fastest, then jj; the quotient picks a pair of
 * ii values and the parity picks one of the pair.  When ni is odd the chosen
 * ii can equal ni, hence the `ii < ni` guard.
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments such as `n+1` are not re-associated by operator precedence.
 * hypre_fence() is invoked after the forall in hypre_RedBlackLoopEnd().
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackLoopInit()

#define hypre_RedBlackLoopBegin(ni,nj,nk,redblack,        \
                                Astart,Ani,Anj,Ai,        \
                                bstart,bni,bnj,bi,        \
                                xstart,xni,xnj,xi)        \
{                                                         \
   HYPRE_Int hypre__tot = (nk)*(nj)*(((ni)+1)/2);         \
   forall< hypre_raja_exec_policy >(RangeSegment(0, hypre__tot), [=] hypre_RAJA_DEVICE (HYPRE_Int idx) \
   {                                                      \
      HYPRE_Int idx_local = idx;                          \
      HYPRE_Int ii,jj,kk,Ai,bi,xi;                        \
      HYPRE_Int local_ii;                                 \
      kk = idx_local % (nk);                              \
      idx_local = idx_local / (nk);                       \
      jj = idx_local % (nj);                              \
      idx_local = idx_local / (nj);                       \
      local_ii = (kk + jj + (redblack)) % 2;              \
      ii = 2*idx_local + local_ii;                        \
      if (ii < (ni))                                      \
      {                                                   \
         Ai = (Astart) + kk*(Anj)*(Ani) + jj*(Ani) + ii;  \
         bi = (bstart) + kk*(bnj)*(bni) + jj*(bni) + ii;  \
         xi = (xstart) + kk*(xnj)*(xni) + jj*(xni) + ii;

#define hypre_RedBlackLoopEnd()                           \
      }                                                   \
   });                                                    \
   hypre_fence();                                         \
}
68 
/*--------------------------------------------------------------------------
 * RAJA red/black loop for the constant-coefficient case: only the b and x
 * indices are produced, there is no matrix index.
 *
 * hypre_RedBlackConstantcoefLoopBegin(...) { body }
 * hypre_RedBlackConstantcoefLoopEnd() runs `body` for the points (ii,jj,kk)
 * of the ni x nj x nk box with ii % 2 == (kk + jj + redblack) % 2.
 *
 *   bstart, bni, bnj / xstart, xni, xnj   base offset and strides
 *   bi, xi    identifiers declared inside the loop and set to
 *             start + kk*nj_stride*ni_stride + jj*ni_stride + ii
 *
 * The flattened index (nk*nj*((ni+1)/2) values) is decoded with kk varying
 * fastest, then jj; `ii < ni` drops the extra point generated when ni is odd.
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments keep their intended meaning.  hypre_fence() is invoked after the
 * forall in the End macro.
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack, \
                                            bstart,bni,bnj,bi, \
                                            xstart,xni,xnj,xi) \
{                                                              \
   HYPRE_Int hypre__tot = (nk)*(nj)*(((ni)+1)/2);              \
   forall< hypre_raja_exec_policy >(RangeSegment(0, hypre__tot), [=] hypre_RAJA_DEVICE (HYPRE_Int idx) \
   {                                                           \
      HYPRE_Int idx_local = idx;                               \
      HYPRE_Int ii,jj,kk,bi,xi;                                \
      HYPRE_Int local_ii;                                      \
      kk = idx_local % (nk);                                   \
      idx_local = idx_local / (nk);                            \
      jj = idx_local % (nj);                                   \
      idx_local = idx_local / (nj);                            \
      local_ii = (kk + jj + (redblack)) % 2;                   \
      ii = 2*idx_local + local_ii;                             \
      if (ii < (ni))                                           \
      {                                                        \
         bi = (bstart) + kk*(bnj)*(bni) + jj*(bni) + ii;       \
         xi = (xstart) + kk*(xnj)*(xni) + jj*(xni) + ii;

#define hypre_RedBlackConstantcoefLoopEnd()                    \
      }                                                        \
   });                                                         \
   hypre_fence();                                              \
}
95 
96 #elif defined(HYPRE_USING_KOKKOS)
97 
/*--------------------------------------------------------------------------
 * Kokkos red/black loop over an ni x nj x nk box.
 *
 * hypre_RedBlackLoopInit() expands to nothing for this backend.
 *
 * hypre_RedBlackLoopBegin(...) { body } hypre_RedBlackLoopEnd() runs `body`
 * for the points (ii,jj,kk) of the box with
 * ii % 2 == (kk + jj + redblack) % 2.
 *
 *   ni, nj, nk            box extents
 *   redblack              parity selector
 *   Astart, Ani, Anj      base offset and strides for the A index
 *   bstart, bni, bnj      same for the b index
 *   xstart, xni, xnj      same for the x index
 *   Ai, bi, xi            identifiers declared inside the lambda and set to
 *                         start + kk*nj_stride*ni_stride + jj*ni_stride + ii
 *
 * Kokkos::parallel_for is given nk*nj*((ni+1)/2) flattened indices; each is
 * decoded with kk varying fastest, then jj.  `ii < ni` discards the extra
 * point generated when ni is odd.
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments such as `n+1` are not re-associated by operator precedence.
 * hypre_fence() is invoked after the parallel_for in hypre_RedBlackLoopEnd().
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackLoopInit()

#define hypre_RedBlackLoopBegin(ni,nj,nk,redblack,                  \
                                Astart,Ani,Anj,Ai,                  \
                                bstart,bni,bnj,bi,                  \
                                xstart,xni,xnj,xi)                  \
{                                                                   \
   HYPRE_Int hypre__tot = (nk)*(nj)*(((ni)+1)/2);                   \
   Kokkos::parallel_for (hypre__tot, KOKKOS_LAMBDA (HYPRE_Int idx)  \
   {                                                                \
      HYPRE_Int idx_local = idx;                                    \
      HYPRE_Int ii,jj,kk,Ai,bi,xi;                                  \
      HYPRE_Int local_ii;                                           \
      kk = idx_local % (nk);                                        \
      idx_local = idx_local / (nk);                                 \
      jj = idx_local % (nj);                                        \
      idx_local = idx_local / (nj);                                 \
      local_ii = (kk + jj + (redblack)) % 2;                        \
      ii = 2*idx_local + local_ii;                                  \
      if (ii < (ni))                                                \
      {                                                             \
         Ai = (Astart) + kk*(Anj)*(Ani) + jj*(Ani) + ii;            \
         bi = (bstart) + kk*(bnj)*(bni) + jj*(bni) + ii;            \
         xi = (xstart) + kk*(xnj)*(xni) + jj*(xni) + ii;

#define hypre_RedBlackLoopEnd()                                     \
      }                                                             \
   });                                                              \
   hypre_fence();                                                   \
}
127 
/*--------------------------------------------------------------------------
 * Kokkos red/black loop for the constant-coefficient case: only the b and x
 * indices are produced, there is no matrix index.
 *
 * hypre_RedBlackConstantcoefLoopBegin(...) { body }
 * hypre_RedBlackConstantcoefLoopEnd() runs `body` for the points (ii,jj,kk)
 * of the ni x nj x nk box with ii % 2 == (kk + jj + redblack) % 2.
 *
 *   bstart, bni, bnj / xstart, xni, xnj   base offset and strides
 *   bi, xi    identifiers declared inside the lambda and set to
 *             start + kk*nj_stride*ni_stride + jj*ni_stride + ii
 *
 * The flattened index (nk*nj*((ni+1)/2) values) is decoded with kk varying
 * fastest, then jj; `ii < ni` drops the extra point generated when ni is odd.
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments keep their intended meaning.  hypre_fence() is invoked after the
 * parallel_for in the End macro.
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,      \
                                            bstart,bni,bnj,bi,      \
                                            xstart,xni,xnj,xi)      \
{                                                                   \
   HYPRE_Int hypre__tot = (nk)*(nj)*(((ni)+1)/2);                   \
   Kokkos::parallel_for (hypre__tot, KOKKOS_LAMBDA (HYPRE_Int idx)  \
   {                                                                \
      HYPRE_Int idx_local = idx;                                    \
      HYPRE_Int ii,jj,kk,bi,xi;                                     \
      HYPRE_Int local_ii;                                           \
      kk = idx_local % (nk);                                        \
      idx_local = idx_local / (nk);                                 \
      jj = idx_local % (nj);                                        \
      idx_local = idx_local / (nj);                                 \
      local_ii = (kk + jj + (redblack)) % 2;                        \
      ii = 2*idx_local + local_ii;                                  \
      if (ii < (ni))                                                \
      {                                                             \
         bi = (bstart) + kk*(bnj)*(bni) + jj*(bni) + ii;            \
         xi = (xstart) + kk*(xnj)*(xni) + jj*(xni) + ii;

#define hypre_RedBlackConstantcoefLoopEnd()                         \
      }                                                             \
   });                                                              \
   hypre_fence();                                                   \
}
154 
155 #elif defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
156 
/*--------------------------------------------------------------------------
 * CUDA/HIP red/black loop over an ni x nj x nk box.
 *
 * hypre_RedBlackLoopInit() expands to nothing for this backend.
 *
 * hypre_RedBlackLoopBegin(...) { body } hypre_RedBlackLoopEnd() runs `body`
 * for the points (ii,jj,kk) of the box with
 * ii % 2 == (kk + jj + redblack) % 2.
 *
 *   ni, nj, nk            box extents
 *   redblack              parity selector
 *   Astart, Ani, Anj      base offset and strides for the A index
 *   bstart, bni, bnj      same for the b index
 *   xstart, xni, xnj      same for the x index
 *   Ai, bi, xi            identifiers declared inside the lambda and set to
 *                         start + kk*nj_stride*ni_stride + jj*ni_stride + ii
 *
 * BoxLoopforall is given nk*nj*((ni+1)/2) flattened indices; each is decoded
 * with kk varying fastest, then jj.  `ii < ni` discards the extra point
 * generated when ni is odd.
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments such as `n+1` are not re-associated by operator precedence.
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackLoopInit()

#define hypre_RedBlackLoopBegin(ni,nj,nk,redblack,        \
                                Astart,Ani,Anj,Ai,        \
                                bstart,bni,bnj,bi,        \
                                xstart,xni,xnj,xi)        \
{                                                         \
   HYPRE_Int hypre__tot = (nk)*(nj)*(((ni)+1)/2);         \
   BoxLoopforall(hypre__tot, HYPRE_LAMBDA (HYPRE_Int idx) \
   {                                                      \
      HYPRE_Int idx_local = idx;                          \
      HYPRE_Int ii,jj,kk,Ai,bi,xi;                        \
      HYPRE_Int local_ii;                                 \
      kk = idx_local % (nk);                              \
      idx_local = idx_local / (nk);                       \
      jj = idx_local % (nj);                              \
      idx_local = idx_local / (nj);                       \
      local_ii = (kk + jj + (redblack)) % 2;              \
      ii = 2*idx_local + local_ii;                        \
      if (ii < (ni))                                      \
      {                                                   \
         Ai = (Astart) + kk*(Anj)*(Ani) + jj*(Ani) + ii;  \
         bi = (bstart) + kk*(bnj)*(bni) + jj*(bni) + ii;  \
         xi = (xstart) + kk*(xnj)*(xni) + jj*(xni) + ii;

#define hypre_RedBlackLoopEnd()                           \
      }                                                   \
   });                                                    \
}
185 
/*--------------------------------------------------------------------------
 * CUDA/HIP red/black loop for the constant-coefficient case: only the b and
 * x indices are produced, there is no matrix index.
 *
 * hypre_RedBlackConstantcoefLoopBegin(...) { body }
 * hypre_RedBlackConstantcoefLoopEnd() runs `body` for the points (ii,jj,kk)
 * of the ni x nj x nk box with ii % 2 == (kk + jj + redblack) % 2.
 *
 *   bstart, bni, bnj / xstart, xni, xnj   base offset and strides
 *   bi, xi    identifiers declared inside the lambda and set to
 *             start + kk*nj_stride*ni_stride + jj*ni_stride + ii
 *
 * The flattened index (nk*nj*((ni+1)/2) values) is decoded with kk varying
 * fastest, then jj; `ii < ni` drops the extra point generated when ni is odd.
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments keep their intended meaning.
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,      \
                                            bstart,bni,bnj,bi,      \
                                            xstart,xni,xnj,xi)      \
{                                                                   \
   HYPRE_Int hypre__tot = (nk)*(nj)*(((ni)+1)/2);                   \
   BoxLoopforall(hypre__tot, HYPRE_LAMBDA (HYPRE_Int idx)           \
   {                                                                \
      HYPRE_Int idx_local = idx;                                    \
      HYPRE_Int ii,jj,kk,bi,xi;                                     \
      HYPRE_Int local_ii;                                           \
      kk = idx_local % (nk);                                        \
      idx_local = idx_local / (nk);                                 \
      jj = idx_local % (nj);                                        \
      idx_local = idx_local / (nj);                                 \
      local_ii = (kk + jj + (redblack)) % 2;                        \
      ii = 2*idx_local + local_ii;                                  \
      if (ii < (ni))                                                \
      {                                                             \
         bi = (bstart) + kk*(bnj)*(bni) + jj*(bni) + ii;            \
         xi = (xstart) + kk*(xnj)*(xni) + jj*(xni) + ii;

#define hypre_RedBlackConstantcoefLoopEnd()                         \
      }                                                             \
   });                                                              \
}
211 
212 #elif defined(HYPRE_USING_DEVICE_OPENMP)
213 
214 /* BEGIN OF OMP 4.5 */
215 /* #define IF_CLAUSE if (hypre__global_offload) */
216 
217 /* stringification:
218  * _Pragma(string-literal), so we need to cast argument to a string
219  * The three dots as last argument of the macro tells compiler that this is a variadic macro.
220  * I.e. this is a macro that receives variable number of arguments.
221  */
222 //#define HYPRE_STR(s...) #s
223 //#define HYPRE_XSTR(s...) HYPRE_STR(s)
224 
/*--------------------------------------------------------------------------
 * Device-OpenMP red/black loop over an ni x nj x nk box.
 *
 * hypre_RedBlackLoopInit() expands to nothing for this backend.
 *
 * hypre_RedBlackLoopBegin(...) { body } hypre_RedBlackLoopEnd() runs `body`
 * for the points (ii,jj,kk) of the box with
 * ii % 2 == (kk + jj + redblack) % 2.
 *
 *   ni, nj, nk            box extents
 *   redblack              parity selector
 *   Astart, Ani, Anj      base offset and strides for the A index
 *   bstart, bni, bnj      same for the b index
 *   xstart, xni, xnj      same for the x index
 *   Ai, bi, xi            identifiers declared inside the loop and set to
 *                         start + kk*nj_stride*ni_stride + jj*ni_stride + ii
 *
 * A single `omp target teams distribute parallel for` loop runs over
 * nk*nj*((ni+1)/2) flattened indices; each is decoded with kk varying
 * fastest, then jj.  `ii < ni` discards the extra point generated when ni
 * is odd.
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments such as `n+1` are not re-associated by operator precedence.
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackLoopInit()

#define hypre_RedBlackLoopBegin(ni,nj,nk,redblack,                      \
                                Astart,Ani,Anj,Ai,                      \
                                bstart,bni,bnj,bi,                      \
                                xstart,xni,xnj,xi)                      \
{                                                                       \
   HYPRE_Int hypre__thread, hypre__tot = (nk)*(nj)*(((ni)+1)/2);        \
   HYPRE_BOXLOOP_ENTRY_PRINT                                            \
   /* device code: */                                                   \
   _Pragma (HYPRE_XSTR(omp target teams distribute parallel for IF_CLAUSE IS_DEVICE_CLAUSE)) \
   for (hypre__thread=0; hypre__thread<hypre__tot; hypre__thread++)     \
   {                                                                    \
        HYPRE_Int idx_local = hypre__thread;                            \
        HYPRE_Int ii,jj,kk,Ai,bi,xi;                                    \
        HYPRE_Int local_ii;                                             \
        kk = idx_local % (nk);                                          \
        idx_local = idx_local / (nk);                                   \
        jj = idx_local % (nj);                                          \
        idx_local = idx_local / (nj);                                   \
        local_ii = (kk + jj + (redblack)) % 2;                          \
        ii = 2*idx_local + local_ii;                                    \
        if (ii < (ni))                                                  \
        {                                                               \
            Ai = (Astart) + kk*(Anj)*(Ani) + jj*(Ani) + ii;             \
            bi = (bstart) + kk*(bnj)*(bni) + jj*(bni) + ii;             \
            xi = (xstart) + kk*(xnj)*(xni) + jj*(xni) + ii;

#define hypre_RedBlackLoopEnd()                                         \
        }                                                               \
     }                                                                  \
}
257 
258 
259 
/*--------------------------------------------------------------------------
 * Device-OpenMP red/black loop for the constant-coefficient case: only the
 * b and x indices are produced, there is no matrix index.
 *
 * hypre_RedBlackConstantcoefLoopBegin(...) { body }
 * hypre_RedBlackConstantcoefLoopEnd() runs `body` for the points (ii,jj,kk)
 * of the ni x nj x nk box with ii % 2 == (kk + jj + redblack) % 2.
 *
 *   bstart, bni, bnj / xstart, xni, xnj   base offset and strides
 *   bi, xi    identifiers declared inside the loop and set to
 *             start + kk*nj_stride*ni_stride + jj*ni_stride + ii
 *
 * The flattened index (nk*nj*((ni+1)/2) values) is decoded with kk varying
 * fastest, then jj; `ii < ni` drops the extra point generated when ni is odd.
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments keep their intended meaning.
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,        \
                                            bstart,bni,bnj,bi,        \
                                            xstart,xni,xnj,xi)        \
{                                                                     \
   HYPRE_Int hypre__thread, hypre__tot = (nk)*(nj)*(((ni)+1)/2);      \
   HYPRE_BOXLOOP_ENTRY_PRINT                                          \
   /* device code: */                                                 \
   _Pragma (HYPRE_XSTR(omp target teams distribute parallel for IF_CLAUSE IS_DEVICE_CLAUSE)) \
   for (hypre__thread=0; hypre__thread<hypre__tot; hypre__thread++)   \
   {                                                                  \
        HYPRE_Int idx_local = hypre__thread;                          \
        HYPRE_Int ii,jj,kk,bi,xi;                                     \
        HYPRE_Int local_ii;                                           \
        kk = idx_local % (nk);                                        \
        idx_local = idx_local / (nk);                                 \
        jj = idx_local % (nj);                                        \
        idx_local = idx_local / (nj);                                 \
        local_ii = (kk + jj + (redblack)) % 2;                        \
        ii = 2*idx_local + local_ii;                                  \
        if (ii < (ni))                                                \
        {                                                             \
            bi = (bstart) + kk*(bnj)*(bni) + jj*(bni) + ii;           \
            xi = (xstart) + kk*(xnj)*(xni) + jj*(xni) + ii;

#define hypre_RedBlackConstantcoefLoopEnd()                           \
         }                                                            \
     }                                                                \
}
288 /* END OF OMP 4.5 */
289 
290 #else
291 
292 /* CPU */
/* Name of the outer loop counter; listed in the OpenMP private() clause of
 * OMPRB1 below. */
#define HYPRE_REDBLACK_PRIVATE hypre__kk

/* Opens a scope and declares the outer loop counter.  The scope is left open
 * here; the host hypre_RedBlackLoopEnd() / hypre_RedBlackConstantcoefLoopEnd()
 * macros supply the matching closing brace. */
#define hypre_RedBlackLoopInit() \
{                                \
   HYPRE_Int hypre__kk;

/* OMPRB1 is placed directly in front of the outer (kk) loop of the host
 * red/black macros: an `omp parallel for` pragma when hypre is built with
 * OpenMP, nothing otherwise. */
#if !defined(HYPRE_USING_OPENMP)
#  define OMPRB1
#else
#  define HYPRE_BOX_REDUCTION
   /* NOTE(review): Microsoft documents __pragma as taking a token sequence,
    * not a string literal - confirm the MSVC branch behaves as intended. */
#  if defined(WIN32) && defined(_MSC_VER)
#    define Pragma(x) __pragma(HYPRE_XSTR(x))
#  else
#    define Pragma(x) _Pragma(HYPRE_XSTR(x))
#  endif
#  define OMPRB1 Pragma(omp parallel for private(HYPRE_REDBLACK_PRIVATE) HYPRE_BOX_REDUCTION HYPRE_SMP_SCHEDULE)
#endif
310 
/*--------------------------------------------------------------------------
 * Host red/black loop over an ni x nj x nk box.
 *
 * Usage:
 *    hypre_RedBlackLoopInit();
 *    hypre_RedBlackLoopBegin(...); { body } hypre_RedBlackLoopEnd();
 *
 * `body` runs for the points (ii,jj,kk) of the box with
 * ii % 2 == (kk + jj + redblack) % 2: for every (kk,jj) the ii loop starts
 * at that parity and advances by 2.
 *
 *   ni, nj, nk            box extents
 *   redblack              parity selector
 *   Astart, Ani, Anj      base offset and strides for the A index
 *   bstart, bni, bnj      same for the b index
 *   xstart, xni, xnj      same for the x index
 *   Ai, bi, xi            identifiers declared inside the kk loop; initialised
 *                         to start + kk*nj_stride*ni_stride + jj*ni_stride + ii
 *                         and advanced by 2 together with ii
 *
 * The kk loop uses hypre__kk (declared by hypre_RedBlackLoopInit()) and is
 * prefixed with OMPRB1.  hypre_RedBlackLoopEnd() closes the three loops and
 * the scope opened by hypre_RedBlackLoopInit().
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments such as `n+1` are not re-associated by operator precedence.
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackLoopBegin(ni,nj,nk,redblack,                    \
                                Astart,Ani,Anj,Ai,                    \
                                bstart,bni,bnj,bi,                    \
                                xstart,xni,xnj,xi)                    \
   OMPRB1                                                             \
   for (hypre__kk = 0; hypre__kk < (nk); hypre__kk++)                 \
   {                                                                  \
      HYPRE_Int ii,jj,Ai,bi,xi;                                       \
      for (jj = 0; jj < (nj); jj++)                                   \
      {                                                               \
         ii = (hypre__kk + jj + (redblack)) % 2;                      \
         Ai = (Astart) + hypre__kk*(Anj)*(Ani) + jj*(Ani) + ii;       \
         bi = (bstart) + hypre__kk*(bnj)*(bni) + jj*(bni) + ii;       \
         xi = (xstart) + hypre__kk*(xnj)*(xni) + jj*(xni) + ii;       \
         for (; ii < (ni); ii+=2, Ai+=2, bi+=2, xi+=2)                \
         {

#define hypre_RedBlackLoopEnd() \
         }                      \
      }                         \
   }                            \
}
333 
/*--------------------------------------------------------------------------
 * Host red/black loop for the constant-coefficient case: only the b and x
 * indices are produced, there is no matrix index.
 *
 * Usage:
 *    hypre_RedBlackLoopInit();
 *    hypre_RedBlackConstantcoefLoopBegin(...); { body }
 *    hypre_RedBlackConstantcoefLoopEnd();
 *
 * `body` runs for the points (ii,jj,kk) of the ni x nj x nk box with
 * ii % 2 == (kk + jj + redblack) % 2: for every (kk,jj) the ii loop starts
 * at that parity and advances by 2.
 *
 *   bstart, bni, bnj / xstart, xni, xnj   base offset and strides
 *   bi, xi    identifiers declared inside the kk loop; initialised to
 *             start + kk*nj_stride*ni_stride + jj*ni_stride + ii and
 *             advanced by 2 together with ii
 *
 * The kk loop uses hypre__kk (declared by hypre_RedBlackLoopInit()) and is
 * prefixed with OMPRB1.  The End macro closes the three loops and the scope
 * opened by hypre_RedBlackLoopInit().
 *
 * The inner loop advances only ii, bi and xi.  It must not touch `Ai`: that
 * name is neither a parameter nor a local of this macro, so incrementing it
 * would require (and modify) a variable in the caller's scope, which is
 * shared between threads when OMPRB1 is an OpenMP pragma.
 *
 * Value arguments are parenthesized in the expansion so that expression
 * arguments keep their intended meaning.
 *--------------------------------------------------------------------------*/
#define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack,        \
                                            bstart,bni,bnj,bi,        \
                                            xstart,xni,xnj,xi)        \
   OMPRB1                                                             \
   for (hypre__kk = 0; hypre__kk < (nk); hypre__kk++)                 \
   {                                                                  \
      HYPRE_Int ii,jj,bi,xi;                                          \
      for (jj = 0; jj < (nj); jj++)                                   \
      {                                                               \
         ii = (hypre__kk + jj + (redblack)) % 2;                      \
         bi = (bstart) + hypre__kk*(bnj)*(bni) + jj*(bni) + ii;       \
         xi = (xstart) + hypre__kk*(xnj)*(xni) + jj*(xni) + ii;       \
         for (; ii < (ni); ii+=2, bi+=2, xi+=2)                       \
         {

#define hypre_RedBlackConstantcoefLoopEnd() \
         }                                  \
      }                                     \
   }                                        \
}
354 #endif
355