1 /****************************************************************************** 2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other 3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 4 * 5 * SPDX-License-Identifier: (Apache-2.0 OR MIT) 6 ******************************************************************************/ 7 8 /*-------------------------------------------------------------------------- 9 * hypre_RedBlackGSData data structure 10 *--------------------------------------------------------------------------*/ 11 12 typedef struct 13 { 14 MPI_Comm comm; 15 16 HYPRE_Real tol; /* not yet used */ 17 HYPRE_Int max_iter; 18 HYPRE_Int rel_change; /* not yet used */ 19 HYPRE_Int zero_guess; 20 HYPRE_Int rb_start; 21 22 hypre_StructMatrix *A; 23 hypre_StructVector *b; 24 hypre_StructVector *x; 25 26 HYPRE_Int diag_rank; 27 28 hypre_ComputePkg *compute_pkg; 29 30 /* log info (always logged) */ 31 HYPRE_Int num_iterations; 32 HYPRE_Int time_index; 33 HYPRE_Int flops; 34 35 } hypre_RedBlackGSData; 36 37 #ifdef HYPRE_USING_RAJA 38 39 #define hypre_RedBlackLoopInit() 40 #define hypre_RedBlackLoopBegin(ni,nj,nk,redblack, \ 41 Astart,Ani,Anj,Ai, \ 42 bstart,bni,bnj,bi, \ 43 xstart,xni,xnj,xi) \ 44 { \ 45 HYPRE_Int hypre__tot = nk*nj*((ni+1)/2); \ 46 forall< hypre_raja_exec_policy >(RangeSegment(0, hypre__tot), [=] hypre_RAJA_DEVICE (HYPRE_Int idx) \ 47 { \ 48 HYPRE_Int idx_local = idx; \ 49 HYPRE_Int ii,jj,kk,Ai,bi,xi; \ 50 HYPRE_Int local_ii; \ 51 kk = idx_local % nk; \ 52 idx_local = idx_local / nk; \ 53 jj = idx_local % nj; \ 54 idx_local = idx_local / nj; \ 55 local_ii = (kk + jj + redblack) % 2; \ 56 ii = 2*idx_local + local_ii; \ 57 if (ii < ni) \ 58 { \ 59 Ai = Astart + kk*Anj*Ani + jj*Ani + ii; \ 60 bi = bstart + kk*bnj*bni + jj*bni + ii; \ 61 xi = xstart + kk*xnj*xni + jj*xni + ii; \ 62 63 #define hypre_RedBlackLoopEnd() \ 64 } \ 65 }); \ 66 hypre_fence(); \ 67 } 68 69 #define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack, \ 70 bstart,bni,bnj,bi, \ 71 xstart,xni,xnj,xi) \ 72 { \ 73 HYPRE_Int hypre__tot = nk*nj*((ni+1)/2); \ 74 forall< hypre_raja_exec_policy >(RangeSegment(0, hypre__tot), [=] hypre_RAJA_DEVICE (HYPRE_Int idx) \ 75 { \ 76 HYPRE_Int idx_local = idx; \ 77 HYPRE_Int ii,jj,kk,bi,xi; \ 78 HYPRE_Int local_ii; \ 79 kk = idx_local % nk; \ 80 idx_local = idx_local / nk; \ 81 jj = idx_local % nj; \ 82 idx_local = idx_local / nj; \ 83 local_ii = (kk + jj + redblack) % 2; \ 84 ii = 2*idx_local + local_ii; \ 85 if (ii < ni) \ 86 { \ 87 bi = bstart + kk*bnj*bni + jj*bni + ii; \ 88 xi = xstart + kk*xnj*xni + jj*xni + ii; \ 89 90 #define hypre_RedBlackConstantcoefLoopEnd() \ 91 } \ 92 }); \ 93 hypre_fence(); \ 94 } 95 96 #elif defined(HYPRE_USING_KOKKOS) 97 98 #define hypre_RedBlackLoopInit() 99 #define hypre_RedBlackLoopBegin(ni,nj,nk,redblack, \ 100 Astart,Ani,Anj,Ai, \ 101 bstart,bni,bnj,bi, \ 102 xstart,xni,xnj,xi) \ 103 { \ 104 HYPRE_Int hypre__tot = nk*nj*((ni+1)/2); \ 105 Kokkos::parallel_for (hypre__tot, KOKKOS_LAMBDA (HYPRE_Int idx) \ 106 { \ 107 HYPRE_Int idx_local = idx; \ 108 HYPRE_Int ii,jj,kk,Ai,bi,xi; \ 109 HYPRE_Int local_ii; \ 110 kk = idx_local % nk; \ 111 idx_local = idx_local / nk; \ 112 jj = idx_local % nj; \ 113 idx_local = idx_local / nj; \ 114 local_ii = (kk + jj + redblack) % 2; \ 115 ii = 2*idx_local + local_ii; \ 116 if (ii < ni) \ 117 { \ 118 Ai = Astart + kk*Anj*Ani + jj*Ani + ii; \ 119 bi = bstart + kk*bnj*bni + jj*bni + ii; \ 120 xi = xstart + kk*xnj*xni + jj*xni + ii; \ 121 122 #define hypre_RedBlackLoopEnd() \ 123 } \ 124 }); \ 125 hypre_fence(); \ 126 } 127 128 #define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack, \ 129 bstart,bni,bnj,bi, \ 130 xstart,xni,xnj,xi) \ 131 { \ 132 HYPRE_Int hypre__tot = nk*nj*((ni+1)/2); \ 133 Kokkos::parallel_for (hypre__tot, KOKKOS_LAMBDA (HYPRE_Int idx) \ 134 { \ 135 HYPRE_Int idx_local = idx; \ 136 HYPRE_Int ii,jj,kk,bi,xi; \ 137 HYPRE_Int local_ii; \ 138 kk = idx_local % nk; \ 139 idx_local = idx_local / nk; \ 140 jj = idx_local % nj; \ 141 idx_local = idx_local / nj; \ 142 local_ii = (kk + jj + redblack) % 2; \ 143 ii = 2*idx_local + local_ii; \ 144 if (ii < ni) \ 145 { \ 146 bi = bstart + kk*bnj*bni + jj*bni + ii; \ 147 xi = xstart + kk*xnj*xni + jj*xni + ii; \ 148 149 #define hypre_RedBlackConstantcoefLoopEnd() \ 150 } \ 151 }); \ 152 hypre_fence(); \ 153 } 154 155 #elif defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) 156 157 #define hypre_RedBlackLoopInit() 158 #define hypre_RedBlackLoopBegin(ni,nj,nk,redblack, \ 159 Astart,Ani,Anj,Ai, \ 160 bstart,bni,bnj,bi, \ 161 xstart,xni,xnj,xi) \ 162 { \ 163 HYPRE_Int hypre__tot = nk*nj*((ni+1)/2); \ 164 BoxLoopforall(hypre__tot, HYPRE_LAMBDA (HYPRE_Int idx) \ 165 { \ 166 HYPRE_Int idx_local = idx; \ 167 HYPRE_Int ii,jj,kk,Ai,bi,xi; \ 168 HYPRE_Int local_ii; \ 169 kk = idx_local % nk; \ 170 idx_local = idx_local / nk; \ 171 jj = idx_local % nj; \ 172 idx_local = idx_local / nj; \ 173 local_ii = (kk + jj + redblack) % 2; \ 174 ii = 2*idx_local + local_ii; \ 175 if (ii < ni) \ 176 { \ 177 Ai = Astart + kk*Anj*Ani + jj*Ani + ii; \ 178 bi = bstart + kk*bnj*bni + jj*bni + ii; \ 179 xi = xstart + kk*xnj*xni + jj*xni + ii; \ 180 181 #define hypre_RedBlackLoopEnd() \ 182 } \ 183 }); \ 184 } 185 186 #define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack, \ 187 bstart,bni,bnj,bi, \ 188 xstart,xni,xnj,xi) \ 189 { \ 190 HYPRE_Int hypre__tot = nk*nj*((ni+1)/2); \ 191 BoxLoopforall(hypre__tot, HYPRE_LAMBDA (HYPRE_Int idx) \ 192 { \ 193 HYPRE_Int idx_local = idx; \ 194 HYPRE_Int ii,jj,kk,bi,xi; \ 195 HYPRE_Int local_ii; \ 196 kk = idx_local % nk; \ 197 idx_local = idx_local / nk; \ 198 jj = idx_local % nj; \ 199 idx_local = idx_local / nj; \ 200 local_ii = (kk + jj + redblack) % 2; \ 201 ii = 2*idx_local + local_ii; \ 202 if (ii < ni) \ 203 { \ 204 bi = bstart + kk*bnj*bni + jj*bni + ii; \ 205 xi = xstart + kk*xnj*xni + jj*xni + ii; \ 206 207 #define hypre_RedBlackConstantcoefLoopEnd() \ 208 } \ 209 }); \ 210 } 211 212 #elif defined(HYPRE_USING_DEVICE_OPENMP) 213 214 /* BEGIN OF OMP 4.5 */ 215 /* #define IF_CLAUSE if (hypre__global_offload) */ 216 217 /* stringification: 218 * _Pragma(string-literal), so we need to cast argument to a string 219 * The three dots as last argument of the macro tells compiler that this is a variadic macro. 220 * I.e. this is a macro that receives variable number of arguments. 221 */ 222 //#define HYPRE_STR(s...) #s 223 //#define HYPRE_XSTR(s...) HYPRE_STR(s) 224 225 #define hypre_RedBlackLoopInit() 226 227 #define hypre_RedBlackLoopBegin(ni,nj,nk,redblack, \ 228 Astart,Ani,Anj,Ai, \ 229 bstart,bni,bnj,bi, \ 230 xstart,xni,xnj,xi) \ 231 { \ 232 HYPRE_Int hypre__thread, hypre__tot = nk*nj*((ni+1)/2); \ 233 HYPRE_BOXLOOP_ENTRY_PRINT \ 234 /* device code: */ \ 235 _Pragma (HYPRE_XSTR(omp target teams distribute parallel for IF_CLAUSE IS_DEVICE_CLAUSE)) \ 236 for (hypre__thread=0; hypre__thread<hypre__tot; hypre__thread++) \ 237 { \ 238 HYPRE_Int idx_local = hypre__thread; \ 239 HYPRE_Int ii,jj,kk,Ai,bi,xi; \ 240 HYPRE_Int local_ii; \ 241 kk = idx_local % nk; \ 242 idx_local = idx_local / nk; \ 243 jj = idx_local % nj; \ 244 idx_local = idx_local / nj; \ 245 local_ii = (kk + jj + redblack) % 2; \ 246 ii = 2*idx_local + local_ii; \ 247 if (ii < ni) \ 248 { \ 249 Ai = Astart + kk*Anj*Ani + jj*Ani + ii; \ 250 bi = bstart + kk*bnj*bni + jj*bni + ii; \ 251 xi = xstart + kk*xnj*xni + jj*xni + ii; \ 252 253 #define hypre_RedBlackLoopEnd() \ 254 } \ 255 } \ 256 } 257 258 259 260 #define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack, \ 261 bstart,bni,bnj,bi, \ 262 xstart,xni,xnj,xi) \ 263 { \ 264 HYPRE_Int hypre__thread, hypre__tot = nk*nj*((ni+1)/2); \ 265 HYPRE_BOXLOOP_ENTRY_PRINT \ 266 /* device code: */ \ 267 _Pragma (HYPRE_XSTR(omp target teams distribute parallel for IF_CLAUSE IS_DEVICE_CLAUSE)) \ 268 for (hypre__thread=0; hypre__thread<hypre__tot; hypre__thread++) \ 269 { \ 270 HYPRE_Int idx_local = hypre__thread; \ 271 HYPRE_Int ii,jj,kk,bi,xi; \ 272 HYPRE_Int local_ii; \ 273 kk = idx_local % nk; \ 274 idx_local = idx_local / nk; \ 275 jj = idx_local % nj; \ 276 idx_local = idx_local / nj; \ 277 local_ii = (kk + jj + redblack) % 2; \ 278 ii = 2*idx_local + local_ii; \ 279 if (ii < ni) \ 280 { \ 281 bi = bstart + kk*bnj*bni + jj*bni + ii; \ 282 xi = xstart + kk*xnj*xni + jj*xni + ii; \ 283 284 #define hypre_RedBlackConstantcoefLoopEnd() \ 285 } \ 286 } \ 287 } 288 /* END OF OMP 4.5 */ 289 290 #else 291 292 /* CPU */ 293 #define HYPRE_REDBLACK_PRIVATE hypre__kk 294 295 #define hypre_RedBlackLoopInit()\ 296 {\ 297 HYPRE_Int hypre__kk; 298 299 #ifdef HYPRE_USING_OPENMP 300 #define HYPRE_BOX_REDUCTION 301 #if defined(WIN32) && defined(_MSC_VER) 302 #define Pragma(x) __pragma(HYPRE_XSTR(x)) 303 #else 304 #define Pragma(x) _Pragma(HYPRE_XSTR(x)) 305 #endif 306 #define OMPRB1 Pragma(omp parallel for private(HYPRE_REDBLACK_PRIVATE) HYPRE_BOX_REDUCTION HYPRE_SMP_SCHEDULE) 307 #else 308 #define OMPRB1 309 #endif 310 311 #define hypre_RedBlackLoopBegin(ni,nj,nk,redblack, \ 312 Astart,Ani,Anj,Ai, \ 313 bstart,bni,bnj,bi, \ 314 xstart,xni,xnj,xi) \ 315 OMPRB1 \ 316 for (hypre__kk = 0; hypre__kk < nk; hypre__kk++) \ 317 {\ 318 HYPRE_Int ii,jj,Ai,bi,xi;\ 319 for (jj = 0; jj < nj; jj++)\ 320 {\ 321 ii = (hypre__kk + jj + redblack) % 2;\ 322 Ai = Astart + hypre__kk*Anj*Ani + jj*Ani + ii; \ 323 bi = bstart + hypre__kk*bnj*bni + jj*bni + ii; \ 324 xi = xstart + hypre__kk*xnj*xni + jj*xni + ii; \ 325 for (; ii < ni; ii+=2, Ai+=2, bi+=2, xi+=2)\ 326 { 327 328 #define hypre_RedBlackLoopEnd()\ 329 }\ 330 }\ 331 }\ 332 } 333 334 #define hypre_RedBlackConstantcoefLoopBegin(ni,nj,nk,redblack, \ 335 bstart,bni,bnj,bi, \ 336 xstart,xni,xnj,xi) \ 337 OMPRB1 \ 338 for (hypre__kk = 0; hypre__kk < nk; hypre__kk++)\ 339 {\ 340 HYPRE_Int ii,jj,bi,xi;\ 341 for (jj = 0; jj < nj; jj++)\ 342 {\ 343 ii = (hypre__kk + jj + redblack) % 2;\ 344 bi = bstart + hypre__kk*bnj*bni + jj*bni + ii;\ 345 xi = xstart + hypre__kk*xnj*xni + jj*xni + ii;\ 346 for (; ii < ni; ii+=2, Ai+=2, bi+=2, xi+=2)\ 347 { 348 349 #define hypre_RedBlackConstantcoefLoopEnd()\ 350 }\ 351 }\ 352 }\ 353 } 354 #endif 355