1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 #include "_hypre_struct_ls.h"
9
10 /*--------------------------------------------------------------------------
11 * hypre_SMGResidualData data structure
12 *--------------------------------------------------------------------------*/
13
14 typedef struct
15 {
16 hypre_Index base_index;
17 hypre_Index base_stride;
18
19 hypre_StructMatrix *A;
20 hypre_StructVector *x;
21 hypre_StructVector *b;
22 hypre_StructVector *r;
23 hypre_BoxArray *base_points;
24 hypre_ComputePkg *compute_pkg;
25
26 HYPRE_Int time_index;
27 HYPRE_BigInt flops;
28
29 } hypre_SMGResidualData;
30
31 /*--------------------------------------------------------------------------
32 * hypre_SMGResidualCreate
33 *--------------------------------------------------------------------------*/
34
35 void *
hypre_SMGResidualCreate()36 hypre_SMGResidualCreate( )
37 {
38 hypre_SMGResidualData *residual_data;
39
40 residual_data = hypre_CTAlloc(hypre_SMGResidualData, 1, HYPRE_MEMORY_HOST);
41
42 (residual_data -> time_index) = hypre_InitializeTiming("SMGResidual");
43
44 /* set defaults */
45 hypre_SetIndex3((residual_data -> base_index), 0, 0, 0);
46 hypre_SetIndex3((residual_data -> base_stride), 1, 1, 1);
47
48 return (void *) residual_data;
49 }
50
51 /*--------------------------------------------------------------------------
52 * hypre_SMGResidualSetup
53 *--------------------------------------------------------------------------*/
54
55 HYPRE_Int
hypre_SMGResidualSetup(void * residual_vdata,hypre_StructMatrix * A,hypre_StructVector * x,hypre_StructVector * b,hypre_StructVector * r)56 hypre_SMGResidualSetup( void *residual_vdata,
57 hypre_StructMatrix *A,
58 hypre_StructVector *x,
59 hypre_StructVector *b,
60 hypre_StructVector *r )
61 {
62 HYPRE_Int ierr;
63
64 hypre_SMGResidualData *residual_data = residual_vdata;
65
66 hypre_IndexRef base_index = (residual_data -> base_index);
67 hypre_IndexRef base_stride = (residual_data -> base_stride);
68 hypre_Index unit_stride;
69
70 hypre_StructGrid *grid;
71 hypre_StructStencil *stencil;
72
73 hypre_BoxArray *base_points;
74 hypre_ComputeInfo *compute_info;
75 hypre_ComputePkg *compute_pkg;
76
77 /*----------------------------------------------------------
78 * Set up base points and the compute package
79 *----------------------------------------------------------*/
80
81 grid = hypre_StructMatrixGrid(A);
82 stencil = hypre_StructMatrixStencil(A);
83
84 hypre_SetIndex3(unit_stride, 1, 1, 1);
85
86 base_points = hypre_BoxArrayDuplicate(hypre_StructGridBoxes(grid));
87 hypre_ProjectBoxArray(base_points, base_index, base_stride);
88
89 hypre_CreateComputeInfo(grid, stencil, &compute_info);
90 hypre_ComputeInfoProjectComp(compute_info, base_index, base_stride);
91 hypre_ComputePkgCreate(compute_info, hypre_StructVectorDataSpace(x), 1,
92 grid, &compute_pkg);
93
94 /*----------------------------------------------------------
95 * Set up the residual data structure
96 *----------------------------------------------------------*/
97
98 (residual_data -> A) = hypre_StructMatrixRef(A);
99 (residual_data -> x) = hypre_StructVectorRef(x);
100 (residual_data -> b) = hypre_StructVectorRef(b);
101 (residual_data -> r) = hypre_StructVectorRef(r);
102 (residual_data -> base_points) = base_points;
103 (residual_data -> compute_pkg) = compute_pkg;
104
105 /*-----------------------------------------------------
106 * Compute flops
107 *-----------------------------------------------------*/
108
109 (residual_data -> flops) =
110 (hypre_StructMatrixGlobalSize(A) + hypre_StructVectorGlobalSize(x)) /
111 (HYPRE_BigInt)(hypre_IndexX(base_stride) *
112 hypre_IndexY(base_stride) *
113 hypre_IndexZ(base_stride) );
114
115 return ierr;
116 }
117
118 /*--------------------------------------------------------------------------
119 * hypre_SMGResidual
120 *--------------------------------------------------------------------------*/
121
122 HYPRE_Int
hypre_SMGResidual(void * residual_vdata,hypre_StructMatrix * A,hypre_StructVector * x,hypre_StructVector * b,hypre_StructVector * r)123 hypre_SMGResidual( void *residual_vdata,
124 hypre_StructMatrix *A,
125 hypre_StructVector *x,
126 hypre_StructVector *b,
127 hypre_StructVector *r )
128 {
129 HYPRE_Int ierr;
130
131 hypre_SMGResidualData *residual_data = residual_vdata;
132
133 hypre_IndexRef base_stride = (residual_data -> base_stride);
134 hypre_BoxArray *base_points = (residual_data -> base_points);
135 hypre_ComputePkg *compute_pkg = (residual_data -> compute_pkg);
136
137 hypre_CommHandle *comm_handle;
138
139 hypre_BoxArrayArray *compute_box_aa;
140 hypre_BoxArray *compute_box_a;
141 hypre_Box *compute_box;
142
143 hypre_Box *A_data_box;
144 hypre_Box *x_data_box;
145 hypre_Box *b_data_box;
146 hypre_Box *r_data_box;
147
148 HYPRE_Int Ai;
149 HYPRE_Int xi;
150 HYPRE_Int bi;
151 HYPRE_Int ri;
152
153 HYPRE_Real *Ap0;
154 HYPRE_Real *xp0;
155 HYPRE_Real *bp;
156 HYPRE_Real *rp;
157
158 hypre_Index loop_size;
159 hypre_IndexRef start;
160
161 hypre_StructStencil *stencil;
162 hypre_Index *stencil_shape;
163 HYPRE_Int stencil_size;
164
165 HYPRE_Int compute_i, i, j, si;
166
167 HYPRE_Real *Ap1, *Ap2;
168 HYPRE_Real *Ap3, *Ap4;
169 HYPRE_Real *Ap5, *Ap6;
170 HYPRE_Real *Ap7, *Ap8, *Ap9;
171 HYPRE_Real *Ap10, *Ap11, *Ap12, *Ap13, *Ap14;
172 HYPRE_Real *Ap15, *Ap16, *Ap17, *Ap18;
173 HYPRE_Real *Ap19, *Ap20, *Ap21, *Ap22, *Ap23, *Ap24, *Ap25, *Ap26;
174 HYPRE_Real *xp1, *xp2;
175 HYPRE_Real *xp3, *xp4;
176 HYPRE_Real *xp5, *xp6;
177 HYPRE_Real *xp7, *xp8, *xp9;
178 HYPRE_Real *xp10, *xp11, *xp12, *xp13, *xp14;
179 HYPRE_Real *xp15, *xp16, *xp17, *xp18;
180 HYPRE_Real *xp19, *xp20, *xp21, *xp22, *xp23, *xp24, *xp25, *xp26;
181
182 hypre_BeginTiming(residual_data -> time_index);
183
184 /*-----------------------------------------------------------------------
185 * Compute residual r = b - Ax
186 *-----------------------------------------------------------------------*/
187
188 stencil = hypre_StructMatrixStencil(A);
189 stencil_shape = hypre_StructStencilShape(stencil);
190 stencil_size = hypre_StructStencilSize(stencil);
191
192 for (compute_i = 0; compute_i < 2; compute_i++)
193 {
194 switch(compute_i)
195 {
196 case 0:
197 {
198 xp0 = hypre_StructVectorData(x);
199 hypre_InitializeIndtComputations(compute_pkg, xp0, &comm_handle);
200 compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
201
202 /*----------------------------------------
203 * Copy b into r
204 *----------------------------------------*/
205
206 compute_box_a = base_points;
207 hypre_ForBoxI(i, compute_box_a)
208 {
209 compute_box = hypre_BoxArrayBox(compute_box_a, i);
210 start = hypre_BoxIMin(compute_box);
211
212 b_data_box =
213 hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
214 r_data_box =
215 hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);
216
217 bp = hypre_StructVectorBoxData(b, i);
218 rp = hypre_StructVectorBoxData(r, i);
219
220 hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
221
222 #define DEVICE_VAR is_device_ptr(rp,bp)
223 hypre_BoxLoop2Begin(hypre_StructMatrixNDim(A), loop_size,
224 b_data_box, start, base_stride, bi,
225 r_data_box, start, base_stride, ri);
226 {
227 rp[ri] = bp[bi];
228 }
229 hypre_BoxLoop2End(bi, ri);
230 #undef DEVICE_VAR
231 }
232 }
233 break;
234
235 case 1:
236 {
237 hypre_FinalizeIndtComputations(comm_handle);
238 compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
239 }
240 break;
241 }
242
243 /*--------------------------------------------------------------------
244 * Compute r -= A*x
245 *--------------------------------------------------------------------*/
246
247 hypre_ForBoxArrayI(i, compute_box_aa)
248 {
249 compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
250
251 A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
252 x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
253 r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);
254
255 rp = hypre_StructVectorBoxData(r, i);
256
257 /*--------------------------------------------------------------
258 * Switch statement to direct control (based on stencil size) to
259 * code to get pointers and offsets fo A and x.
260 *--------------------------------------------------------------*/
261
262 switch (stencil_size)
263 {
264 case 1:
265
266 Ap0 = hypre_StructMatrixBoxData(A, i, 0);
267 xp0 = hypre_StructVectorBoxData(x, i) +
268 hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
269
270 break;
271
272 case 3:
273
274 Ap0 = hypre_StructMatrixBoxData(A, i, 0);
275 Ap1 = hypre_StructMatrixBoxData(A, i, 1);
276 Ap2 = hypre_StructMatrixBoxData(A, i, 2);
277
278 xp0 = hypre_StructVectorBoxData(x, i) +
279 hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
280 xp1 = hypre_StructVectorBoxData(x, i) +
281 hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
282 xp2 = hypre_StructVectorBoxData(x, i) +
283 hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
284
285 break;
286
287 case 5:
288
289 Ap0 = hypre_StructMatrixBoxData(A, i, 0);
290 Ap1 = hypre_StructMatrixBoxData(A, i, 1);
291 Ap2 = hypre_StructMatrixBoxData(A, i, 2);
292 Ap3 = hypre_StructMatrixBoxData(A, i, 3);
293 Ap4 = hypre_StructMatrixBoxData(A, i, 4);
294
295 xp0 = hypre_StructVectorBoxData(x, i) +
296 hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
297 xp1 = hypre_StructVectorBoxData(x, i) +
298 hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
299 xp2 = hypre_StructVectorBoxData(x, i) +
300 hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
301 xp3 = hypre_StructVectorBoxData(x, i) +
302 hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
303 xp4 = hypre_StructVectorBoxData(x, i) +
304 hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
305
306 break;
307
308 case 7:
309
310 Ap0 = hypre_StructMatrixBoxData(A, i, 0);
311 Ap1 = hypre_StructMatrixBoxData(A, i, 1);
312 Ap2 = hypre_StructMatrixBoxData(A, i, 2);
313 Ap3 = hypre_StructMatrixBoxData(A, i, 3);
314 Ap4 = hypre_StructMatrixBoxData(A, i, 4);
315 Ap5 = hypre_StructMatrixBoxData(A, i, 5);
316 Ap6 = hypre_StructMatrixBoxData(A, i, 6);
317
318 xp0 = hypre_StructVectorBoxData(x, i) +
319 hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
320 xp1 = hypre_StructVectorBoxData(x, i) +
321 hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
322 xp2 = hypre_StructVectorBoxData(x, i) +
323 hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
324 xp3 = hypre_StructVectorBoxData(x, i) +
325 hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
326 xp4 = hypre_StructVectorBoxData(x, i) +
327 hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
328 xp5 = hypre_StructVectorBoxData(x, i) +
329 hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
330 xp6 = hypre_StructVectorBoxData(x, i) +
331 hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
332
333 break;
334
335 case 9:
336
337 Ap0 = hypre_StructMatrixBoxData(A, i, 0);
338 Ap1 = hypre_StructMatrixBoxData(A, i, 1);
339 Ap2 = hypre_StructMatrixBoxData(A, i, 2);
340 Ap3 = hypre_StructMatrixBoxData(A, i, 3);
341 Ap4 = hypre_StructMatrixBoxData(A, i, 4);
342 Ap5 = hypre_StructMatrixBoxData(A, i, 5);
343 Ap6 = hypre_StructMatrixBoxData(A, i, 6);
344 Ap7 = hypre_StructMatrixBoxData(A, i, 7);
345 Ap8 = hypre_StructMatrixBoxData(A, i, 8);
346
347 xp0 = hypre_StructVectorBoxData(x, i) +
348 hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
349 xp1 = hypre_StructVectorBoxData(x, i) +
350 hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
351 xp2 = hypre_StructVectorBoxData(x, i) +
352 hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
353 xp3 = hypre_StructVectorBoxData(x, i) +
354 hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
355 xp4 = hypre_StructVectorBoxData(x, i) +
356 hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
357 xp5 = hypre_StructVectorBoxData(x, i) +
358 hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
359 xp6 = hypre_StructVectorBoxData(x, i) +
360 hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
361 xp7 = hypre_StructVectorBoxData(x, i) +
362 hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
363 xp8 = hypre_StructVectorBoxData(x, i) +
364 hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
365
366 break;
367
368 case 15:
369
370 Ap0 = hypre_StructMatrixBoxData(A, i, 0);
371 Ap1 = hypre_StructMatrixBoxData(A, i, 1);
372 Ap2 = hypre_StructMatrixBoxData(A, i, 2);
373 Ap3 = hypre_StructMatrixBoxData(A, i, 3);
374 Ap4 = hypre_StructMatrixBoxData(A, i, 4);
375 Ap5 = hypre_StructMatrixBoxData(A, i, 5);
376 Ap6 = hypre_StructMatrixBoxData(A, i, 6);
377 Ap7 = hypre_StructMatrixBoxData(A, i, 7);
378 Ap8 = hypre_StructMatrixBoxData(A, i, 8);
379 Ap9 = hypre_StructMatrixBoxData(A, i, 9);
380 Ap10 = hypre_StructMatrixBoxData(A, i, 10);
381 Ap11 = hypre_StructMatrixBoxData(A, i, 11);
382 Ap12 = hypre_StructMatrixBoxData(A, i, 12);
383 Ap13 = hypre_StructMatrixBoxData(A, i, 13);
384 Ap14 = hypre_StructMatrixBoxData(A, i, 14);
385
386 xp0 = hypre_StructVectorBoxData(x, i) +
387 hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
388 xp1 = hypre_StructVectorBoxData(x, i) +
389 hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
390 xp2 = hypre_StructVectorBoxData(x, i) +
391 hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
392 xp3 = hypre_StructVectorBoxData(x, i) +
393 hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
394 xp4 = hypre_StructVectorBoxData(x, i) +
395 hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
396 xp5 = hypre_StructVectorBoxData(x, i) +
397 hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
398 xp6 = hypre_StructVectorBoxData(x, i) +
399 hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
400 xp7 = hypre_StructVectorBoxData(x, i) +
401 hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
402 xp8 = hypre_StructVectorBoxData(x, i) +
403 hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
404 xp9 = hypre_StructVectorBoxData(x, i) +
405 hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
406 xp10 = hypre_StructVectorBoxData(x, i) +
407 hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
408 xp11 = hypre_StructVectorBoxData(x, i) +
409 hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
410 xp12 = hypre_StructVectorBoxData(x, i) +
411 hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
412 xp13 = hypre_StructVectorBoxData(x, i) +
413 hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
414 xp14 = hypre_StructVectorBoxData(x, i) +
415 hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
416
417 break;
418
419 case 19:
420
421 Ap0 = hypre_StructMatrixBoxData(A, i, 0);
422 Ap1 = hypre_StructMatrixBoxData(A, i, 1);
423 Ap2 = hypre_StructMatrixBoxData(A, i, 2);
424 Ap3 = hypre_StructMatrixBoxData(A, i, 3);
425 Ap4 = hypre_StructMatrixBoxData(A, i, 4);
426 Ap5 = hypre_StructMatrixBoxData(A, i, 5);
427 Ap6 = hypre_StructMatrixBoxData(A, i, 6);
428 Ap7 = hypre_StructMatrixBoxData(A, i, 7);
429 Ap8 = hypre_StructMatrixBoxData(A, i, 8);
430 Ap9 = hypre_StructMatrixBoxData(A, i, 9);
431 Ap10 = hypre_StructMatrixBoxData(A, i, 10);
432 Ap11 = hypre_StructMatrixBoxData(A, i, 11);
433 Ap12 = hypre_StructMatrixBoxData(A, i, 12);
434 Ap13 = hypre_StructMatrixBoxData(A, i, 13);
435 Ap14 = hypre_StructMatrixBoxData(A, i, 14);
436 Ap15 = hypre_StructMatrixBoxData(A, i, 15);
437 Ap16 = hypre_StructMatrixBoxData(A, i, 16);
438 Ap17 = hypre_StructMatrixBoxData(A, i, 17);
439 Ap18 = hypre_StructMatrixBoxData(A, i, 18);
440
441 xp0 = hypre_StructVectorBoxData(x, i) +
442 hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
443 xp1 = hypre_StructVectorBoxData(x, i) +
444 hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
445 xp2 = hypre_StructVectorBoxData(x, i) +
446 hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
447 xp3 = hypre_StructVectorBoxData(x, i) +
448 hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
449 xp4 = hypre_StructVectorBoxData(x, i) +
450 hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
451 xp5 = hypre_StructVectorBoxData(x, i) +
452 hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
453 xp6 = hypre_StructVectorBoxData(x, i) +
454 hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
455 xp7 = hypre_StructVectorBoxData(x, i) +
456 hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
457 xp8 = hypre_StructVectorBoxData(x, i) +
458 hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
459 xp9 = hypre_StructVectorBoxData(x, i) +
460 hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
461 xp10 = hypre_StructVectorBoxData(x, i) +
462 hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
463 xp11 = hypre_StructVectorBoxData(x, i) +
464 hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
465 xp12 = hypre_StructVectorBoxData(x, i) +
466 hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
467 xp13 = hypre_StructVectorBoxData(x, i) +
468 hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
469 xp14 = hypre_StructVectorBoxData(x, i) +
470 hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
471 xp15 = hypre_StructVectorBoxData(x, i) +
472 hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]);
473 xp16 = hypre_StructVectorBoxData(x, i) +
474 hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]);
475 xp17 = hypre_StructVectorBoxData(x, i) +
476 hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]);
477 xp18 = hypre_StructVectorBoxData(x, i) +
478 hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]);
479
480 break;
481
482 case 27:
483
484 Ap0 = hypre_StructMatrixBoxData(A, i, 0);
485 Ap1 = hypre_StructMatrixBoxData(A, i, 1);
486 Ap2 = hypre_StructMatrixBoxData(A, i, 2);
487 Ap3 = hypre_StructMatrixBoxData(A, i, 3);
488 Ap4 = hypre_StructMatrixBoxData(A, i, 4);
489 Ap5 = hypre_StructMatrixBoxData(A, i, 5);
490 Ap6 = hypre_StructMatrixBoxData(A, i, 6);
491 Ap7 = hypre_StructMatrixBoxData(A, i, 7);
492 Ap8 = hypre_StructMatrixBoxData(A, i, 8);
493 Ap9 = hypre_StructMatrixBoxData(A, i, 9);
494 Ap10 = hypre_StructMatrixBoxData(A, i, 10);
495 Ap11 = hypre_StructMatrixBoxData(A, i, 11);
496 Ap12 = hypre_StructMatrixBoxData(A, i, 12);
497 Ap13 = hypre_StructMatrixBoxData(A, i, 13);
498 Ap14 = hypre_StructMatrixBoxData(A, i, 14);
499 Ap15 = hypre_StructMatrixBoxData(A, i, 15);
500 Ap16 = hypre_StructMatrixBoxData(A, i, 16);
501 Ap17 = hypre_StructMatrixBoxData(A, i, 17);
502 Ap18 = hypre_StructMatrixBoxData(A, i, 18);
503 Ap19 = hypre_StructMatrixBoxData(A, i, 19);
504 Ap20 = hypre_StructMatrixBoxData(A, i, 20);
505 Ap21 = hypre_StructMatrixBoxData(A, i, 21);
506 Ap22 = hypre_StructMatrixBoxData(A, i, 22);
507 Ap23 = hypre_StructMatrixBoxData(A, i, 23);
508 Ap24 = hypre_StructMatrixBoxData(A, i, 24);
509 Ap25 = hypre_StructMatrixBoxData(A, i, 25);
510 Ap26 = hypre_StructMatrixBoxData(A, i, 26);
511
512 xp0 = hypre_StructVectorBoxData(x, i) +
513 hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
514 xp1 = hypre_StructVectorBoxData(x, i) +
515 hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
516 xp2 = hypre_StructVectorBoxData(x, i) +
517 hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
518 xp3 = hypre_StructVectorBoxData(x, i) +
519 hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
520 xp4 = hypre_StructVectorBoxData(x, i) +
521 hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
522 xp5 = hypre_StructVectorBoxData(x, i) +
523 hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
524 xp6 = hypre_StructVectorBoxData(x, i) +
525 hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
526 xp7 = hypre_StructVectorBoxData(x, i) +
527 hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
528 xp8 = hypre_StructVectorBoxData(x, i) +
529 hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
530 xp9 = hypre_StructVectorBoxData(x, i) +
531 hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
532 xp10 = hypre_StructVectorBoxData(x, i) +
533 hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
534 xp11 = hypre_StructVectorBoxData(x, i) +
535 hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
536 xp12 = hypre_StructVectorBoxData(x, i) +
537 hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
538 xp13 = hypre_StructVectorBoxData(x, i) +
539 hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
540 xp14 = hypre_StructVectorBoxData(x, i) +
541 hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
542 xp15 = hypre_StructVectorBoxData(x, i) +
543 hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]);
544 xp16 = hypre_StructVectorBoxData(x, i) +
545 hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]);
546 xp17 = hypre_StructVectorBoxData(x, i) +
547 hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]);
548 xp18 = hypre_StructVectorBoxData(x, i) +
549 hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]);
550 xp19 = hypre_StructVectorBoxData(x, i) +
551 hypre_BoxOffsetDistance(x_data_box, stencil_shape[19]);
552 xp20 = hypre_StructVectorBoxData(x, i) +
553 hypre_BoxOffsetDistance(x_data_box, stencil_shape[20]);
554 xp21 = hypre_StructVectorBoxData(x, i) +
555 hypre_BoxOffsetDistance(x_data_box, stencil_shape[21]);
556 xp22 = hypre_StructVectorBoxData(x, i) +
557 hypre_BoxOffsetDistance(x_data_box, stencil_shape[22]);
558 xp23 = hypre_StructVectorBoxData(x, i) +
559 hypre_BoxOffsetDistance(x_data_box, stencil_shape[23]);
560 xp24 = hypre_StructVectorBoxData(x, i) +
561 hypre_BoxOffsetDistance(x_data_box, stencil_shape[24]);
562 xp25 = hypre_StructVectorBoxData(x, i) +
563 hypre_BoxOffsetDistance(x_data_box, stencil_shape[25]);
564 xp26 = hypre_StructVectorBoxData(x, i) +
565 hypre_BoxOffsetDistance(x_data_box, stencil_shape[26]);
566
567 break;
568
569 default:
570 ;
571 }
572
573 hypre_ForBoxI(j, compute_box_a)
574 {
575 compute_box = hypre_BoxArrayBox(compute_box_a, j);
576
577 start = hypre_BoxIMin(compute_box);
578
579 /*------------------------------------------------------
580 * Switch statement to direct control to appropriate
581 * box loop depending on stencil size
582 *------------------------------------------------------*/
583
584 switch (stencil_size)
585 {
586
587 case 1:
588
589 hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
590
591 #define DEVICE_VAR is_device_ptr(rp,Ap0,xp0)
592 hypre_BoxLoop3Begin(hypre_StructMatrixNDim(A), loop_size,
593 A_data_box, start, base_stride, Ai,
594 x_data_box, start, base_stride, xi,
595 r_data_box, start, base_stride, ri);
596 {
597
598 rp[ri] = rp[ri]
599 - Ap0[Ai] * xp0[xi];
600
601 }
602 hypre_BoxLoop3End(Ai, xi, ri);
603 #undef DEVICE_VAR
604
605 break;
606
607 case 3:
608
609 hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
610
611 #define DEVICE_VAR is_device_ptr(rp,Ap0,xp0,Ap1,xp1,Ap2,xp2)
612 hypre_BoxLoop3Begin(hypre_StructMatrixNDim(A), loop_size,
613 A_data_box, start, base_stride, Ai,
614 x_data_box, start, base_stride, xi,
615 r_data_box, start, base_stride, ri);
616 {
617
618 rp[ri] = rp[ri]
619 - Ap0[Ai] * xp0[xi]
620 - Ap1[Ai] * xp1[xi]
621 - Ap2[Ai] * xp2[xi];
622
623 }
624 hypre_BoxLoop3End(Ai, xi, ri);
625 #undef DEVICE_VAR
626
627 break;
628
629 case 5:
630
631 hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
632
633 #define DEVICE_VAR is_device_ptr(rp,Ap0,xp0,Ap1,xp1,Ap2,xp2,Ap3,xp3,Ap4,xp4)
634 hypre_BoxLoop3Begin(hypre_StructMatrixNDim(A), loop_size,
635 A_data_box, start, base_stride, Ai,
636 x_data_box, start, base_stride, xi,
637 r_data_box, start, base_stride, ri);
638 {
639
640 rp[ri] = rp[ri]
641 - Ap0[Ai] * xp0[xi]
642 - Ap1[Ai] * xp1[xi]
643 - Ap2[Ai] * xp2[xi]
644 - Ap3[Ai] * xp3[xi]
645 - Ap4[Ai] * xp4[xi];
646
647 }
648 hypre_BoxLoop3End(Ai, xi, ri);
649 #undef DEVICE_VAR
650
651 break;
652
653 case 7:
654
655 hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
656
657 #define DEVICE_VAR is_device_ptr(rp,Ap0,xp0,Ap1,xp1,Ap2,xp2,Ap3,xp3,Ap4,xp4,Ap5,xp5,Ap6,xp6)
658 hypre_BoxLoop3Begin(hypre_StructMatrixNDim(A), loop_size,
659 A_data_box, start, base_stride, Ai,
660 x_data_box, start, base_stride, xi,
661 r_data_box, start, base_stride, ri);
662 {
663
664 rp[ri] = rp[ri]
665 - Ap0[Ai] * xp0[xi]
666 - Ap1[Ai] * xp1[xi]
667 - Ap2[Ai] * xp2[xi]
668 - Ap3[Ai] * xp3[xi]
669 - Ap4[Ai] * xp4[xi]
670 - Ap5[Ai] * xp5[xi]
671 - Ap6[Ai] * xp6[xi];
672
673 }
674 hypre_BoxLoop3End(Ai, xi, ri);
675 #undef DEVICE_VAR
676
677 break;
678
679 case 9:
680
681 hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
682
683 #define DEVICE_VAR is_device_ptr(rp,Ap0,xp0,Ap1,xp1,Ap2,xp2,Ap3,xp3,Ap4,xp4,Ap5,xp5,Ap6,xp6,Ap7,xp7,Ap8,xp8)
684 hypre_BoxLoop3Begin(hypre_StructMatrixNDim(A), loop_size,
685 A_data_box, start, base_stride, Ai,
686 x_data_box, start, base_stride, xi,
687 r_data_box, start, base_stride, ri);
688 {
689
690 rp[ri] = rp[ri]
691 - Ap0[Ai] * xp0[xi]
692 - Ap1[Ai] * xp1[xi]
693 - Ap2[Ai] * xp2[xi]
694 - Ap3[Ai] * xp3[xi]
695 - Ap4[Ai] * xp4[xi]
696 - Ap5[Ai] * xp5[xi]
697 - Ap6[Ai] * xp6[xi]
698 - Ap7[Ai] * xp7[xi]
699 - Ap8[Ai] * xp8[xi];
700
701 }
702 hypre_BoxLoop3End(Ai, xi, ri);
703 #undef DEVICE_VAR
704
705 break;
706
707 case 15:
708
709 hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
710
711 #define DEVICE_VAR is_device_ptr(rp,Ap0,xp0,Ap1,xp1,Ap2,xp2,Ap3,xp3,Ap4,xp4,Ap5,xp5,Ap6,xp6,Ap7,xp7,Ap8,xp8,Ap9,xp9,Ap10,xp10,Ap11,xp11,Ap12,xp12,Ap13,xp13,Ap14,xp14)
712 hypre_BoxLoop3Begin(hypre_StructMatrixNDim(A), loop_size,
713 A_data_box, start, base_stride, Ai,
714 x_data_box, start, base_stride, xi,
715 r_data_box, start, base_stride, ri);
716 {
717
718 rp[ri] = rp[ri]
719 - Ap0[Ai] * xp0[xi]
720 - Ap1[Ai] * xp1[xi]
721 - Ap2[Ai] * xp2[xi]
722 - Ap3[Ai] * xp3[xi]
723 - Ap4[Ai] * xp4[xi]
724 - Ap5[Ai] * xp5[xi]
725 - Ap6[Ai] * xp6[xi]
726 - Ap7[Ai] * xp7[xi]
727 - Ap8[Ai] * xp8[xi]
728 - Ap9[Ai] * xp9[xi]
729 - Ap10[Ai] * xp10[xi]
730 - Ap11[Ai] * xp11[xi]
731 - Ap12[Ai] * xp12[xi]
732 - Ap13[Ai] * xp13[xi]
733 - Ap14[Ai] * xp14[xi];
734
735 }
736 hypre_BoxLoop3End(Ai, xi, ri);
737 #undef DEVICE_VAR
738
739 break;
740
741 case 19:
742
743 hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
744
745 #define DEVICE_VAR is_device_ptr(rp,Ap0,xp0,Ap1,xp1,Ap2,xp2,Ap3,xp3,Ap4,xp4,Ap5,xp5,Ap6,xp6,Ap7,xp7,Ap8,xp8,Ap9,xp9,Ap10,xp10,Ap11,xp11,Ap12,xp12,Ap13,xp13,Ap14,xp14,Ap15,xp15,Ap16,xp16,Ap17,xp17,Ap18,xp18)
746 hypre_BoxLoop3Begin(hypre_StructMatrixNDim(A), loop_size,
747 A_data_box, start, base_stride, Ai,
748 x_data_box, start, base_stride, xi,
749 r_data_box, start, base_stride, ri);
750 {
751
752 rp[ri] = rp[ri]
753 - Ap0[Ai] * xp0[xi]
754 - Ap1[Ai] * xp1[xi]
755 - Ap2[Ai] * xp2[xi]
756 - Ap3[Ai] * xp3[xi]
757 - Ap4[Ai] * xp4[xi]
758 - Ap5[Ai] * xp5[xi]
759 - Ap6[Ai] * xp6[xi]
760 - Ap7[Ai] * xp7[xi]
761 - Ap8[Ai] * xp8[xi]
762 - Ap9[Ai] * xp9[xi]
763 - Ap10[Ai] * xp10[xi]
764 - Ap11[Ai] * xp11[xi]
765 - Ap12[Ai] * xp12[xi]
766 - Ap13[Ai] * xp13[xi]
767 - Ap14[Ai] * xp14[xi]
768 - Ap15[Ai] * xp15[xi]
769 - Ap16[Ai] * xp16[xi]
770 - Ap17[Ai] * xp17[xi]
771 - Ap18[Ai] * xp18[xi];
772
773 }
774 hypre_BoxLoop3End(Ai, xi, ri);
775 #undef DEVICE_VAR
776
777 break;
778
779 case 27:
780
781 hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
782
783 #define DEVICE_VAR is_device_ptr(rp,Ap0,xp0,Ap1,xp1,Ap2,xp2,Ap3,xp3,Ap4,xp4,Ap5,xp5,Ap6,xp6,Ap7,xp7,Ap8,xp8,Ap9,xp9,Ap10,xp10,Ap11,xp11,Ap12,xp12,Ap13,xp13,Ap14,xp14,Ap15,xp15,Ap16,xp16,Ap17,xp17,Ap18,xp18,Ap19,xp19,Ap20,xp20,Ap21,xp21,Ap22,xp22,Ap23,xp23,Ap24,xp24,Ap25,xp25,Ap26,xp26)
784 hypre_BoxLoop3Begin(hypre_StructMatrixNDim(A), loop_size,
785 A_data_box, start, base_stride, Ai,
786 x_data_box, start, base_stride, xi,
787 r_data_box, start, base_stride, ri);
788 {
789
790 rp[ri] = rp[ri]
791 - Ap0[Ai] * xp0[xi]
792 - Ap1[Ai] * xp1[xi]
793 - Ap2[Ai] * xp2[xi]
794 - Ap3[Ai] * xp3[xi]
795 - Ap4[Ai] * xp4[xi]
796 - Ap5[Ai] * xp5[xi]
797 - Ap6[Ai] * xp6[xi]
798 - Ap7[Ai] * xp7[xi]
799 - Ap8[Ai] * xp8[xi]
800 - Ap9[Ai] * xp9[xi]
801 - Ap10[Ai] * xp10[xi]
802 - Ap11[Ai] * xp11[xi]
803 - Ap12[Ai] * xp12[xi]
804 - Ap13[Ai] * xp13[xi]
805 - Ap14[Ai] * xp14[xi]
806 - Ap15[Ai] * xp15[xi]
807 - Ap16[Ai] * xp16[xi]
808 - Ap17[Ai] * xp17[xi]
809 - Ap18[Ai] * xp18[xi]
810 - Ap19[Ai] * xp19[xi]
811 - Ap20[Ai] * xp20[xi]
812 - Ap21[Ai] * xp21[xi]
813 - Ap22[Ai] * xp22[xi]
814 - Ap23[Ai] * xp23[xi]
815 - Ap24[Ai] * xp24[xi]
816 - Ap25[Ai] * xp25[xi]
817 - Ap26[Ai] * xp26[xi];
818
819 }
820 hypre_BoxLoop3End(Ai, xi, ri);
821 #undef DEVICE_VAR
822
823 break;
824
825 default:
826
827 for (si = 0; si < stencil_size; si++)
828 {
829 Ap0 = hypre_StructMatrixBoxData(A, i, si);
830 xp0 = hypre_StructVectorBoxData(x, i) +
831 hypre_BoxOffsetDistance(x_data_box, stencil_shape[si]);
832
833 hypre_BoxGetStrideSize(compute_box, base_stride,
834 loop_size);
835
836 #define DEVICE_VAR is_device_ptr(rp,Ap0,xp0)
837 hypre_BoxLoop3Begin(hypre_StructMatrixNDim(A), loop_size,
838 A_data_box, start, base_stride, Ai,
839 x_data_box, start, base_stride, xi,
840 r_data_box, start, base_stride, ri);
841 {
842 rp[ri] -= Ap0[Ai] * xp0[xi];
843 }
844 hypre_BoxLoop3End(Ai, xi, ri);
845 #undef DEVICE_VAR
846 }
847 }
848 }
849 }
850 }
851
852 /*-----------------------------------------------------------------------
853 * Return
854 *-----------------------------------------------------------------------*/
855
856 hypre_IncFLOPCount(residual_data -> flops);
857 hypre_EndTiming(residual_data -> time_index);
858
859 return ierr;
860 }
861
862 /*--------------------------------------------------------------------------
863 * hypre_SMGResidualSetBase
864 *--------------------------------------------------------------------------*/
865
866 HYPRE_Int
hypre_SMGResidualSetBase(void * residual_vdata,hypre_Index base_index,hypre_Index base_stride)867 hypre_SMGResidualSetBase( void *residual_vdata,
868 hypre_Index base_index,
869 hypre_Index base_stride )
870 {
871 hypre_SMGResidualData *residual_data = residual_vdata;
872 HYPRE_Int d;
873 HYPRE_Int ierr = 0;
874
875 for (d = 0; d < 3; d++)
876 {
877 hypre_IndexD((residual_data -> base_index), d)
878 = hypre_IndexD(base_index, d);
879 hypre_IndexD((residual_data -> base_stride), d)
880 = hypre_IndexD(base_stride, d);
881 }
882
883 return ierr;
884 }
885
886 /*--------------------------------------------------------------------------
887 * hypre_SMGResidualDestroy
888 *--------------------------------------------------------------------------*/
889
890 HYPRE_Int
hypre_SMGResidualDestroy(void * residual_vdata)891 hypre_SMGResidualDestroy( void *residual_vdata )
892 {
893 HYPRE_Int ierr;
894
895 hypre_SMGResidualData *residual_data = residual_vdata;
896
897 if (residual_data)
898 {
899 hypre_StructMatrixDestroy(residual_data -> A);
900 hypre_StructVectorDestroy(residual_data -> x);
901 hypre_StructVectorDestroy(residual_data -> b);
902 hypre_StructVectorDestroy(residual_data -> r);
903 hypre_BoxArrayDestroy(residual_data -> base_points);
904 hypre_ComputePkgDestroy(residual_data -> compute_pkg );
905 hypre_FinalizeTiming(residual_data -> time_index);
906 hypre_TFree(residual_data, HYPRE_MEMORY_HOST);
907 }
908
909 return ierr;
910 }
911
912