1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /******************************************************************************
9 *
10 * Structured matrix-vector multiply routine
11 *
12 *****************************************************************************/
13
14 #include "_hypre_struct_mv.h"
15 #include "_hypre_struct_mv.hpp"
16
17 /* this currently cannot be greater than 7 */
18 #ifdef MAX_DEPTH
19 #undef MAX_DEPTH
20 #endif
21 #define MAX_DEPTH 7
22
23 /*--------------------------------------------------------------------------
24 * hypre_StructMatvecData data structure
25 *--------------------------------------------------------------------------*/
26
27 typedef struct
28 {
29 hypre_StructMatrix *A;
30 hypre_StructVector *x;
31 hypre_ComputePkg *compute_pkg;
32
33 } hypre_StructMatvecData;
34
35 /*--------------------------------------------------------------------------
36 * hypre_StructMatvecCreate
37 *--------------------------------------------------------------------------*/
38
39 void *
hypre_StructMatvecCreate()40 hypre_StructMatvecCreate( )
41 {
42 hypre_StructMatvecData *matvec_data;
43
44 matvec_data = hypre_CTAlloc(hypre_StructMatvecData, 1, HYPRE_MEMORY_HOST);
45
46 return (void *) matvec_data;
47 }
48
49 /*--------------------------------------------------------------------------
50 * hypre_StructMatvecSetup
51 *--------------------------------------------------------------------------*/
52
53 HYPRE_Int
hypre_StructMatvecSetup(void * matvec_vdata,hypre_StructMatrix * A,hypre_StructVector * x)54 hypre_StructMatvecSetup( void *matvec_vdata,
55 hypre_StructMatrix *A,
56 hypre_StructVector *x )
57 {
58 hypre_StructMatvecData *matvec_data = (hypre_StructMatvecData *)matvec_vdata;
59
60 hypre_StructGrid *grid;
61 hypre_StructStencil *stencil;
62 hypre_ComputeInfo *compute_info;
63 hypre_ComputePkg *compute_pkg;
64
65 /*----------------------------------------------------------
66 * Set up the compute package
67 *----------------------------------------------------------*/
68
69 grid = hypre_StructMatrixGrid(A);
70 stencil = hypre_StructMatrixStencil(A);
71
72 hypre_CreateComputeInfo(grid, stencil, &compute_info);
73 hypre_ComputePkgCreate(compute_info, hypre_StructVectorDataSpace(x), 1,
74 grid, &compute_pkg);
75
76 /*----------------------------------------------------------
77 * Set up the matvec data structure
78 *----------------------------------------------------------*/
79
80 (matvec_data -> A) = hypre_StructMatrixRef(A);
81 (matvec_data -> x) = hypre_StructVectorRef(x);
82 (matvec_data -> compute_pkg) = compute_pkg;
83
84 return hypre_error_flag;
85 }
86
87 /*--------------------------------------------------------------------------
88 * hypre_StructMatvecCompute
89 *--------------------------------------------------------------------------*/
90
91 HYPRE_Int
hypre_StructMatvecCompute(void * matvec_vdata,HYPRE_Complex alpha,hypre_StructMatrix * A,hypre_StructVector * x,HYPRE_Complex beta,hypre_StructVector * y)92 hypre_StructMatvecCompute( void *matvec_vdata,
93 HYPRE_Complex alpha,
94 hypre_StructMatrix *A,
95 hypre_StructVector *x,
96 HYPRE_Complex beta,
97 hypre_StructVector *y )
98 {
99 hypre_StructMatvecData *matvec_data = (hypre_StructMatvecData *)matvec_vdata;
100
101 hypre_ComputePkg *compute_pkg;
102
103 hypre_CommHandle *comm_handle;
104
105 hypre_BoxArrayArray *compute_box_aa;
106 hypre_Box *y_data_box;
107
108 HYPRE_Complex *xp;
109 HYPRE_Complex *yp;
110
111 hypre_BoxArray *boxes;
112 hypre_Box *box;
113 hypre_Index loop_size;
114 hypre_IndexRef start;
115 hypre_IndexRef stride;
116
117 HYPRE_Int constant_coefficient;
118
119 HYPRE_Complex temp;
120 HYPRE_Int compute_i, i;
121
122 hypre_StructVector *x_tmp = NULL;
123
124 /*-----------------------------------------------------------------------
125 * Initialize some things
126 *-----------------------------------------------------------------------*/
127
128 constant_coefficient = hypre_StructMatrixConstantCoefficient(A);
129 if (constant_coefficient) hypre_StructVectorClearBoundGhostValues(x, 0);
130
131 compute_pkg = (matvec_data -> compute_pkg);
132
133 stride = hypre_ComputePkgStride(compute_pkg);
134
135 /*-----------------------------------------------------------------------
136 * Do (alpha == 0.0) computation
137 *-----------------------------------------------------------------------*/
138
139 if (alpha == 0.0)
140 {
141 boxes = hypre_StructGridBoxes(hypre_StructMatrixGrid(A));
142 hypre_ForBoxI(i, boxes)
143 {
144 box = hypre_BoxArrayBox(boxes, i);
145 start = hypre_BoxIMin(box);
146
147 y_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(y), i);
148 yp = hypre_StructVectorBoxData(y, i);
149
150 hypre_BoxGetSize(box, loop_size);
151
152 #define DEVICE_VAR is_device_ptr(yp)
153 hypre_BoxLoop1Begin(hypre_StructVectorNDim(x), loop_size,
154 y_data_box, start, stride, yi);
155 {
156 yp[yi] *= beta;
157 }
158 hypre_BoxLoop1End(yi);
159 #undef DEVICE_VAR
160 }
161
162 return hypre_error_flag;
163 }
164
165 if (x == y)
166 {
167 x_tmp = hypre_StructVectorClone(y);
168 x = x_tmp;
169 }
170 /*-----------------------------------------------------------------------
171 * Do (alpha != 0.0) computation
172 *-----------------------------------------------------------------------*/
173
174 for (compute_i = 0; compute_i < 2; compute_i++)
175 {
176 switch(compute_i)
177 {
178 case 0:
179 {
180 xp = hypre_StructVectorData(x);
181 hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
182 compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
183
184 /*--------------------------------------------------------------
185 * initialize y= (beta/alpha)*y normally (where everything
186 * is multiplied by alpha at the end),
187 * beta*y for constant coefficient (where only Ax gets multiplied by alpha)
188 *--------------------------------------------------------------*/
189
190 if ( constant_coefficient==1 )
191 {
192 temp = beta;
193 }
194 else
195 {
196 temp = beta / alpha;
197 }
198 if (temp != 1.0)
199 {
200 boxes = hypre_StructGridBoxes(hypre_StructMatrixGrid(A));
201 hypre_ForBoxI(i, boxes)
202 {
203 box = hypre_BoxArrayBox(boxes, i);
204 start = hypre_BoxIMin(box);
205
206 y_data_box =
207 hypre_BoxArrayBox(hypre_StructVectorDataSpace(y), i);
208 yp = hypre_StructVectorBoxData(y, i);
209
210 #define DEVICE_VAR is_device_ptr(yp)
211 if (temp == 0.0)
212 {
213 hypre_BoxGetSize(box, loop_size);
214
215 hypre_BoxLoop1Begin(hypre_StructVectorNDim(x), loop_size,
216 y_data_box, start, stride, yi);
217 {
218 yp[yi] = 0.0;
219 }
220 hypre_BoxLoop1End(yi);
221 }
222 else
223 {
224 hypre_BoxGetSize(box, loop_size);
225
226 hypre_BoxLoop1Begin(hypre_StructVectorNDim(x), loop_size,
227 y_data_box, start, stride, yi);
228 {
229 yp[yi] *= temp;
230 }
231 hypre_BoxLoop1End(yi);
232 }
233 #undef DEVICE_VAR
234 }
235 }
236 }
237 break;
238
239 case 1:
240 {
241 hypre_FinalizeIndtComputations(comm_handle);
242 compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
243 }
244 break;
245 }
246
247 /*--------------------------------------------------------------------
248 * y += A*x
249 *--------------------------------------------------------------------*/
250
251 switch( constant_coefficient )
252 {
253 case 0:
254 {
255 hypre_StructMatvecCC0( alpha, A, x, y, compute_box_aa, stride );
256 break;
257 }
258 case 1:
259 {
260 hypre_StructMatvecCC1( alpha, A, x, y, compute_box_aa, stride );
261 break;
262 }
263 case 2:
264 {
265 hypre_StructMatvecCC2( alpha, A, x, y, compute_box_aa, stride );
266 break;
267 }
268 }
269
270 }
271
272 if (x_tmp)
273 {
274 hypre_StructVectorDestroy(x_tmp);
275 x = y;
276 }
277
278 return hypre_error_flag;
279 }
280
281 /*--------------------------------------------------------------------------
282 * hypre_StructMatvecCC0
283 * core of struct matvec computation, for the case constant_coefficient==0
284 * (all coefficients are variable)
285 *--------------------------------------------------------------------------*/
286
hypre_StructMatvecCC0(HYPRE_Complex alpha,hypre_StructMatrix * A,hypre_StructVector * x,hypre_StructVector * y,hypre_BoxArrayArray * compute_box_aa,hypre_IndexRef stride)287 HYPRE_Int hypre_StructMatvecCC0( HYPRE_Complex alpha,
288 hypre_StructMatrix *A,
289 hypre_StructVector *x,
290 hypre_StructVector *y,
291 hypre_BoxArrayArray *compute_box_aa,
292 hypre_IndexRef stride
293 )
294 {
295 HYPRE_Int i, j, si;
296 HYPRE_Complex *Ap0;
297 HYPRE_Complex *Ap1;
298 HYPRE_Complex *Ap2;
299 HYPRE_Complex *Ap3;
300 HYPRE_Complex *Ap4;
301 HYPRE_Complex *Ap5;
302 HYPRE_Complex *Ap6;
303 HYPRE_Int xoff0;
304 HYPRE_Int xoff1;
305 HYPRE_Int xoff2;
306 HYPRE_Int xoff3;
307 HYPRE_Int xoff4;
308 HYPRE_Int xoff5;
309 HYPRE_Int xoff6;
310 hypre_BoxArray *compute_box_a;
311 hypre_Box *compute_box;
312
313 hypre_Box *A_data_box;
314 hypre_Box *x_data_box;
315 hypre_StructStencil *stencil;
316 hypre_Index *stencil_shape;
317 HYPRE_Int stencil_size;
318
319 hypre_Box *y_data_box;
320 HYPRE_Complex *xp;
321 HYPRE_Complex *yp;
322 HYPRE_Int depth;
323 hypre_Index loop_size;
324 hypre_IndexRef start;
325 HYPRE_Int ndim;
326
327 stencil = hypre_StructMatrixStencil(A);
328 stencil_shape = hypre_StructStencilShape(stencil);
329 stencil_size = hypre_StructStencilSize(stencil);
330 ndim = hypre_StructVectorNDim(x);
331
332 hypre_ForBoxArrayI(i, compute_box_aa)
333 {
334 compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
335
336 A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
337 x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
338 y_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(y), i);
339
340 xp = hypre_StructVectorBoxData(x, i);
341 yp = hypre_StructVectorBoxData(y, i);
342
343 hypre_ForBoxI(j, compute_box_a)
344 {
345 compute_box = hypre_BoxArrayBox(compute_box_a, j);
346
347 hypre_BoxGetSize(compute_box, loop_size);
348 start = hypre_BoxIMin(compute_box);
349
350 /* unroll up to depth MAX_DEPTH */
351 for (si = 0; si < stencil_size; si+= MAX_DEPTH)
352 {
353 depth = hypre_min(MAX_DEPTH, (stencil_size -si));
354 switch(depth)
355 {
356 case 7:
357 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
358 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
359 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
360 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
361 Ap4 = hypre_StructMatrixBoxData(A, i, si+4);
362 Ap5 = hypre_StructMatrixBoxData(A, i, si+5);
363 Ap6 = hypre_StructMatrixBoxData(A, i, si+6);
364
365 xoff0 = hypre_BoxOffsetDistance(x_data_box,
366 stencil_shape[si+0]);
367 xoff1 = hypre_BoxOffsetDistance(x_data_box,
368 stencil_shape[si+1]);
369 xoff2 = hypre_BoxOffsetDistance(x_data_box,
370 stencil_shape[si+2]);
371 xoff3 = hypre_BoxOffsetDistance(x_data_box,
372 stencil_shape[si+3]);
373 xoff4 = hypre_BoxOffsetDistance(x_data_box,
374 stencil_shape[si+4]);
375 xoff5 = hypre_BoxOffsetDistance(x_data_box,
376 stencil_shape[si+5]);
377 xoff6 = hypre_BoxOffsetDistance(x_data_box,
378 stencil_shape[si+6]);
379
380 #define DEVICE_VAR is_device_ptr(yp,Ap0,Ap1,Ap2,Ap3,Ap4,Ap5,Ap6,xp)
381 hypre_BoxLoop3Begin(ndim, loop_size,
382 A_data_box, start, stride, Ai,
383 x_data_box, start, stride, xi,
384 y_data_box, start, stride, yi);
385 {
386 yp[yi] +=
387 Ap0[Ai] * xp[xi + xoff0] +
388 Ap1[Ai] * xp[xi + xoff1] +
389 Ap2[Ai] * xp[xi + xoff2] +
390 Ap3[Ai] * xp[xi + xoff3] +
391 Ap4[Ai] * xp[xi + xoff4] +
392 Ap5[Ai] * xp[xi + xoff5] +
393 Ap6[Ai] * xp[xi + xoff6];
394 }
395 hypre_BoxLoop3End(Ai, xi, yi);
396 #undef DEVICE_VAR
397
398 break;
399
400 case 6:
401 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
402 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
403 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
404 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
405 Ap4 = hypre_StructMatrixBoxData(A, i, si+4);
406 Ap5 = hypre_StructMatrixBoxData(A, i, si+5);
407
408 xoff0 = hypre_BoxOffsetDistance(x_data_box,
409 stencil_shape[si+0]);
410 xoff1 = hypre_BoxOffsetDistance(x_data_box,
411 stencil_shape[si+1]);
412 xoff2 = hypre_BoxOffsetDistance(x_data_box,
413 stencil_shape[si+2]);
414 xoff3 = hypre_BoxOffsetDistance(x_data_box,
415 stencil_shape[si+3]);
416 xoff4 = hypre_BoxOffsetDistance(x_data_box,
417 stencil_shape[si+4]);
418 xoff5 = hypre_BoxOffsetDistance(x_data_box,
419 stencil_shape[si+5]);
420
421 #define DEVICE_VAR is_device_ptr(yp,Ap0,Ap1,Ap2,Ap3,Ap4,Ap5,xp)
422 hypre_BoxLoop3Begin(ndim, loop_size,
423 A_data_box, start, stride, Ai,
424 x_data_box, start, stride, xi,
425 y_data_box, start, stride, yi);
426 {
427 yp[yi] +=
428 Ap0[Ai] * xp[xi + xoff0] +
429 Ap1[Ai] * xp[xi + xoff1] +
430 Ap2[Ai] * xp[xi + xoff2] +
431 Ap3[Ai] * xp[xi + xoff3] +
432 Ap4[Ai] * xp[xi + xoff4] +
433 Ap5[Ai] * xp[xi + xoff5];
434 }
435 hypre_BoxLoop3End(Ai, xi, yi);
436 #undef DEVICE_VAR
437
438 break;
439
440 case 5:
441 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
442 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
443 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
444 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
445 Ap4 = hypre_StructMatrixBoxData(A, i, si+4);
446
447 xoff0 = hypre_BoxOffsetDistance(x_data_box,
448 stencil_shape[si+0]);
449 xoff1 = hypre_BoxOffsetDistance(x_data_box,
450 stencil_shape[si+1]);
451 xoff2 = hypre_BoxOffsetDistance(x_data_box,
452 stencil_shape[si+2]);
453 xoff3 = hypre_BoxOffsetDistance(x_data_box,
454 stencil_shape[si+3]);
455 xoff4 = hypre_BoxOffsetDistance(x_data_box,
456 stencil_shape[si+4]);
457
458 #define DEVICE_VAR is_device_ptr(yp,Ap0,Ap1,Ap2,Ap3,Ap4,xp)
459 hypre_BoxLoop3Begin(ndim, loop_size,
460 A_data_box, start, stride, Ai,
461 x_data_box, start, stride, xi,
462 y_data_box, start, stride, yi);
463 {
464 yp[yi] +=
465 Ap0[Ai] * xp[xi + xoff0] +
466 Ap1[Ai] * xp[xi + xoff1] +
467 Ap2[Ai] * xp[xi + xoff2] +
468 Ap3[Ai] * xp[xi + xoff3] +
469 Ap4[Ai] * xp[xi + xoff4];
470 }
471 hypre_BoxLoop3End(Ai, xi, yi);
472 #undef DEVICE_VAR
473
474 break;
475
476 case 4:
477 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
478 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
479 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
480 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
481
482 xoff0 = hypre_BoxOffsetDistance(x_data_box,
483 stencil_shape[si+0]);
484 xoff1 = hypre_BoxOffsetDistance(x_data_box,
485 stencil_shape[si+1]);
486 xoff2 = hypre_BoxOffsetDistance(x_data_box,
487 stencil_shape[si+2]);
488 xoff3 = hypre_BoxOffsetDistance(x_data_box,
489 stencil_shape[si+3]);
490
491 #define DEVICE_VAR is_device_ptr(yp,Ap0,Ap1,Ap2,Ap3,xp)
492 hypre_BoxLoop3Begin(ndim, loop_size,
493 A_data_box, start, stride, Ai,
494 x_data_box, start, stride, xi,
495 y_data_box, start, stride, yi);
496 {
497 yp[yi] +=
498 Ap0[Ai] * xp[xi + xoff0] +
499 Ap1[Ai] * xp[xi + xoff1] +
500 Ap2[Ai] * xp[xi + xoff2] +
501 Ap3[Ai] * xp[xi + xoff3];
502 }
503 hypre_BoxLoop3End(Ai, xi, yi);
504 #undef DEVICE_VAR
505
506 break;
507
508 case 3:
509 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
510 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
511 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
512
513 xoff0 = hypre_BoxOffsetDistance(x_data_box,
514 stencil_shape[si+0]);
515 xoff1 = hypre_BoxOffsetDistance(x_data_box,
516 stencil_shape[si+1]);
517 xoff2 = hypre_BoxOffsetDistance(x_data_box,
518 stencil_shape[si+2]);
519
520 #define DEVICE_VAR is_device_ptr(yp,Ap0,Ap1,Ap2,xp)
521 hypre_BoxLoop3Begin(ndim, loop_size,
522 A_data_box, start, stride, Ai,
523 x_data_box, start, stride, xi,
524 y_data_box, start, stride, yi);
525 {
526 yp[yi] +=
527 Ap0[Ai] * xp[xi + xoff0] +
528 Ap1[Ai] * xp[xi + xoff1] +
529 Ap2[Ai] * xp[xi + xoff2];
530 }
531 hypre_BoxLoop3End(Ai, xi, yi);
532 #undef DEVICE_VAR
533
534 break;
535
536 case 2:
537 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
538 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
539
540 xoff0 = hypre_BoxOffsetDistance(x_data_box,
541 stencil_shape[si+0]);
542 xoff1 = hypre_BoxOffsetDistance(x_data_box,
543 stencil_shape[si+1]);
544
545 #define DEVICE_VAR is_device_ptr(yp,Ap0,Ap1,xp)
546 hypre_BoxLoop3Begin(ndim, loop_size,
547 A_data_box, start, stride, Ai,
548 x_data_box, start, stride, xi,
549 y_data_box, start, stride, yi);
550 {
551 yp[yi] +=
552 Ap0[Ai] * xp[xi + xoff0] +
553 Ap1[Ai] * xp[xi + xoff1];
554 }
555 hypre_BoxLoop3End(Ai, xi, yi);
556 #undef DEVICE_VAR
557
558 break;
559
560 case 1:
561 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
562
563 xoff0 = hypre_BoxOffsetDistance(x_data_box,
564 stencil_shape[si+0]);
565
566 #define DEVICE_VAR is_device_ptr(yp,Ap0,xp)
567 hypre_BoxLoop3Begin(ndim, loop_size,
568 A_data_box, start, stride, Ai,
569 x_data_box, start, stride, xi,
570 y_data_box, start, stride, yi);
571 {
572 yp[yi] +=
573 Ap0[Ai] * xp[xi + xoff0];
574 }
575 hypre_BoxLoop3End(Ai, xi, yi);
576 #undef DEVICE_VAR
577
578 break;
579 }
580 }
581
582 if (alpha != 1.0)
583 {
584 #define DEVICE_VAR is_device_ptr(yp)
585 hypre_BoxLoop1Begin(ndim, loop_size,
586 y_data_box, start, stride, yi);
587 {
588 yp[yi] *= alpha;
589 }
590 hypre_BoxLoop1End(yi);
591 #undef DEVICE_VAR
592 }
593 }
594 }
595
596 return hypre_error_flag;
597 }
598
599
600 /*--------------------------------------------------------------------------
601 * hypre_StructMatvecCC1
602 * core of struct matvec computation, for the case constant_coefficient==1
603 *--------------------------------------------------------------------------*/
604
hypre_StructMatvecCC1(HYPRE_Complex alpha,hypre_StructMatrix * A,hypre_StructVector * x,hypre_StructVector * y,hypre_BoxArrayArray * compute_box_aa,hypre_IndexRef stride)605 HYPRE_Int hypre_StructMatvecCC1( HYPRE_Complex alpha,
606 hypre_StructMatrix *A,
607 hypre_StructVector *x,
608 hypre_StructVector *y,
609 hypre_BoxArrayArray *compute_box_aa,
610 hypre_IndexRef stride
611 )
612 {
613 HYPRE_Int i, j, si;
614 HYPRE_Complex *Ap0;
615 HYPRE_Complex *Ap1;
616 HYPRE_Complex *Ap2;
617 HYPRE_Complex *Ap3;
618 HYPRE_Complex *Ap4;
619 HYPRE_Complex *Ap5;
620 HYPRE_Complex *Ap6;
621 HYPRE_Complex AAp0;
622 HYPRE_Complex AAp1;
623 HYPRE_Complex AAp2;
624 HYPRE_Complex AAp3;
625 HYPRE_Complex AAp4;
626 HYPRE_Complex AAp5;
627 HYPRE_Complex AAp6;
628 HYPRE_Int xoff0;
629 HYPRE_Int xoff1;
630 HYPRE_Int xoff2;
631 HYPRE_Int xoff3;
632 HYPRE_Int xoff4;
633 HYPRE_Int xoff5;
634 HYPRE_Int xoff6;
635 HYPRE_Int Ai;
636
637 hypre_BoxArray *compute_box_a;
638 hypre_Box *compute_box;
639
640 hypre_Box *x_data_box;
641 hypre_StructStencil *stencil;
642 hypre_Index *stencil_shape;
643 HYPRE_Int stencil_size;
644
645 hypre_Box *y_data_box;
646 HYPRE_Complex *xp;
647 HYPRE_Complex *yp;
648 HYPRE_Int depth;
649 hypre_Index loop_size;
650 hypre_IndexRef start;
651 HYPRE_Int ndim;
652
653 stencil = hypre_StructMatrixStencil(A);
654 stencil_shape = hypre_StructStencilShape(stencil);
655 stencil_size = hypre_StructStencilSize(stencil);
656 ndim = hypre_StructVectorNDim(x);
657
658 hypre_ForBoxArrayI(i, compute_box_aa)
659 {
660 compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
661
662 x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
663 y_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(y), i);
664
665 xp = hypre_StructVectorBoxData(x, i);
666 yp = hypre_StructVectorBoxData(y, i);
667
668 hypre_ForBoxI(j, compute_box_a)
669 {
670 compute_box = hypre_BoxArrayBox(compute_box_a, j);
671
672 hypre_BoxGetSize(compute_box, loop_size);
673 start = hypre_BoxIMin(compute_box);
674
675 Ai = 0;
676
677 /* unroll up to depth MAX_DEPTH */
678 for (si = 0; si < stencil_size; si+= MAX_DEPTH)
679 {
680 depth = hypre_min(MAX_DEPTH, (stencil_size -si));
681 switch(depth)
682 {
683 case 7:
684 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
685 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
686 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
687 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
688 Ap4 = hypre_StructMatrixBoxData(A, i, si+4);
689 Ap5 = hypre_StructMatrixBoxData(A, i, si+5);
690 Ap6 = hypre_StructMatrixBoxData(A, i, si+6);
691 AAp0 = Ap0[Ai]*alpha;
692 AAp1 = Ap1[Ai]*alpha;
693 AAp2 = Ap2[Ai]*alpha;
694 AAp3 = Ap3[Ai]*alpha;
695 AAp4 = Ap4[Ai]*alpha;
696 AAp5 = Ap5[Ai]*alpha;
697 AAp6 = Ap6[Ai]*alpha;
698
699 xoff0 = hypre_BoxOffsetDistance(x_data_box,
700 stencil_shape[si+0]);
701 xoff1 = hypre_BoxOffsetDistance(x_data_box,
702 stencil_shape[si+1]);
703 xoff2 = hypre_BoxOffsetDistance(x_data_box,
704 stencil_shape[si+2]);
705 xoff3 = hypre_BoxOffsetDistance(x_data_box,
706 stencil_shape[si+3]);
707 xoff4 = hypre_BoxOffsetDistance(x_data_box,
708 stencil_shape[si+4]);
709 xoff5 = hypre_BoxOffsetDistance(x_data_box,
710 stencil_shape[si+5]);
711 xoff6 = hypre_BoxOffsetDistance(x_data_box,
712 stencil_shape[si+6]);
713
714 #define DEVICE_VAR is_device_ptr(yp,xp)
715 hypre_BoxLoop2Begin(ndim, loop_size,
716 x_data_box, start, stride, xi,
717 y_data_box, start, stride, yi);
718 {
719 yp[yi] +=
720 AAp0 * xp[xi + xoff0] +
721 AAp1 * xp[xi + xoff1] +
722 AAp2 * xp[xi + xoff2] +
723 AAp3 * xp[xi + xoff3] +
724 AAp4 * xp[xi + xoff4] +
725 AAp5 * xp[xi + xoff5] +
726 AAp6 * xp[xi + xoff6];
727 }
728 hypre_BoxLoop2End(xi, yi);
729 #undef DEVICE_VAR
730 break;
731
732 case 6:
733 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
734 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
735 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
736 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
737 Ap4 = hypre_StructMatrixBoxData(A, i, si+4);
738 Ap5 = hypre_StructMatrixBoxData(A, i, si+5);
739 AAp0 = Ap0[Ai]*alpha;
740 AAp1 = Ap1[Ai]*alpha;
741 AAp2 = Ap2[Ai]*alpha;
742 AAp3 = Ap3[Ai]*alpha;
743 AAp4 = Ap4[Ai]*alpha;
744 AAp5 = Ap5[Ai]*alpha;
745
746 xoff0 = hypre_BoxOffsetDistance(x_data_box,
747 stencil_shape[si+0]);
748 xoff1 = hypre_BoxOffsetDistance(x_data_box,
749 stencil_shape[si+1]);
750 xoff2 = hypre_BoxOffsetDistance(x_data_box,
751 stencil_shape[si+2]);
752 xoff3 = hypre_BoxOffsetDistance(x_data_box,
753 stencil_shape[si+3]);
754 xoff4 = hypre_BoxOffsetDistance(x_data_box,
755 stencil_shape[si+4]);
756 xoff5 = hypre_BoxOffsetDistance(x_data_box,
757 stencil_shape[si+5]);
758
759 #define DEVICE_VAR is_device_ptr(yp,xp)
760 hypre_BoxLoop2Begin(ndim, loop_size,
761 x_data_box, start, stride, xi,
762 y_data_box, start, stride, yi);
763 {
764 yp[yi] +=
765 AAp0 * xp[xi + xoff0] +
766 AAp1 * xp[xi + xoff1] +
767 AAp2 * xp[xi + xoff2] +
768 AAp3 * xp[xi + xoff3] +
769 AAp4 * xp[xi + xoff4] +
770 AAp5 * xp[xi + xoff5];
771 }
772 hypre_BoxLoop2End(xi, yi);
773 #undef DEVICE_VAR
774 break;
775
776 case 5:
777 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
778 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
779 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
780 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
781 Ap4 = hypre_StructMatrixBoxData(A, i, si+4);
782 AAp0 = Ap0[Ai]*alpha;
783 AAp1 = Ap1[Ai]*alpha;
784 AAp2 = Ap2[Ai]*alpha;
785 AAp3 = Ap3[Ai]*alpha;
786 AAp4 = Ap4[Ai]*alpha;
787
788 xoff0 = hypre_BoxOffsetDistance(x_data_box,
789 stencil_shape[si+0]);
790 xoff1 = hypre_BoxOffsetDistance(x_data_box,
791 stencil_shape[si+1]);
792 xoff2 = hypre_BoxOffsetDistance(x_data_box,
793 stencil_shape[si+2]);
794 xoff3 = hypre_BoxOffsetDistance(x_data_box,
795 stencil_shape[si+3]);
796 xoff4 = hypre_BoxOffsetDistance(x_data_box,
797 stencil_shape[si+4]);
798
799 #define DEVICE_VAR is_device_ptr(yp,xp)
800 hypre_BoxLoop2Begin(ndim, loop_size,
801 x_data_box, start, stride, xi,
802 y_data_box, start, stride, yi);
803 {
804 yp[yi] +=
805 AAp0 * xp[xi + xoff0] +
806 AAp1 * xp[xi + xoff1] +
807 AAp2 * xp[xi + xoff2] +
808 AAp3 * xp[xi + xoff3] +
809 AAp4 * xp[xi + xoff4];
810 }
811 hypre_BoxLoop2End(xi, yi);
812 #undef DEVICE_VAR
813 break;
814
815 case 4:
816 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
817 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
818 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
819 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
820 AAp0 = Ap0[Ai]*alpha;
821 AAp1 = Ap1[Ai]*alpha;
822 AAp2 = Ap2[Ai]*alpha;
823 AAp3 = Ap3[Ai]*alpha;
824
825 xoff0 = hypre_BoxOffsetDistance(x_data_box,
826 stencil_shape[si+0]);
827 xoff1 = hypre_BoxOffsetDistance(x_data_box,
828 stencil_shape[si+1]);
829 xoff2 = hypre_BoxOffsetDistance(x_data_box,
830 stencil_shape[si+2]);
831 xoff3 = hypre_BoxOffsetDistance(x_data_box,
832 stencil_shape[si+3]);
833
834 #define DEVICE_VAR is_device_ptr(yp,xp)
835 hypre_BoxLoop2Begin(ndim, loop_size,
836 x_data_box, start, stride, xi,
837 y_data_box, start, stride, yi);
838 {
839 yp[yi] +=
840 AAp0 * xp[xi + xoff0] +
841 AAp1 * xp[xi + xoff1] +
842 AAp2 * xp[xi + xoff2] +
843 AAp3 * xp[xi + xoff3];
844 }
845 hypre_BoxLoop2End(xi, yi);
846 #undef DEVICE_VAR
847 break;
848
849 case 3:
850 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
851 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
852 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
853 AAp0 = Ap0[Ai]*alpha;
854 AAp1 = Ap1[Ai]*alpha;
855 AAp2 = Ap2[Ai]*alpha;
856
857 xoff0 = hypre_BoxOffsetDistance(x_data_box,
858 stencil_shape[si+0]);
859 xoff1 = hypre_BoxOffsetDistance(x_data_box,
860 stencil_shape[si+1]);
861 xoff2 = hypre_BoxOffsetDistance(x_data_box,
862 stencil_shape[si+2]);
863
864 #define DEVICE_VAR is_device_ptr(yp,xp)
865 hypre_BoxLoop2Begin(ndim, loop_size,
866 x_data_box, start, stride, xi,
867 y_data_box, start, stride, yi);
868 {
869 yp[yi] +=
870 AAp0 * xp[xi + xoff0] +
871 AAp1 * xp[xi + xoff1] +
872 AAp2 * xp[xi + xoff2];
873 }
874 hypre_BoxLoop2End(xi, yi);
875 #undef DEVICE_VAR
876 break;
877
878 case 2:
879 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
880 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
881 AAp0 = Ap0[Ai]*alpha;
882 AAp1 = Ap1[Ai]*alpha;
883
884 xoff0 = hypre_BoxOffsetDistance(x_data_box,
885 stencil_shape[si+0]);
886 xoff1 = hypre_BoxOffsetDistance(x_data_box,
887 stencil_shape[si+1]);
888
889 #define DEVICE_VAR is_device_ptr(yp,xp)
890 hypre_BoxLoop2Begin(ndim, loop_size,
891 x_data_box, start, stride, xi,
892 y_data_box, start, stride, yi);
893 {
894 yp[yi] +=
895 AAp0 * xp[xi + xoff0] +
896 AAp1 * xp[xi + xoff1];
897 }
898 hypre_BoxLoop2End(xi, yi);
899 #undef DEVICE_VAR
900 break;
901
902 case 1:
903 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
904 AAp0 = Ap0[Ai]*alpha;
905
906 xoff0 = hypre_BoxOffsetDistance(x_data_box,
907 stencil_shape[si+0]);
908
909 #define DEVICE_VAR is_device_ptr(yp,xp)
910 hypre_BoxLoop2Begin(ndim, loop_size,
911 x_data_box, start, stride, xi,
912 y_data_box, start, stride, yi);
913 {
914 yp[yi] +=
915 AAp0 * xp[xi + xoff0];
916 }
917 hypre_BoxLoop2End(xi, yi);
918 #undef DEVICE_VAR
919 }
920 }
921 }
922 }
923
924 return hypre_error_flag;
925 }
926
927
928 /*--------------------------------------------------------------------------
929 * hypre_StructMatvecCC2
930 * core of struct matvec computation, for the case constant_coefficient==2
931 *--------------------------------------------------------------------------*/
932
hypre_StructMatvecCC2(HYPRE_Complex alpha,hypre_StructMatrix * A,hypre_StructVector * x,hypre_StructVector * y,hypre_BoxArrayArray * compute_box_aa,hypre_IndexRef stride)933 HYPRE_Int hypre_StructMatvecCC2( HYPRE_Complex alpha,
934 hypre_StructMatrix *A,
935 hypre_StructVector *x,
936 hypre_StructVector *y,
937 hypre_BoxArrayArray *compute_box_aa,
938 hypre_IndexRef stride
939 )
940 {
941 HYPRE_Int i, j, si;
942 HYPRE_Complex *Ap0;
943 HYPRE_Complex *Ap1;
944 HYPRE_Complex *Ap2;
945 HYPRE_Complex *Ap3;
946 HYPRE_Complex *Ap4;
947 HYPRE_Complex *Ap5;
948 HYPRE_Complex *Ap6;
949 HYPRE_Complex AAp0;
950 HYPRE_Complex AAp1;
951 HYPRE_Complex AAp2;
952 HYPRE_Complex AAp3;
953 HYPRE_Complex AAp4;
954 HYPRE_Complex AAp5;
955 HYPRE_Complex AAp6;
956 HYPRE_Int xoff0;
957 HYPRE_Int xoff1;
958 HYPRE_Int xoff2;
959 HYPRE_Int xoff3;
960 HYPRE_Int xoff4;
961 HYPRE_Int xoff5;
962 HYPRE_Int xoff6;
963 HYPRE_Int si_center, center_rank;
964 hypre_Index center_index;
965 HYPRE_Int Ai_CC;
966 hypre_BoxArray *compute_box_a;
967 hypre_Box *compute_box;
968
969 hypre_Box *A_data_box;
970 hypre_Box *x_data_box;
971 hypre_StructStencil *stencil;
972 hypre_Index *stencil_shape;
973 HYPRE_Int stencil_size;
974
975 hypre_Box *y_data_box;
976 HYPRE_Complex *xp;
977 HYPRE_Complex *yp;
978 HYPRE_Int depth;
979 hypre_Index loop_size;
980 hypre_IndexRef start;
981 HYPRE_Int ndim;
982 HYPRE_Complex zero[1]={0};
983
984 stencil = hypre_StructMatrixStencil(A);
985 stencil_shape = hypre_StructStencilShape(stencil);
986 stencil_size = hypre_StructStencilSize(stencil);
987 ndim = hypre_StructVectorNDim(x);
988
989 hypre_ForBoxArrayI(i, compute_box_aa)
990 {
991 compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
992
993 A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
994 x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
995 y_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(y), i);
996
997 xp = hypre_StructVectorBoxData(x, i);
998 yp = hypre_StructVectorBoxData(y, i);
999
1000 hypre_ForBoxI(j, compute_box_a)
1001 {
1002 compute_box = hypre_BoxArrayBox(compute_box_a, j);
1003
1004 hypre_BoxGetSize(compute_box, loop_size);
1005 start = hypre_BoxIMin(compute_box);
1006
1007 Ai_CC = hypre_CCBoxIndexRank( A_data_box, start );
1008
1009 /* Find the stencil index for the center of the stencil, which
1010 makes the matrix diagonal. This is the variable coefficient
1011 part of the matrix, so will get different treatment...*/
1012 hypre_SetIndex(center_index, 0);
1013 center_rank = hypre_StructStencilElementRank( stencil, center_index );
1014 si_center = center_rank;
1015
1016 /* unroll up to depth MAX_DEPTH
1017 Only the constant coefficient part of the matrix is referenced here,
1018 the center (variable) coefficient part is deferred. */
1019 for (si = 0; si < stencil_size; si+= MAX_DEPTH)
1020 {
1021 depth = hypre_min(MAX_DEPTH, (stencil_size -si));
1022 switch(depth)
1023 {
1024 case 7:
1025 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
1026 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
1027 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
1028 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
1029 Ap4 = hypre_StructMatrixBoxData(A, i, si+4);
1030 Ap5 = hypre_StructMatrixBoxData(A, i, si+5);
1031 Ap6 = hypre_StructMatrixBoxData(A, i, si+6);
1032 if ( (0 <= si_center-si) && (si_center-si < 7) )
1033 {
1034 switch ( si_center-si )
1035 {
1036 case 0: Ap0 = zero; break;
1037 case 1: Ap1 = zero; break;
1038 case 2: Ap2 = zero; break;
1039 case 3: Ap3 = zero; break;
1040 case 4: Ap4 = zero; break;
1041 case 5: Ap5 = zero; break;
1042 case 6: Ap6 = zero; break;
1043 }
1044 }
1045
1046 AAp0 = Ap0[Ai_CC];
1047 AAp1 = Ap1[Ai_CC];
1048 AAp2 = Ap2[Ai_CC];
1049 AAp3 = Ap3[Ai_CC];
1050 AAp4 = Ap4[Ai_CC];
1051 AAp5 = Ap5[Ai_CC];
1052 AAp6 = Ap6[Ai_CC];
1053
1054
1055 xoff0 = hypre_BoxOffsetDistance(x_data_box,
1056 stencil_shape[si+0]);
1057 xoff0 = hypre_BoxOffsetDistance(x_data_box,
1058 stencil_shape[si+0]);
1059 xoff1 = hypre_BoxOffsetDistance(x_data_box,
1060 stencil_shape[si+1]);
1061 xoff2 = hypre_BoxOffsetDistance(x_data_box,
1062 stencil_shape[si+2]);
1063 xoff3 = hypre_BoxOffsetDistance(x_data_box,
1064 stencil_shape[si+3]);
1065 xoff4 = hypre_BoxOffsetDistance(x_data_box,
1066 stencil_shape[si+4]);
1067 xoff5 = hypre_BoxOffsetDistance(x_data_box,
1068 stencil_shape[si+5]);
1069 xoff6 = hypre_BoxOffsetDistance(x_data_box,
1070 stencil_shape[si+6]);
1071
1072 #define DEVICE_VAR is_device_ptr(yp,xp)
1073 hypre_BoxLoop2Begin(ndim, loop_size,
1074 x_data_box, start, stride, xi,
1075 y_data_box, start, stride, yi);
1076 {
1077 yp[yi] +=
1078 AAp0 * xp[xi + xoff0] +
1079 AAp1 * xp[xi + xoff1] +
1080 AAp2 * xp[xi + xoff2] +
1081 AAp3 * xp[xi + xoff3] +
1082 AAp4 * xp[xi + xoff4] +
1083 AAp5 * xp[xi + xoff5] +
1084 AAp6 * xp[xi + xoff6];
1085 }
1086 hypre_BoxLoop2End(xi, yi);
1087 #undef DEVICE_VAR
1088
1089 break;
1090
1091 case 6:
1092 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
1093 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
1094 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
1095 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
1096 Ap4 = hypre_StructMatrixBoxData(A, i, si+4);
1097 Ap5 = hypre_StructMatrixBoxData(A, i, si+5);
1098 if ( (0 <= si_center-si) && (si_center-si < 6) )
1099 {
1100 switch ( si_center-si )
1101 {
1102 case 0: Ap0 = zero; break;
1103 case 1: Ap1 = zero; break;
1104 case 2: Ap2 = zero; break;
1105 case 3: Ap3 = zero; break;
1106 case 4: Ap4 = zero; break;
1107 case 5: Ap5 = zero; break;
1108 }
1109 }
1110 AAp0 = Ap0[Ai_CC];
1111 AAp1 = Ap1[Ai_CC];
1112 AAp2 = Ap2[Ai_CC];
1113 AAp3 = Ap3[Ai_CC];
1114 AAp4 = Ap4[Ai_CC];
1115 AAp5 = Ap5[Ai_CC];
1116
1117 xoff0 = hypre_BoxOffsetDistance(x_data_box,
1118 stencil_shape[si+0]);
1119 xoff1 = hypre_BoxOffsetDistance(x_data_box,
1120 stencil_shape[si+1]);
1121 xoff2 = hypre_BoxOffsetDistance(x_data_box,
1122 stencil_shape[si+2]);
1123 xoff3 = hypre_BoxOffsetDistance(x_data_box,
1124 stencil_shape[si+3]);
1125 xoff4 = hypre_BoxOffsetDistance(x_data_box,
1126 stencil_shape[si+4]);
1127 xoff5 = hypre_BoxOffsetDistance(x_data_box,
1128 stencil_shape[si+5]);
1129
1130 #define DEVICE_VAR is_device_ptr(yp,xp)
1131 hypre_BoxLoop2Begin(ndim, loop_size,
1132 x_data_box, start, stride, xi,
1133 y_data_box, start, stride, yi);
1134 {
1135 yp[yi] +=
1136 AAp0 * xp[xi + xoff0] +
1137 AAp1 * xp[xi + xoff1] +
1138 AAp2 * xp[xi + xoff2] +
1139 AAp3 * xp[xi + xoff3] +
1140 AAp4 * xp[xi + xoff4] +
1141 AAp5 * xp[xi + xoff5];
1142 }
1143 hypre_BoxLoop2End(xi, yi);
1144 #undef DEVICE_VAR
1145 break;
1146
1147 case 5:
1148 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
1149 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
1150 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
1151 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
1152 Ap4 = hypre_StructMatrixBoxData(A, i, si+4);
1153 if ( (0 <= si_center-si) && (si_center-si < 5) )
1154 {
1155 switch ( si_center-si )
1156 {
1157 case 0: Ap0 = zero; break;
1158 case 1: Ap1 = zero; break;
1159 case 2: Ap2 = zero; break;
1160 case 3: Ap3 = zero; break;
1161 case 4: Ap4 = zero; break;
1162 }
1163 }
1164 AAp0 = Ap0[Ai_CC];
1165 AAp1 = Ap1[Ai_CC];
1166 AAp2 = Ap2[Ai_CC];
1167 AAp3 = Ap3[Ai_CC];
1168 AAp4 = Ap4[Ai_CC];
1169
1170 xoff0 = hypre_BoxOffsetDistance(x_data_box,
1171 stencil_shape[si+0]);
1172 xoff1 = hypre_BoxOffsetDistance(x_data_box,
1173 stencil_shape[si+1]);
1174 xoff2 = hypre_BoxOffsetDistance(x_data_box,
1175 stencil_shape[si+2]);
1176 xoff3 = hypre_BoxOffsetDistance(x_data_box,
1177 stencil_shape[si+3]);
1178 xoff4 = hypre_BoxOffsetDistance(x_data_box,
1179 stencil_shape[si+4]);
1180
1181 #define DEVICE_VAR is_device_ptr(yp,xp)
1182 hypre_BoxLoop2Begin(ndim, loop_size,
1183 x_data_box, start, stride, xi,
1184 y_data_box, start, stride, yi);
1185 {
1186 yp[yi] +=
1187 AAp0 * xp[xi + xoff0] +
1188 AAp1 * xp[xi + xoff1] +
1189 AAp2 * xp[xi + xoff2] +
1190 AAp3 * xp[xi + xoff3] +
1191 AAp4 * xp[xi + xoff4];
1192 }
1193 hypre_BoxLoop2End(xi, yi);
1194 #undef DEVICE_VAR
1195 break;
1196
1197 case 4:
1198 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
1199 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
1200 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
1201 Ap3 = hypre_StructMatrixBoxData(A, i, si+3);
1202 if ( (0 <= si_center-si) && (si_center-si < 4) )
1203 {
1204 switch ( si_center-si )
1205 {
1206 case 0: Ap0 = zero; break;
1207 case 1: Ap1 = zero; break;
1208 case 2: Ap2 = zero; break;
1209 case 3: Ap3 = zero; break;
1210 }
1211 }
1212 AAp0 = Ap0[Ai_CC];
1213 AAp1 = Ap1[Ai_CC];
1214 AAp2 = Ap2[Ai_CC];
1215 AAp3 = Ap3[Ai_CC];
1216
1217 xoff0 = hypre_BoxOffsetDistance(x_data_box,
1218 stencil_shape[si+0]);
1219 xoff1 = hypre_BoxOffsetDistance(x_data_box,
1220 stencil_shape[si+1]);
1221 xoff2 = hypre_BoxOffsetDistance(x_data_box,
1222 stencil_shape[si+2]);
1223 xoff3 = hypre_BoxOffsetDistance(x_data_box,
1224 stencil_shape[si+3]);
1225
1226 #define DEVICE_VAR is_device_ptr(yp,xp)
1227 hypre_BoxLoop2Begin(ndim, loop_size,
1228 x_data_box, start, stride, xi,
1229 y_data_box, start, stride, yi);
1230 {
1231 yp[yi] +=
1232 AAp0 * xp[xi + xoff0] +
1233 AAp1 * xp[xi + xoff1] +
1234 AAp2 * xp[xi + xoff2] +
1235 AAp3 * xp[xi + xoff3];
1236 }
1237 hypre_BoxLoop2End(xi, yi);
1238 #undef DEVICE_VAR
1239 break;
1240
1241 case 3:
1242 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
1243 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
1244 Ap2 = hypre_StructMatrixBoxData(A, i, si+2);
1245 if ( (0 <= si_center-si) && (si_center-si < 3) )
1246 {
1247 switch ( si_center-si )
1248 {
1249 case 0: Ap0 = zero; break;
1250 case 1: Ap1 = zero; break;
1251 case 2: Ap2 = zero; break;
1252 }
1253 }
1254 AAp0 = Ap0[Ai_CC];
1255 AAp1 = Ap1[Ai_CC];
1256 AAp2 = Ap2[Ai_CC];
1257
1258 xoff0 = hypre_BoxOffsetDistance(x_data_box,
1259 stencil_shape[si+0]);
1260 xoff1 = hypre_BoxOffsetDistance(x_data_box,
1261 stencil_shape[si+1]);
1262 xoff2 = hypre_BoxOffsetDistance(x_data_box,
1263 stencil_shape[si+2]);
1264
1265 #define DEVICE_VAR is_device_ptr(yp,xp)
1266 hypre_BoxLoop2Begin(ndim, loop_size,
1267 x_data_box, start, stride, xi,
1268 y_data_box, start, stride, yi);
1269 {
1270 yp[yi] +=
1271 AAp0 * xp[xi + xoff0] +
1272 AAp1 * xp[xi + xoff1] +
1273 AAp2 * xp[xi + xoff2];
1274 }
1275 hypre_BoxLoop2End(xi, yi);
1276 #undef DEVICE_VAR
1277 break;
1278
1279 case 2:
1280 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
1281 Ap1 = hypre_StructMatrixBoxData(A, i, si+1);
1282 if ( (0 <= si_center-si) && (si_center-si < 2) )
1283 {
1284 switch ( si_center-si )
1285 {
1286 case 0: Ap0 = zero; break;
1287 case 1: Ap1 = zero; break;
1288 }
1289 }
1290 AAp0 = Ap0[Ai_CC];
1291 AAp1 = Ap1[Ai_CC];
1292
1293 xoff0 = hypre_BoxOffsetDistance(x_data_box,
1294 stencil_shape[si+0]);
1295 xoff1 = hypre_BoxOffsetDistance(x_data_box,
1296 stencil_shape[si+1]);
1297
1298 #define DEVICE_VAR is_device_ptr(yp,xp)
1299 hypre_BoxLoop2Begin(ndim, loop_size,
1300 x_data_box, start, stride, xi,
1301 y_data_box, start, stride, yi);
1302 {
1303 yp[yi] +=
1304 AAp0 * xp[xi + xoff0] +
1305 AAp1 * xp[xi + xoff1];
1306 }
1307 hypre_BoxLoop2End(xi, yi);
1308 #undef DEVICE_VAR
1309 break;
1310
1311 case 1:
1312 Ap0 = hypre_StructMatrixBoxData(A, i, si+0);
1313 if ( si_center-si == 0 )
1314 {
1315 Ap0 = zero;
1316 }
1317 AAp0 = Ap0[Ai_CC];
1318
1319 xoff0 = hypre_BoxOffsetDistance(x_data_box,
1320 stencil_shape[si+0]);
1321
1322 #define DEVICE_VAR is_device_ptr(yp,xp)
1323 hypre_BoxLoop2Begin(ndim, loop_size,
1324 x_data_box, start, stride, xi,
1325 y_data_box, start, stride, yi);
1326 {
1327 yp[yi] +=
1328 AAp0 * xp[xi + xoff0];
1329 }
1330 hypre_BoxLoop2End(xi, yi);
1331 #undef DEVICE_VAR
1332
1333 break;
1334 }
1335 }
1336
1337 Ap0 = hypre_StructMatrixBoxData(A, i, si_center);
1338 xoff0 = hypre_BoxOffsetDistance(x_data_box,
1339 stencil_shape[si_center]);
1340 if (alpha!= 1.0 )
1341 {
1342 #define DEVICE_VAR is_device_ptr(yp,Ap0,xp)
1343 hypre_BoxLoop3Begin(ndim, loop_size,
1344 A_data_box, start, stride, Ai,
1345 x_data_box, start, stride, xi,
1346 y_data_box, start, stride, yi);
1347 {
1348 yp[yi] = alpha * ( yp[yi] +
1349 Ap0[Ai] * xp[xi + xoff0] );
1350 }
1351 hypre_BoxLoop3End(Ai, xi, yi);
1352 #undef DEVICE_VAR
1353 }
1354 else
1355 {
1356 #define DEVICE_VAR is_device_ptr(yp,Ap0,xp)
1357 hypre_BoxLoop3Begin(ndim, loop_size,
1358 A_data_box, start, stride, Ai,
1359 x_data_box, start, stride, xi,
1360 y_data_box, start, stride, yi);
1361 {
1362 yp[yi] +=
1363 Ap0[Ai] * xp[xi + xoff0];
1364 }
1365 hypre_BoxLoop3End(Ai, xi, yi);
1366 #undef DEVICE_VAR
1367 }
1368
1369 }
1370 }
1371
1372 return hypre_error_flag;
1373 }
1374
1375
1376 /*--------------------------------------------------------------------------
1377 * hypre_StructMatvecDestroy
1378 *--------------------------------------------------------------------------*/
1379
1380 HYPRE_Int
hypre_StructMatvecDestroy(void * matvec_vdata)1381 hypre_StructMatvecDestroy( void *matvec_vdata )
1382 {
1383 hypre_StructMatvecData *matvec_data = (hypre_StructMatvecData *)matvec_vdata;
1384
1385 if (matvec_data)
1386 {
1387 hypre_StructMatrixDestroy(matvec_data -> A);
1388 hypre_StructVectorDestroy(matvec_data -> x);
1389 hypre_ComputePkgDestroy(matvec_data -> compute_pkg );
1390 hypre_TFree(matvec_data, HYPRE_MEMORY_HOST);
1391 }
1392
1393 return hypre_error_flag;
1394 }
1395
1396 /*--------------------------------------------------------------------------
1397 * hypre_StructMatvec
1398 *--------------------------------------------------------------------------*/
1399
1400 HYPRE_Int
hypre_StructMatvec(HYPRE_Complex alpha,hypre_StructMatrix * A,hypre_StructVector * x,HYPRE_Complex beta,hypre_StructVector * y)1401 hypre_StructMatvec( HYPRE_Complex alpha,
1402 hypre_StructMatrix *A,
1403 hypre_StructVector *x,
1404 HYPRE_Complex beta,
1405 hypre_StructVector *y )
1406 {
1407 void *matvec_data;
1408
1409 matvec_data = hypre_StructMatvecCreate();
1410 hypre_StructMatvecSetup(matvec_data, A, x);
1411 hypre_StructMatvecCompute(matvec_data, alpha, A, x, beta, y);
1412 hypre_StructMatvecDestroy(matvec_data);
1413
1414 return hypre_error_flag;
1415 }
1416