1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /******************************************************************************
9  * OpenMP Problems
10  *
11  * Are private static arrays a problem?
12  *
13  ******************************************************************************/
14 
15 /******************************************************************************
16  *  FAC composite level restriction.
17  *  Injection away from the refinement patches; constant restriction
18  *  inside patch.
19  ******************************************************************************/
20 
21 #include "_hypre_sstruct_ls.h"
22 #include "_hypre_struct_mv.hpp"
23 #include "fac.h"
24 
25 #define MapCellRank(i, j , k, rank)             \
26    {                                            \
27       rank = 4*k + 2*j + i;                     \
28    }
29 
30 #define InverseMapCellRank(rank, stencil)       \
31    {                                            \
32       HYPRE_Int ij,ii,jj,kk;                    \
33       ij = (rank%4);                            \
34       ii = (ij%2);                              \
35       jj = (ij-ii)/2;                           \
36       kk = (rank-2*jj-ii)/4;                    \
37       hypre_SetIndex3(stencil, ii, jj, kk);     \
38    }
39 
40 /*--------------------------------------------------------------------------
41  * hypre_FacSemiRestrictData data structure
42  *--------------------------------------------------------------------------*/
43 
44 typedef struct
45 {
46    HYPRE_Int             nvars;
47    hypre_Index           stride;
48 
49    hypre_SStructPVector *fgrid_cvectors;     /* the grid of this vector may not
50                                                 be on the actual grid */
51    hypre_BoxArrayArray **identity_arrayboxes;
52    hypre_BoxArrayArray **fullwgt_ownboxes;
53    hypre_BoxArrayArray **fullwgt_sendboxes;
54 
55    HYPRE_Int          ***own_cboxnums;       /* local crs boxnums of ownboxes */
56 
57    hypre_CommPkg       **interlevel_comm;
58 /*   hypre_CommPkg       **intralevel_comm;*/ /* may need to build an intra comm so
59      that each processor only fullwts its
60      own fine data- may need to add contrib */
61 
62 } hypre_FacSemiRestrictData2;
63 
64 /*--------------------------------------------------------------------------
65  * hypre_FacSemiRestrictCreate
66  *--------------------------------------------------------------------------*/
67 
68 HYPRE_Int
hypre_FacSemiRestrictCreate2(void ** fac_restrict_vdata_ptr)69 hypre_FacSemiRestrictCreate2( void **fac_restrict_vdata_ptr)
70 {
71    HYPRE_Int                   ierr = 0;
72    hypre_FacSemiRestrictData2 *fac_restrict_data;
73 
74    fac_restrict_data       = hypre_CTAlloc(hypre_FacSemiRestrictData2,  1, HYPRE_MEMORY_HOST);
75    *fac_restrict_vdata_ptr  = (void *) fac_restrict_data;
76 
77    return ierr;
78 }
79 
80 /*--------------------------------------------------------------------------
81  * hypre_FacSemiRestrictSetup:
82  *   Two types of communication are needed- one for the interlevel coarsened
83  *   fine boxes, and the other for the ghostlayer of the restricted vector.
84  *
85  * Approach: Identity away from the patches & fullweighting in a patch.
86  * Since a fbox may not have the desired mapping
87  *   fbox= [a_0, a_1, a_2]x [b_0, b_1, b_2],  a_i= c_i*rfactor[i]
88  *                                            b_i= f_i*rfactor[i] + g_i
89  * with g_i= (rfactor[i]-1), attention must be paid to what the own_boxes,
90  * send_boxes, and recv_boxes are. These map overlap. The reason:
91  * myproc fullwgts what it can or equivalently, gets the restriction
92  * contributions of its data. Some off_procs can compute the remaining
93  * part of the agglomerate belonging to myproc and communicate it to myproc.
94  * Hence, myproc's own_boxes contains these nodes as well as myproc's
95  * recv_boxes.
96  *--------------------------------------------------------------------------*/
97 
98 HYPRE_Int
hypre_FacSemiRestrictSetup2(void * fac_restrict_vdata,hypre_SStructVector * r,HYPRE_Int part_crse,HYPRE_Int part_fine,hypre_SStructPVector * rc,hypre_Index rfactors)99 hypre_FacSemiRestrictSetup2( void                 *fac_restrict_vdata,
100                              hypre_SStructVector  *r,
101                              HYPRE_Int             part_crse,
102                              HYPRE_Int             part_fine,
103                              hypre_SStructPVector *rc,
104                              hypre_Index           rfactors )
105 {
106    HYPRE_Int                 ierr = 0;
107 
108    hypre_FacSemiRestrictData2 *fac_restrict_data = (hypre_FacSemiRestrictData2 *)fac_restrict_vdata;
109    MPI_Comm                    comm= hypre_SStructPVectorComm(rc);
110    hypre_CommInfo             *comm_info;
111    hypre_CommPkg             **interlevel_comm;
112 
113    hypre_SStructPVector       *rf= hypre_SStructVectorPVector(r, part_fine);
114    hypre_StructVector         *s_rc, *s_cvector;
115    hypre_SStructPGrid         *pgrid;
116 
117    hypre_SStructPVector       *fgrid_cvectors;
118    hypre_SStructPGrid         *fgrid_coarsen;
119    hypre_BoxArrayArray       **identity_arrayboxes;
120    hypre_BoxArrayArray       **fullwgt_ownboxes;
121    hypre_BoxArrayArray       **fullwgt_sendboxes;
122    hypre_BoxArray             *boxarray;
123    hypre_BoxArray             *tmp_boxarray, *intersect_boxes;
124    HYPRE_Int                ***own_cboxnums;
125 
126    hypre_BoxArrayArray       **send_boxes, *send_rboxes;
127    HYPRE_Int                ***send_processes;
128    HYPRE_Int                ***send_remote_boxnums;
129 
130    hypre_BoxArrayArray       **recv_boxes, *recv_rboxes;
131    HYPRE_Int                ***recv_processes;
132    HYPRE_Int                ***recv_remote_boxnums;
133 
134    hypre_BoxManager           *boxman;
135    hypre_BoxManEntry         **boxman_entries;
136    HYPRE_Int                   nboxman_entries;
137 
138    hypre_Box                   box, scaled_box;
139 
140    hypre_Index                 zero_index, index, ilower, iupper;
141    HYPRE_Int                   ndim= hypre_SStructVectorNDim(r);
142    HYPRE_Int                   myproc, proc;
143    HYPRE_Int                   nvars, vars;
144    HYPRE_Int                   num_values;
145 
146    HYPRE_Int                   i, cnt1, cnt2;
147    HYPRE_Int                   fi, ci;
148 
149    hypre_BoxInit(&box, ndim);
150    hypre_BoxInit(&scaled_box, ndim);
151 
152    hypre_MPI_Comm_rank(comm, &myproc);
153    hypre_ClearIndex(zero_index);
154 
155    nvars= hypre_SStructPVectorNVars(rc);
156    (fac_restrict_data -> nvars)=  nvars;
157    hypre_CopyIndex(rfactors, (fac_restrict_data -> stride));
158    for (i= ndim; i< 3; i++)
159    {
160       rfactors[i]= 1;
161    }
162 
163    /* work vector for storing the fullweighted fgrid boxes */
164    hypre_SStructPGridCreate(hypre_SStructPVectorComm(rf), ndim, &fgrid_coarsen);
165    pgrid= hypre_SStructPVectorPGrid(rf);
166    for (vars= 0; vars< nvars; vars++)
167    {
168       boxarray= hypre_StructGridBoxes(hypre_SStructPGridSGrid(pgrid, vars));
169       hypre_ForBoxI(fi, boxarray)
170       {
171          hypre_CopyBox(hypre_BoxArrayBox(boxarray, fi), &box);
172          hypre_StructMapFineToCoarse(hypre_BoxIMin(&box), zero_index,
173                                      rfactors, hypre_BoxIMin(&box));
174          hypre_StructMapFineToCoarse(hypre_BoxIMax(&box), zero_index,
175                                      rfactors, hypre_BoxIMax(&box));
176          hypre_SStructPGridSetExtents(fgrid_coarsen,
177                                       hypre_BoxIMin(&box),
178                                       hypre_BoxIMax(&box));
179       }
180    }
181    hypre_SStructPGridSetVariables( fgrid_coarsen, nvars,
182                                    hypre_SStructPGridVarTypes(pgrid) );
183    hypre_SStructPGridAssemble(fgrid_coarsen);
184 
185    hypre_SStructPVectorCreate(hypre_SStructPGridComm(fgrid_coarsen), fgrid_coarsen,
186                               &fgrid_cvectors);
187    hypre_SStructPVectorInitialize(fgrid_cvectors);
188    hypre_SStructPVectorAssemble(fgrid_cvectors);
189 
190    /* pgrid fgrid_coarsen no longer needed */
191    hypre_SStructPGridDestroy(fgrid_coarsen);
192 
193    fac_restrict_data -> fgrid_cvectors= fgrid_cvectors;
194 
195    /*--------------------------------------------------------------------------
196     * boxes that are not underlying a fine box:
197     *
198     * algorithm: subtract all coarsened fine grid boxes that intersect with
199     * this processor's coarse boxes. Note that we cannot loop over all the
200     * coarsened fine boxes and subtract them from the coarse grid since we do
201     * not know if some of the overlying fine boxes belong on another
202     * processor. For each cbox, we get a boxarray of boxes that are not
203     * underlying-> size(identity_arrayboxes[vars])= #cboxes.
204     *
205     * Note that no contraction is needed for the intersect boxes since they
206     * will be subtracted from the cbox. Contraction can erroneously lead
207     * to bigger identity boxes.
208     *--------------------------------------------------------------------------*/
209    identity_arrayboxes= hypre_CTAlloc(hypre_BoxArrayArray *,  nvars, HYPRE_MEMORY_HOST);
210    pgrid= hypre_SStructPVectorPGrid(rc);
211 
212    hypre_ClearIndex(index);
213    for (i= 0; i< ndim; i++)
214    {
215       index[i]= rfactors[i]-1;
216    }
217 
218    tmp_boxarray = hypre_BoxArrayCreate(0, ndim);
219    for (vars= 0; vars< nvars; vars++)
220    {
221       boxman= hypre_SStructGridBoxManager(hypre_SStructVectorGrid(r),
222                                           part_fine, vars);
223       boxarray= hypre_StructGridBoxes(hypre_SStructPGridSGrid(pgrid, vars));
224 
225       identity_arrayboxes[vars]= hypre_BoxArrayArrayCreate(hypre_BoxArraySize(boxarray), ndim);
226 
227       hypre_ForBoxI(ci, boxarray)
228       {
229          hypre_CopyBox(hypre_BoxArrayBox(boxarray, ci), &box);
230          hypre_AppendBox(&box,
231                          hypre_BoxArrayArrayBoxArray(identity_arrayboxes[vars], ci));
232 
233          hypre_StructMapCoarseToFine(hypre_BoxIMin(&box), zero_index,
234                                      rfactors, hypre_BoxIMin(&scaled_box));
235          hypre_StructMapCoarseToFine(hypre_BoxIMax(&box), index,
236                                      rfactors, hypre_BoxIMax(&scaled_box));
237 
238          hypre_BoxManIntersect(boxman, hypre_BoxIMin(&scaled_box),
239                                hypre_BoxIMax(&scaled_box), &boxman_entries,
240                                &nboxman_entries);
241 
242          /* all send and coarsened fboxes on this processor are collected */
243          intersect_boxes= hypre_BoxArrayCreate(0, ndim);
244          for (i= 0; i< nboxman_entries; i++)
245          {
246             hypre_BoxManEntryGetExtents(boxman_entries[i], ilower, iupper);
247             hypre_BoxSetExtents(&box, ilower, iupper);
248             hypre_IntersectBoxes(&box, &scaled_box, &box);
249 
250             hypre_StructMapFineToCoarse(hypre_BoxIMin(&box), zero_index,
251                                         rfactors, hypre_BoxIMin(&box));
252             hypre_StructMapFineToCoarse(hypre_BoxIMax(&box), zero_index,
253                                         rfactors, hypre_BoxIMax(&box));
254             hypre_AppendBox(&box, intersect_boxes);
255          }
256 
257          hypre_SubtractBoxArrays(hypre_BoxArrayArrayBoxArray(identity_arrayboxes[vars], ci),
258                                  intersect_boxes, tmp_boxarray);
259          hypre_MinUnionBoxes(hypre_BoxArrayArrayBoxArray(identity_arrayboxes[vars], ci));
260 
261          hypre_TFree(boxman_entries, HYPRE_MEMORY_HOST);
262          hypre_BoxArrayDestroy(intersect_boxes);
263       }
264    }
265    hypre_BoxArrayDestroy(tmp_boxarray);
266    fac_restrict_data -> identity_arrayboxes= identity_arrayboxes;
267 
268    /*--------------------------------------------------------------------------
269     * fboxes that are coarsened. Some will be sent. We create the communication
270     * pattern. For each fbox, we need a boxarray of sendboxes or ownboxes.
271     *
272     * Algorithm: Coarsen each fbox and see which cboxes it intersects using
273     * BoxManIntersect. Cboxes that do not belong on the processor will have
274     * a chunk sent to it.
275     *
276     * Note that no contraction is needed. Contraction can lead to erroneous
277     * send_boxes.
278     *--------------------------------------------------------------------------*/
279    interlevel_comm= hypre_CTAlloc(hypre_CommPkg *,  nvars, HYPRE_MEMORY_HOST);
280    fullwgt_sendboxes= hypre_CTAlloc(hypre_BoxArrayArray *,  nvars, HYPRE_MEMORY_HOST);
281    fullwgt_ownboxes= hypre_CTAlloc(hypre_BoxArrayArray *,  nvars, HYPRE_MEMORY_HOST);
282    own_cboxnums= hypre_CTAlloc(HYPRE_Int **,  nvars, HYPRE_MEMORY_HOST);
283 
284    send_boxes= hypre_CTAlloc(hypre_BoxArrayArray *,  nvars, HYPRE_MEMORY_HOST);
285    send_processes= hypre_CTAlloc(HYPRE_Int **,  nvars, HYPRE_MEMORY_HOST);
286    send_remote_boxnums= hypre_CTAlloc(HYPRE_Int **,  nvars, HYPRE_MEMORY_HOST);
287 
288    pgrid= hypre_SStructPVectorPGrid(rf);
289    for (vars= 0; vars< nvars; vars++)
290    {
291       boxman= hypre_SStructGridBoxManager(hypre_SStructVectorGrid(r),
292                                           part_crse, vars);
293       boxarray= hypre_StructGridBoxes(hypre_SStructPGridSGrid(pgrid, vars));
294       fullwgt_sendboxes[vars]= hypre_BoxArrayArrayCreate(hypre_BoxArraySize(boxarray), ndim);
295       fullwgt_ownboxes[vars] = hypre_BoxArrayArrayCreate(hypre_BoxArraySize(boxarray), ndim);
296       own_cboxnums[vars]     = hypre_CTAlloc(HYPRE_Int *,  hypre_BoxArraySize(boxarray), HYPRE_MEMORY_HOST);
297 
298       send_boxes[vars]         = hypre_BoxArrayArrayCreate(hypre_BoxArraySize(boxarray), ndim);
299       send_processes[vars]     = hypre_CTAlloc(HYPRE_Int *,  hypre_BoxArraySize(boxarray), HYPRE_MEMORY_HOST);
300       send_remote_boxnums[vars]= hypre_CTAlloc(HYPRE_Int *,  hypre_BoxArraySize(boxarray), HYPRE_MEMORY_HOST);
301 
302       hypre_ForBoxI(fi, boxarray)
303       {
304          hypre_CopyBox(hypre_BoxArrayBox(boxarray, fi), &box);
305          hypre_StructMapFineToCoarse(hypre_BoxIMin(&box), zero_index,
306                                      rfactors, hypre_BoxIMin(&scaled_box));
307          hypre_StructMapFineToCoarse(hypre_BoxIMax(&box), zero_index,
308                                      rfactors, hypre_BoxIMax(&scaled_box));
309 
310          hypre_BoxManIntersect(boxman, hypre_BoxIMin(&scaled_box),
311                                hypre_BoxIMax(&scaled_box), &boxman_entries, &nboxman_entries);
312 
313          cnt1= 0; cnt2= 0;
314          for (i= 0; i< nboxman_entries; i++)
315          {
316             hypre_SStructBoxManEntryGetProcess(boxman_entries[i], &proc);
317             if (proc != myproc)
318             {
319                cnt1++;
320             }
321             else
322             {
323                cnt2++;
324             }
325          }
326          send_processes[vars][fi]     = hypre_CTAlloc(HYPRE_Int,  cnt1, HYPRE_MEMORY_HOST);
327          send_remote_boxnums[vars][fi]= hypre_CTAlloc(HYPRE_Int,  cnt1, HYPRE_MEMORY_HOST);
328          own_cboxnums[vars][fi]       = hypre_CTAlloc(HYPRE_Int,  cnt2, HYPRE_MEMORY_HOST);
329 
330          cnt1= 0; cnt2= 0;
331          for (i= 0; i< nboxman_entries; i++)
332          {
333             hypre_BoxManEntryGetExtents(boxman_entries[i], ilower, iupper);
334             hypre_BoxSetExtents(&box, ilower, iupper);
335             hypre_IntersectBoxes(&box, &scaled_box, &box);
336 
337             hypre_SStructBoxManEntryGetProcess(boxman_entries[i], &proc);
338             if (proc != myproc)
339             {
340                hypre_AppendBox(&box,
341                                hypre_BoxArrayArrayBoxArray(fullwgt_sendboxes[vars], fi));
342                hypre_AppendBox(&box,
343                                hypre_BoxArrayArrayBoxArray(send_boxes[vars], fi));
344 
345                send_processes[vars][fi][cnt1]= proc;
346                hypre_SStructBoxManEntryGetBoxnum(boxman_entries[i],
347                                                  &send_remote_boxnums[vars][fi][cnt1]);
348                cnt1++;
349             }
350 
351             else
352             {
353                hypre_AppendBox(&box,
354                                hypre_BoxArrayArrayBoxArray(fullwgt_ownboxes[vars], fi));
355                hypre_SStructBoxManEntryGetBoxnum(boxman_entries[i],
356                                                  &own_cboxnums[vars][fi][cnt2]);
357                cnt2++;
358             }
359          }
360          hypre_TFree(boxman_entries, HYPRE_MEMORY_HOST);
361 
362       }  /* hypre_ForBoxI(fi, boxarray) */
363    }     /* for (vars= 0; vars< nvars; vars++) */
364 
365    (fac_restrict_data -> fullwgt_sendboxes)= fullwgt_sendboxes;
366    (fac_restrict_data -> fullwgt_ownboxes)= fullwgt_ownboxes;
367    (fac_restrict_data -> own_cboxnums)= own_cboxnums;
368 
369    /*--------------------------------------------------------------------------
370     * coarsened fboxes this processor will receive.
371     *
372     * Algorithm: For each cbox on this processor, refine it and find which
373     * processors the refinement belongs in. The processors owning a chunk
374     * are the recv_processors.
375     *--------------------------------------------------------------------------*/
376    recv_boxes= hypre_CTAlloc(hypre_BoxArrayArray *,  nvars, HYPRE_MEMORY_HOST);
377    recv_processes= hypre_CTAlloc(HYPRE_Int **,  nvars, HYPRE_MEMORY_HOST);
378 
379    /* dummy pointer for CommInfoCreate */
380    recv_remote_boxnums= hypre_CTAlloc(HYPRE_Int **,  nvars, HYPRE_MEMORY_HOST);
381 
382    pgrid= hypre_SStructPVectorPGrid(rc);
383    for (vars= 0; vars< nvars; vars++)
384    {
385       boxman= hypre_SStructGridBoxManager(hypre_SStructVectorGrid(r),
386                                           part_fine, vars);
387       boxarray= hypre_StructGridBoxes(hypre_SStructPGridSGrid(pgrid, vars));
388 
389       recv_boxes[vars]    = hypre_BoxArrayArrayCreate(hypre_BoxArraySize(boxarray), ndim);
390       recv_processes[vars]= hypre_CTAlloc(HYPRE_Int *,  hypre_BoxArraySize(boxarray), HYPRE_MEMORY_HOST);
391       recv_remote_boxnums[vars]= hypre_CTAlloc(HYPRE_Int *,  hypre_BoxArraySize(boxarray), HYPRE_MEMORY_HOST);
392 
393       hypre_ForBoxI(ci, boxarray)
394       {
395          hypre_CopyBox(hypre_BoxArrayBox(boxarray, ci), &box);
396          hypre_StructMapCoarseToFine(hypre_BoxIMin(&box), zero_index,
397                                      rfactors, hypre_BoxIMin(&scaled_box));
398          hypre_StructMapCoarseToFine(hypre_BoxIMax(&box), index,
399                                      rfactors, hypre_BoxIMax(&scaled_box));
400 
401          hypre_BoxManIntersect(boxman, hypre_BoxIMin(&scaled_box),
402                                hypre_BoxIMax(&scaled_box), &boxman_entries, &nboxman_entries);
403 
404          cnt1= 0;
405          for (i= 0; i< nboxman_entries; i++)
406          {
407             hypre_SStructBoxManEntryGetProcess(boxman_entries[i], &proc);
408             if (proc != myproc)
409             {
410                cnt1++;
411             }
412          }
413          recv_processes[vars][ci]= hypre_CTAlloc(HYPRE_Int,  cnt1, HYPRE_MEMORY_HOST);
414          recv_remote_boxnums[vars][ci]= hypre_CTAlloc(HYPRE_Int ,  cnt1, HYPRE_MEMORY_HOST);
415 
416          cnt1= 0;
417          for (i= 0; i< nboxman_entries; i++)
418          {
419             hypre_SStructBoxManEntryGetProcess(boxman_entries[i], &proc);
420             if (proc != myproc)
421             {
422                hypre_BoxManEntryGetExtents(boxman_entries[i], ilower, iupper);
423                hypre_BoxSetExtents(&box, ilower, iupper);
424                hypre_IntersectBoxes(&box, &scaled_box, &box);
425 
426                /* no contracting neede */
427                hypre_StructMapFineToCoarse(hypre_BoxIMin(&box), zero_index,
428                                            rfactors, hypre_BoxIMin(&box));
429                hypre_StructMapFineToCoarse(hypre_BoxIMax(&box), zero_index,
430                                            rfactors, hypre_BoxIMax(&box));
431                hypre_AppendBox(&box,
432                                hypre_BoxArrayArrayBoxArray(recv_boxes[vars], ci));
433 
434                recv_processes[vars][ci][cnt1]= proc;
435                cnt1++;
436 
437             }  /* if (proc != myproc) */
438          }     /* for (i= 0; i< nmap_entries; i++) */
439 
440          hypre_TFree(boxman_entries, HYPRE_MEMORY_HOST);
441 
442       }        /* hypre_ForBoxI(ci, boxarray) */
443    }           /* for (vars= 0; vars< nvars; vars++) */
444 
445    num_values= 1;
446    for (vars= 0; vars< nvars; vars++)
447    {
448       s_rc     = hypre_SStructPVectorSVector(rc, vars);
449       s_cvector= hypre_SStructPVectorSVector(fgrid_cvectors, vars);
450       send_rboxes= hypre_BoxArrayArrayDuplicate(send_boxes[vars]);
451       recv_rboxes= hypre_BoxArrayArrayDuplicate(recv_boxes[vars]);
452 
453       hypre_CommInfoCreate(send_boxes[vars], recv_boxes[vars],
454                            send_processes[vars], recv_processes[vars],
455                            send_remote_boxnums[vars], recv_remote_boxnums[vars],
456                            send_rboxes, recv_rboxes, 1, &comm_info);
457 
458       hypre_CommPkgCreate(comm_info,
459                           hypre_StructVectorDataSpace(s_cvector),
460                           hypre_StructVectorDataSpace(s_rc),
461                           num_values, NULL, 0,
462                           hypre_StructVectorComm(s_rc),
463                           &interlevel_comm[vars]);
464       hypre_CommInfoDestroy(comm_info);
465    }
466    hypre_TFree(send_boxes, HYPRE_MEMORY_HOST);
467    hypre_TFree(recv_boxes, HYPRE_MEMORY_HOST);
468    hypre_TFree(send_processes, HYPRE_MEMORY_HOST);
469    hypre_TFree(recv_processes, HYPRE_MEMORY_HOST);
470    hypre_TFree(send_remote_boxnums, HYPRE_MEMORY_HOST);
471    hypre_TFree(recv_remote_boxnums, HYPRE_MEMORY_HOST);
472 
473    (fac_restrict_data -> interlevel_comm)= interlevel_comm;
474 
475    return ierr;
476 
477 }
478 
479 HYPRE_Int
hypre_FACRestrict2(void * fac_restrict_vdata,hypre_SStructVector * xf,hypre_SStructPVector * xc)480 hypre_FACRestrict2( void                 *  fac_restrict_vdata,
481                     hypre_SStructVector  *  xf,
482                     hypre_SStructPVector *  xc)
483 {
484    HYPRE_Int ierr = 0;
485 
486    hypre_FacSemiRestrictData2 *restrict_data = (hypre_FacSemiRestrictData2 *)fac_restrict_vdata;
487 
488    hypre_SStructPVector   *fgrid_cvectors     = restrict_data->fgrid_cvectors;
489    hypre_BoxArrayArray   **identity_arrayboxes= restrict_data->identity_arrayboxes;
490    hypre_BoxArrayArray   **fullwgt_ownboxes   = restrict_data->fullwgt_ownboxes;
491    HYPRE_Int            ***own_cboxnums       = restrict_data->own_cboxnums;
492    hypre_CommPkg         **interlevel_comm= restrict_data-> interlevel_comm;
493    hypre_CommHandle       *comm_handle;
494 
495    HYPRE_Int               ndim           =  hypre_SStructVectorNDim(xf);
496 
497    hypre_BoxArrayArray    *arrayarray_ownboxes;
498 
499    hypre_IndexRef          stride;  /* refinement factors */
500 
501    hypre_StructGrid       *fgrid;
502    hypre_BoxArray         *fgrid_boxes;
503    hypre_Box              *fgrid_box;
504    hypre_StructGrid       *cgrid;
505    hypre_BoxArray         *cgrid_boxes;
506    hypre_BoxArray         *own_boxes;
507    hypre_Box              *own_box;
508    HYPRE_Int              *boxnums;
509 
510    hypre_Box              *xc_temp_dbox;
511    hypre_Box              *xf_dbox;
512 
513    hypre_StructVector     *xc_temp;
514    hypre_StructVector     *xc_var;
515    hypre_StructVector     *xf_var;
516 
517    HYPRE_Real           ***xfp;
518    HYPRE_Real           ***xcp;
519    HYPRE_Real           ***xcp_temp;
520 
521    hypre_Index             loop_size, lindex;
522    hypre_Index             start, fbox_size, node_offset;
523    hypre_Index             startc;
524    hypre_Index             stridec;
525    hypre_Index             rfactors;
526    hypre_Index             temp_index1, temp_index2;
527 
528    HYPRE_Int               fi, ci;
529    HYPRE_Int               nvars, var;
530    HYPRE_Int               volume_crse_cell;
531 
532    HYPRE_Int               i, j, k;
533    HYPRE_Int               imax, jmax, kmax;
534    HYPRE_Int               icell, jcell, kcell, ijkcell;
535 
536    HYPRE_Real             *sum;
537    HYPRE_Real              scaling;
538 
539    HYPRE_Int               part_crse= 0;
540    HYPRE_Int               part_fine= 1;
541    HYPRE_Int               num_coarse_cells;
542 
543    /*-----------------------------------------------------------------------
544     * Initialize some things
545     *-----------------------------------------------------------------------*/
546    stride= (restrict_data -> stride);
547 
548    hypre_ClearIndex(stridec);
549    for (i= 0; i< ndim; i++)
550    {
551       stridec[i]= 1;
552    }
553 
554    hypre_CopyIndex(stride, rfactors);
555    for (i= ndim; i< 3; i++)
556    {
557       rfactors[i]= 1;
558    }
559 
560    volume_crse_cell= 1;
561    for (i= 0; i< ndim; i++)
562    {
563       volume_crse_cell*= rfactors[i];
564    }
565 
566    /*-----------------------------------------------------------------------
567     * We are assuming the refinement and coarsening have same variable
568     * types.
569     *-----------------------------------------------------------------------*/
570    nvars=  hypre_SStructPVectorNVars(xc);
571 
572    /*-----------------------------------------------------------------------
573     * For each coordinate direction, a fine node can contribute only to the
574     * left or right cell=> only 2 coarse cells per direction.
575     *-----------------------------------------------------------------------*/
576    num_coarse_cells= 1;
577    for (i= 0; i< ndim; i++)
578    {
579       num_coarse_cells*= 2;
580    }
581    sum= hypre_CTAlloc(HYPRE_Real,  num_coarse_cells, HYPRE_MEMORY_HOST);
582 
583    /*--------------------------------------------------------------------------
584     * Scaling for averaging restriction.
585     *--------------------------------------------------------------------------*/
586    scaling= 1.0;
587    for (i= 0; i< ndim-2; i++)
588    {
589       scaling*= rfactors[0];
590    }
591 
592    /*-----------------------------------------------------------------------
593     * Initialize the coarse vector to zero.
594     *-----------------------------------------------------------------------*/
595    hypre_SStructPVectorSetConstantValues(xc, 0.0);
596 
597    /*-----------------------------------------------------------------------
598     * Copy the coarse data: xf[part_crse] -> xc
599     *-----------------------------------------------------------------------*/
600    hypre_SStructPartialPCopy(hypre_SStructVectorPVector(xf, part_crse),
601                              xc, identity_arrayboxes);
602 
603    /*-----------------------------------------------------------------------
604     * Piecewise constant restriction over the refinement patch.
605     *
606     * Initialize the work vector by setting to zero.
607     *-----------------------------------------------------------------------*/
608    hypre_SStructPVectorSetConstantValues(fgrid_cvectors, 0.0);
609 
610    /*-----------------------------------------------------------------------
611     * Allocate memory for the data pointers. Assuming constant restriction.
612     * We stride through the refinement patch by the refinement factors, and
613     * so we must have pointers to the intermediate fine nodes=> xfp will
614     * be size rfactors[2]*rfactors[1]. Because the fbox may not have the
615     * ideal refinement form, we need to contribute to 2^ndim cells.
616     *-----------------------------------------------------------------------*/
617    if (ndim > 1)
618    {
619       xcp_temp= hypre_TAlloc(HYPRE_Real **,  (ndim-1), HYPRE_MEMORY_HOST);
620       xcp     = hypre_TAlloc(HYPRE_Real **,  (ndim-1), HYPRE_MEMORY_HOST);
621       for (k= 0; k< (ndim-1); k++)
622       {
623          xcp_temp[k]= hypre_TAlloc(HYPRE_Real *,  2, HYPRE_MEMORY_HOST);
624          xcp[k]     = hypre_TAlloc(HYPRE_Real *,  2, HYPRE_MEMORY_HOST);
625       }
626    }
627    else /* 1d does not really require these HYPRE_Real ptrs */
628    {
629       xcp_temp   = hypre_TAlloc(HYPRE_Real **,  1, HYPRE_MEMORY_HOST);
630       xcp        = hypre_TAlloc(HYPRE_Real **,  1, HYPRE_MEMORY_HOST);
631       xcp_temp[0]= hypre_TAlloc(HYPRE_Real *,  1, HYPRE_MEMORY_HOST);
632       xcp[0]     = hypre_TAlloc(HYPRE_Real *,  1, HYPRE_MEMORY_HOST);
633    }
634 
635    /* memory allocation of xfp is okay for all dimensions */
636    xfp= hypre_TAlloc(HYPRE_Real **,  rfactors[2], HYPRE_MEMORY_HOST);
637    for (k= 0; k< rfactors[2]; k++)
638    {
639       xfp[k]= hypre_TAlloc(HYPRE_Real *,  rfactors[1], HYPRE_MEMORY_HOST);
640    }
641 
642    for (var= 0; var< nvars; var++)
643    {
644       xc_temp= hypre_SStructPVectorSVector(fgrid_cvectors, var);
645       xf_var= hypre_SStructPVectorSVector(hypre_SStructVectorPVector(xf,part_fine),
646                                           var);
647 
648       fgrid        = hypre_StructVectorGrid(xf_var);
649       fgrid_boxes  = hypre_StructGridBoxes(fgrid);
650       cgrid        = hypre_StructVectorGrid(xc_temp);
651       cgrid_boxes  = hypre_StructGridBoxes(cgrid);
652 
653       hypre_ForBoxI(fi, fgrid_boxes)
654       {
655          fgrid_box= hypre_BoxArrayBox(fgrid_boxes, fi);
656 
657          /*--------------------------------------------------------------------
658           * Get the ptrs for the fine struct_vectors.
659           *--------------------------------------------------------------------*/
660          xf_dbox  = hypre_BoxArrayBox(hypre_StructVectorDataSpace(xf_var), fi);
661          for (k= 0; k< rfactors[2]; k++)
662          {
663             for (j=0; j< rfactors[1]; j++)
664             {
665                hypre_SetIndex3(temp_index1, 0, j, k);
666                xfp[k][j]= hypre_StructVectorBoxData(xf_var, fi) +
667                   hypre_BoxOffsetDistance(xf_dbox, temp_index1);
668             }
669          }
670 
671          /*--------------------------------------------------------------------
672           * Get the ptrs for the coarse struct_vectors. Note that the coarse
673           * work vector is indexed with respect to the local fine box no.'s.
674           * Work vectors were created this way.
675           * Dimensionally dependent.
676           *--------------------------------------------------------------------*/
677          xc_temp_dbox= hypre_BoxArrayBox(hypre_StructVectorDataSpace(xc_temp), fi);
678          if (ndim > 1)
679          {
680             for (k= 0; k< (ndim-1); k++)
681             {
682                for (j=0; j< 2; j++)
683                {
684                   hypre_SetIndex3(temp_index1, 0, j, k);
685                   xcp_temp[k][j]= hypre_StructVectorBoxData(xc_temp, fi) +
686                      hypre_BoxOffsetDistance(xc_temp_dbox, temp_index1);
687                }
688             }
689          }
690          else /* 1d case */
691          {
692             hypre_ClearIndex(temp_index1);
693             xcp_temp[0][0]= hypre_StructVectorBoxData(xc_temp, fi) +
694                hypre_BoxOffsetDistance(xc_temp_dbox, temp_index1);
695          }
696          hypre_CopyIndex(hypre_BoxIMin(fgrid_box), start);
697          hypre_CopyIndex(hypre_BoxIMax(fgrid_box), fbox_size);
698 
699          /*--------------------------------------------------------------------
700           * Adjust "fbox_size" so that this hypre_Index is appropriate for
701           * ndim < 3.
702           *    fbox_size= hypre_BoxIMax(fgrid_box)-hypre_BoxIMin(fgrid_box)+1.
703           *--------------------------------------------------------------------*/
704          for (i= 0; i< 3; i++)
705          {
706             fbox_size[i]-= (start[i]-1);
707          }
708 
709          /*--------------------------------------------------------------------
710           * The fine intersection box may not be divisible by the refinement
711           * factor. We need to know the remainder to determine which
712           * coarse node gets the restricted values.
713           *--------------------------------------------------------------------*/
714          hypre_ClearIndex(node_offset);
715          for (i= 0; i< ndim; i++)
716          {
717             node_offset[i]= rfactors[i]-(start[i]%rfactors[i])-1;
718          }
719 
720          hypre_SetIndex3(temp_index2, 0, 0, 0);
721          hypre_StructMapFineToCoarse(start, temp_index2, rfactors, startc);
722 
723          hypre_BoxGetSize(fgrid_box, temp_index1);
724          hypre_StructMapFineToCoarse(temp_index1, temp_index2, rfactors, loop_size);
725 
726          hypre_SerialBoxLoop2Begin(ndim, loop_size,
727                                    xf_dbox, start, stride,  xfi,
728                                    xc_temp_dbox, startc, stridec, xci);
729          {
730             /*-----------------------------------------------------------------
731              * Arithmetic average the refinement patch values to get
732              * restricted coarse grid values in an agglomerate; i.e.,
733              * piecewise constant restriction.
734              *-----------------------------------------------------------------*/
735             zypre_BoxLoopGetIndex(lindex);
736             imax= hypre_min( (fbox_size[0]-lindex[0]*stride[0]), rfactors[0] );
737             jmax= hypre_min( (fbox_size[1]-lindex[1]*stride[1]), rfactors[1] );
738             kmax= hypre_min( (fbox_size[2]-lindex[2]*stride[2]), rfactors[2] );
739 
740             for (i= 0; i< num_coarse_cells; i++)
741             {
742                sum[i]= 0.0;
743             }
744 
745             for (k= 0; k< kmax; k++)
746             {
747                kcell= 1;
748                if (k <= node_offset[2])
749                {
750                   kcell= 0;
751                }
752 
753                for (j= 0; j< jmax; j++)
754                {
755                   jcell= 1;
756                   if (j <= node_offset[1])
757                   {
758                      jcell= 0;
759                   }
760 
761                   for (i= 0; i< imax; i++)
762                   {
763                      icell= 1;
764                      if (i <= node_offset[0])
765                      {
766                         icell= 0;
767                      }
768 
769                      MapCellRank(icell, jcell , kcell, ijkcell);
770                      sum[ijkcell]+= xfp[k][j][xfi+i];
771                   }
772                }
773             }
774 
775             /*-----------------------------------------------------------------
776              * Add the compute averages to the correct coarse cell.
777              *-----------------------------------------------------------------*/
778             for (ijkcell= 0; ijkcell< num_coarse_cells; ijkcell++)
779             {
780                if (sum[ijkcell] != 0.0)
781                {
782                   sum[ijkcell]/= scaling;
783                   InverseMapCellRank(ijkcell, temp_index2);
784                   i=  temp_index2[0];
785                   j=  temp_index2[1];
786                   k=  temp_index2[2];
787                   xcp_temp[k][j][xci+i]+= sum[ijkcell];
788                }
789             }
790 
791          }
792          hypre_SerialBoxLoop2End(xfi, xci);
793 
794       }   /* hypre_ForBoxI(fi, fgrid_boxes) */
795    }      /* for (var= 0; var< nvars; var++)*/
796 
797    /*------------------------------------------------------------------
798     * Communicate calculated restricted function over the coarsened
799     * patch. Only actual communicated values will be put in the
800     * coarse vector.
801     *------------------------------------------------------------------*/
802    for (var= 0; var< nvars; var++)
803    {
804       xc_temp= hypre_SStructPVectorSVector(fgrid_cvectors, var);
805       xc_var= hypre_SStructPVectorSVector(xc, var);
806       hypre_InitializeCommunication(interlevel_comm[var],
807                                     hypre_StructVectorData(xc_temp),
808                                     hypre_StructVectorData(xc_var), 0, 0,
809                                     &comm_handle);
810 
811       hypre_FinalizeCommunication(comm_handle);
812    }
813 
814    /*------------------------------------------------------------------
815     * Need to add the coarsened patches that belong on this processor
816     * to the coarse vector.
817     *------------------------------------------------------------------*/
818    for (var= 0; var< nvars; var++)
819    {
820       xc_temp= hypre_SStructPVectorSVector(fgrid_cvectors, var);
821       xc_var= hypre_SStructPVectorSVector(xc, var);
822 
823       cgrid        = hypre_StructVectorGrid(xc_temp);
824       cgrid_boxes  = hypre_StructGridBoxes(cgrid);
825 
826       arrayarray_ownboxes= fullwgt_ownboxes[var];
827       hypre_ForBoxI(ci, cgrid_boxes)
828       {
829          xc_temp_dbox= hypre_BoxArrayBox(hypre_StructVectorDataSpace(xc_temp), ci);
830          xcp_temp[0][0]= hypre_StructVectorBoxData(xc_temp, ci);
831 
832          /*--------------------------------------------------------------
833           * Each ci box of cgrid_box has a boxarray of subboxes. Copy
834           * each of these subboxes to the coarse vector.
835           *--------------------------------------------------------------*/
836          own_boxes= hypre_BoxArrayArrayBoxArray(arrayarray_ownboxes, ci);
837          boxnums  = own_cboxnums[var][ci];
838          hypre_ForBoxI(i, own_boxes)
839          {
840             own_box= hypre_BoxArrayBox(own_boxes, i);
841             xf_dbox= hypre_BoxArrayBox(hypre_StructVectorDataSpace(xc_var), boxnums[i]);
842             xcp[0][0]= hypre_StructVectorBoxData(xc_var, boxnums[i]);
843 
844             hypre_BoxGetSize(own_box, loop_size);
845 
846 #define DEVICE_VAR is_device_ptr(xcp, xcp_temp)
847             hypre_BoxLoop2Begin(ndim, loop_size,
848                                 xc_temp_dbox, hypre_BoxIMin(own_box), stridec, xfi,
849                                 xf_dbox, hypre_BoxIMin(own_box), stridec, xci);
850             {
851                xcp[0][0][xci]+= xcp_temp[0][0][xfi];
852             }
853             hypre_BoxLoop2End(xfi, xci);
854 #undef DEVICE_VAR
855 
856          }  /* hypre_ForBoxI(i, own_boxes) */
857       }     /* hypre_ForBoxI(ci, cgrid_boxes) */
858    }        /* for (var= 0; var< nvars; var++) */
859 
860    hypre_TFree(sum, HYPRE_MEMORY_HOST);
861    for (k= 0; k< rfactors[2]; k++)
862    {
863       hypre_TFree(xfp[k], HYPRE_MEMORY_HOST);
864    }
865    hypre_TFree(xfp, HYPRE_MEMORY_HOST);
866 
867    if (ndim > 1)
868    {
869       for (k= 0; k< (ndim-1); k++)
870       {
871          hypre_TFree(xcp_temp[k], HYPRE_MEMORY_HOST);
872          hypre_TFree(xcp[k], HYPRE_MEMORY_HOST);
873       }
874    }
875    else
876    {
877       hypre_TFree(xcp_temp[0], HYPRE_MEMORY_HOST);
878       hypre_TFree(xcp[0], HYPRE_MEMORY_HOST);
879    }
880 
881    hypre_TFree(xcp_temp, HYPRE_MEMORY_HOST);
882    hypre_TFree(xcp, HYPRE_MEMORY_HOST);
883 
884    return ierr;
885 }
886 
887 /*--------------------------------------------------------------------------
888  * hypre_FacSemiRestrictDestroy
889  *--------------------------------------------------------------------------*/
890 
891 HYPRE_Int
hypre_FacSemiRestrictDestroy2(void * fac_restrict_vdata)892 hypre_FacSemiRestrictDestroy2( void *fac_restrict_vdata )
893 {
894    HYPRE_Int                   ierr = 0;
895    hypre_FacSemiRestrictData2 *fac_restrict_data = (hypre_FacSemiRestrictData2 *)fac_restrict_vdata;
896    HYPRE_Int                   nvars;
897    HYPRE_Int                   i, j;
898 
899 
900    if (fac_restrict_data)
901    {
902       nvars= (fac_restrict_data-> nvars);
903       hypre_SStructPVectorDestroy(fac_restrict_data-> fgrid_cvectors);
904 
905       for (i= 0; i< nvars; i++)
906       {
907          hypre_BoxArrayArrayDestroy((fac_restrict_data -> identity_arrayboxes)[i]);
908          hypre_BoxArrayArrayDestroy((fac_restrict_data -> fullwgt_sendboxes)[i]);
909          for (j= 0; j< hypre_BoxArrayArraySize(fac_restrict_data->fullwgt_ownboxes[i]); j++)
910          {
911             hypre_TFree((fac_restrict_data -> own_cboxnums)[i][j], HYPRE_MEMORY_HOST);
912          }
913          hypre_TFree((fac_restrict_data -> own_cboxnums)[i], HYPRE_MEMORY_HOST);
914 
915          hypre_BoxArrayArrayDestroy((fac_restrict_data -> fullwgt_ownboxes)[i]);
916          hypre_CommPkgDestroy((fac_restrict_data -> interlevel_comm)[i]);
917       }
918 
919       hypre_TFree(fac_restrict_data -> identity_arrayboxes, HYPRE_MEMORY_HOST);
920       hypre_TFree(fac_restrict_data -> fullwgt_sendboxes, HYPRE_MEMORY_HOST);
921       hypre_TFree(fac_restrict_data -> own_cboxnums, HYPRE_MEMORY_HOST);
922       hypre_TFree(fac_restrict_data -> fullwgt_ownboxes, HYPRE_MEMORY_HOST);
923       hypre_TFree(fac_restrict_data -> interlevel_comm, HYPRE_MEMORY_HOST);
924 
925       hypre_TFree(fac_restrict_data, HYPRE_MEMORY_HOST);
926    }
927    return ierr;
928 
929 }
930