1 /* Functions specific to the 3-dimensional implementation of DMStag */
2 #include <petsc/private/dmstagimpl.h>
3 
4 /*@C
5   DMStagCreate3d - Create an object to manage data living on the faces, edges, and vertices of a parallelized regular 3D grid.
6 
7   Collective
8 
9   Input Parameters:
10 + comm - MPI communicator
11 . bndx,bndy,bndz - boundary type: DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, or DM_BOUNDARY_GHOSTED
12 . M,N,P - global number of grid points in x,y directions
13 . m,n,p - number of ranks in the x,y directions (may be PETSC_DECIDE)
14 . dof0 - number of degrees of freedom per vertex/point/node/0-cell
15 . dof1 - number of degrees of freedom per edge/1-cell
16 . dof2 - number of degrees of freedom per face/2-cell
17 . dof3 - number of degrees of freedom per element/3-cell
18 . stencilType - ghost/halo region type: DMSTAG_STENCIL_NONE, DMSTAG_STENCIL_BOX, or DMSTAG_STENCIL_STAR
19 . stencilWidth - width, in elements, of halo/ghost region
20 - lx,ly,lz - array sof local x,y,z element counts, of length equal to m,n,p, summing to M,N,P
21 
22   Output Parameter:
23 . dm - the new DMStag object
24 
25   Options Database Keys:
26 + -dm_view - calls DMViewFromOptions() a the conclusion of DMSetUp()
27 . -stag_grid_x <nx> - number of elements in the x direction
28 . -stag_grid_y <ny> - number of elements in the y direction
29 . -stag_grid_z <nz> - number of elements in the z direction
30 . -stag_ranks_x <rx> - number of ranks in the x direction
31 . -stag_ranks_y <ry> - number of ranks in the y direction
32 . -stag_ranks_z <rz> - number of ranks in the z direction
33 . -stag_ghost_stencil_width - width of ghost region, in elements
34 . -stag_boundary_type x <none,ghosted,periodic> - DMBoundaryType value
35 . -stag_boundary_type y <none,ghosted,periodic> - DMBoundaryType value
36 - -stag_boundary_type z <none,ghosted,periodic> - DMBoundaryType value
37 
38   Notes:
39   You must call DMSetUp() after this call before using the DM.
40   If you wish to use the options database (see the keys above) to change values in the DMStag, you must call
41   DMSetFromOptions() after this function but before DMSetUp().
42 
43   Level: beginner
44 
45 .seealso: DMSTAG, DMStagCreate1d(), DMStagCreate2d(), DMDestroy(), DMView(), DMCreateGlobalVector(), DMCreateLocalVector(), DMLocalToGlobalBegin(), DMDACreate3d()
46 @*/
DMStagCreate3d(MPI_Comm comm,DMBoundaryType bndx,DMBoundaryType bndy,DMBoundaryType bndz,PetscInt M,PetscInt N,PetscInt P,PetscInt m,PetscInt n,PetscInt p,PetscInt dof0,PetscInt dof1,PetscInt dof2,PetscInt dof3,DMStagStencilType stencilType,PetscInt stencilWidth,const PetscInt lx[],const PetscInt ly[],const PetscInt lz[],DM * dm)47 PETSC_EXTERN PetscErrorCode DMStagCreate3d(MPI_Comm comm,DMBoundaryType bndx,DMBoundaryType bndy,DMBoundaryType bndz,PetscInt M,PetscInt N,PetscInt P,PetscInt m,PetscInt n,PetscInt p,PetscInt dof0,PetscInt dof1,PetscInt dof2,PetscInt dof3,DMStagStencilType stencilType,PetscInt stencilWidth,const PetscInt lx[],const PetscInt ly[],const PetscInt lz[],DM* dm)
48 {
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = DMCreate(comm,dm);CHKERRQ(ierr);
53   ierr = DMSetDimension(*dm,3);CHKERRQ(ierr);
54   ierr = DMStagInitialize(bndx,bndy,bndz,M,N,P,m,n,p,dof0,dof1,dof2,dof3,stencilType,stencilWidth,lx,ly,lz,*dm);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
DMStagSetUniformCoordinatesExplicit_3d(DM dm,PetscReal xmin,PetscReal xmax,PetscReal ymin,PetscReal ymax,PetscReal zmin,PetscReal zmax)58 PETSC_INTERN PetscErrorCode DMStagSetUniformCoordinatesExplicit_3d(DM dm,PetscReal xmin,PetscReal xmax,PetscReal ymin,PetscReal ymax,PetscReal zmin,PetscReal zmax)
59 {
60   PetscErrorCode ierr;
61   DM_Stag        *stagCoord;
62   DM             dmCoord;
63   Vec            coordLocal,coord;
64   PetscReal      h[3],min[3];
65   PetscScalar    ****arr;
66   PetscInt       ind[3],start[3],n[3],nExtra[3],s,c;
67   PetscInt       ibackdownleft,ibackdown,ibackleft,iback,idownleft,idown,ileft,ielement;
68 
69   PetscFunctionBegin;
70   ierr = DMGetCoordinateDM(dm,&dmCoord);CHKERRQ(ierr);
71   stagCoord = (DM_Stag*) dmCoord->data;
72   for (s=0; s<4; ++s) {
73     if (stagCoord->dof[s] !=0 && stagCoord->dof[s] != 3) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_PLIB,"Coordinate DM in 3 dimensions must have 0 or 3 dof on each stratum, but stratum %d has %d dof",s,stagCoord->dof[s]);
74   }
75   ierr = DMGetLocalVector(dmCoord,&coordLocal);CHKERRQ(ierr);
76   ierr = DMStagVecGetArray(dmCoord,coordLocal,&arr);CHKERRQ(ierr);
77   if (stagCoord->dof[0]) {
78     ierr = DMStagGetLocationSlot(dmCoord,DMSTAG_BACK_DOWN_LEFT,0,&ibackdownleft);CHKERRQ(ierr);
79   }
80   if (stagCoord->dof[1]) {
81     ierr = DMStagGetLocationSlot(dmCoord,DMSTAG_BACK_DOWN     ,0,&ibackdown);CHKERRQ(ierr);
82     ierr = DMStagGetLocationSlot(dmCoord,DMSTAG_BACK_LEFT     ,0,&ibackleft);CHKERRQ(ierr);
83     ierr = DMStagGetLocationSlot(dmCoord,DMSTAG_DOWN_LEFT     ,0,&idownleft);CHKERRQ(ierr);
84   }
85   if (stagCoord->dof[2]) {
86     ierr = DMStagGetLocationSlot(dmCoord,DMSTAG_BACK          ,0,&iback);CHKERRQ(ierr);
87     ierr = DMStagGetLocationSlot(dmCoord,DMSTAG_DOWN          ,0,&idown);CHKERRQ(ierr);
88     ierr = DMStagGetLocationSlot(dmCoord,DMSTAG_LEFT          ,0,&ileft);CHKERRQ(ierr);
89   }
90   if (stagCoord->dof[3]) {
91     ierr = DMStagGetLocationSlot(dmCoord,DMSTAG_ELEMENT       ,0,&ielement);CHKERRQ(ierr);
92     ierr = DMStagGetCorners(dmCoord,&start[0],&start[1],&start[2],&n[0],&n[1],&n[2],&nExtra[0],&nExtra[1],&nExtra[2]);CHKERRQ(ierr);
93   }
94   min[0] = xmin; min[1]= ymin; min[2] = zmin;
95   h[0] = (xmax-xmin)/stagCoord->N[0];
96   h[1] = (ymax-ymin)/stagCoord->N[1];
97   h[2] = (zmax-zmin)/stagCoord->N[2];
98 
99   for (ind[2]=start[2]; ind[2]<start[2] + n[2] + nExtra[2]; ++ind[2]) {
100     for (ind[1]=start[1]; ind[1]<start[1] + n[1] + nExtra[1]; ++ind[1]) {
101       for (ind[0]=start[0]; ind[0]<start[0] + n[0] + nExtra[0]; ++ind[0]) {
102         if (stagCoord->dof[0]) {
103           const PetscReal offs[3] = {0.0,0.0,0.0};
104           for (c=0; c<3; ++c) {
105             arr[ind[2]][ind[1]][ind[0]][ibackdownleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
106           }
107         }
108         if (stagCoord->dof[1]) {
109           const PetscReal offs[3] = {0.5,0.0,0.0};
110           for (c=0; c<3; ++c) {
111             arr[ind[2]][ind[1]][ind[0]][ibackdown + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
112           }
113         }
114         if (stagCoord->dof[1]) {
115           const PetscReal offs[3] = {0.0,0.5,0.0};
116           for (c=0; c<3; ++c) {
117             arr[ind[2]][ind[1]][ind[0]][ibackleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
118           }
119         }
120         if (stagCoord->dof[2]) {
121           const PetscReal offs[3] = {0.5,0.5,0.0};
122           for (c=0; c<3; ++c) {
123             arr[ind[2]][ind[1]][ind[0]][iback + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
124           }
125         }
126         if (stagCoord->dof[1]) {
127           const PetscReal offs[3] = {0.0,0.0,0.5};
128           for (c=0; c<3; ++c) {
129             arr[ind[2]][ind[1]][ind[0]][idownleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
130           }
131         }
132         if (stagCoord->dof[2]) {
133           const PetscReal offs[3] = {0.5,0.0,0.5};
134           for (c=0; c<3; ++c) {
135             arr[ind[2]][ind[1]][ind[0]][idown + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
136           }
137         }
138         if (stagCoord->dof[2]) {
139           const PetscReal offs[3] = {0.0,0.5,0.5};
140           for (c=0; c<3; ++c) {
141             arr[ind[2]][ind[1]][ind[0]][ileft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
142           }
143         }
144         if (stagCoord->dof[3]) {
145           const PetscReal offs[3] = {0.5,0.5,0.5};
146           for (c=0; c<3; ++c) {
147             arr[ind[2]][ind[1]][ind[0]][ielement + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
148           }
149         }
150       }
151     }
152   }
153   ierr = DMStagVecRestoreArray(dmCoord,coordLocal,&arr);CHKERRQ(ierr);
154   ierr = DMCreateGlobalVector(dmCoord,&coord);CHKERRQ(ierr);
155   ierr = DMLocalToGlobalBegin(dmCoord,coordLocal,INSERT_VALUES,coord);CHKERRQ(ierr);
156   ierr = DMLocalToGlobalEnd(dmCoord,coordLocal,INSERT_VALUES,coord);CHKERRQ(ierr);
157   ierr = DMSetCoordinates(dm,coord);CHKERRQ(ierr);
158   ierr = PetscLogObjectParent((PetscObject)dm,(PetscObject)coord);CHKERRQ(ierr);
159   ierr = DMRestoreLocalVector(dmCoord,&coordLocal);CHKERRQ(ierr);
160   ierr = VecDestroy(&coord);CHKERRQ(ierr);
161   PetscFunctionReturn(0);
162 }
163 
164 /* Helper functions used in DMSetUp_Stag() */
165 static PetscErrorCode DMStagSetUpBuildRankGrid_3d(DM);
166 static PetscErrorCode DMStagSetUpBuildNeighbors_3d(DM);
167 static PetscErrorCode DMStagSetUpBuildGlobalOffsets_3d(DM,PetscInt**);
168 static PetscErrorCode DMStagSetUpBuildScatter_3d(DM,const PetscInt*);
169 static PetscErrorCode DMStagSetUpBuildL2G_3d(DM,const PetscInt*);
170 static PetscErrorCode DMStagComputeLocationOffsets_3d(DM);
171 
DMSetUp_Stag_3d(DM dm)172 PETSC_INTERN PetscErrorCode DMSetUp_Stag_3d(DM dm)
173 {
174   PetscErrorCode  ierr;
175   DM_Stag * const stag = (DM_Stag*)dm->data;
176   PetscMPIInt     rank;
177   PetscInt        i,j,d;
178   PetscInt        *globalOffsets;
179   const PetscInt  dim = 3;
180 
181   PetscFunctionBegin;
182   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)dm),&rank);CHKERRQ(ierr);
183 
184   /* Rank grid sizes (populates stag->nRanks) */
185   ierr = DMStagSetUpBuildRankGrid_3d(dm);CHKERRQ(ierr);
186 
187   /* Determine location of rank in grid */
188     stag->rank[0] = rank % stag->nRanks[0];
189     stag->rank[1] = rank % (stag->nRanks[0] * stag->nRanks[1]) / stag->nRanks[0];
190     stag->rank[2] = rank / (stag->nRanks[0] * stag->nRanks[1]);
191     for (d=0; d<dim; ++d) {
192       stag->firstRank[d] = PetscNot(stag->rank[d]);
193       stag->lastRank[d]  = (PetscBool)(stag->rank[d] == stag->nRanks[d]-1);
194     }
195 
196   /* Determine locally owned region (if not already prescribed).
197    Divide equally, giving lower ranks in each dimension and extra element if needbe.
198    Note that this uses O(P) storage. If this ever becomes an issue, this could
199    be refactored to not keep this data around.  */
200   for (i=0; i<dim; ++i) {
201     if (!stag->l[i]) {
202       const PetscInt Ni = stag->N[i], nRanksi = stag->nRanks[i];
203       ierr = PetscMalloc1(stag->nRanks[i],&stag->l[i]);CHKERRQ(ierr);
204       for (j=0; j<stag->nRanks[i]; ++j){
205         stag->l[i][j] = Ni/nRanksi + ((Ni % nRanksi) > j);
206       }
207     }
208   }
209 
210   /* Retrieve local size in stag->n */
211   for (i=0; i<dim; ++i) stag->n[i] = stag->l[i][stag->rank[i]];
212   if (PetscDefined(USE_DEBUG)) {
213     for (i=0; i<dim; ++i) {
214       PetscInt Ncheck,j;
215       Ncheck = 0;
216       for (j=0; j<stag->nRanks[i]; ++j) Ncheck += stag->l[i][j];
217       if (Ncheck != stag->N[i]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local sizes in dimension %d don't add up. %d != %d\n",i,Ncheck,stag->N[i]);
218     }
219   }
220 
221   /* Compute starting elements */
222   for (i=0; i<dim; ++i) {
223     stag->start[i] = 0;
224     for (j=0;j<stag->rank[i];++j){
225       stag->start[i] += stag->l[i][j];
226     }
227   }
228 
229   /* Determine ranks of neighbors */
230   ierr = DMStagSetUpBuildNeighbors_3d(dm);CHKERRQ(ierr);
231 
232   /* Define useful sizes */
233   {
234     PetscInt entriesPerEdge,entriesPerFace,entriesPerCorner,entriesPerElementRow,entriesPerElementLayer;
235     PetscBool dummyEnd[3];
236     for (d=0; d<3; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
237     stag->entriesPerElement = stag->dof[0] + 3*stag->dof[1] + 3*stag->dof[2] + stag->dof[3];
238     entriesPerFace          = stag->dof[0] + 2*stag->dof[1] + stag->dof[2];
239     entriesPerEdge          = stag->dof[0] + stag->dof[1];
240     entriesPerCorner        = stag->dof[0];
241     entriesPerElementRow    = stag->n[0]*stag->entriesPerElement
242                               + (dummyEnd[0]                               ? entriesPerFace                       : 0);
243     entriesPerElementLayer  = stag->n[1]*entriesPerElementRow
244                               + (dummyEnd[1]                               ? stag->n[0] * entriesPerFace          : 0)
245                               + (dummyEnd[1] && dummyEnd[0]                ? entriesPerEdge                       : 0);
246     stag->entries           = stag->n[2]*entriesPerElementLayer
247                               + (dummyEnd[2]                               ? stag->n[0]*stag->n[1]*entriesPerFace : 0)
248                               + (dummyEnd[2] && dummyEnd[0]                ? stag->n[1]*entriesPerEdge            : 0)
249                               + (dummyEnd[2] && dummyEnd[1]                ? stag->n[0]*entriesPerEdge            : 0)
250                               + (dummyEnd[2] && dummyEnd[1] && dummyEnd[0] ? entriesPerCorner                     : 0);
251   }
252 
253   /* Check that we will not overflow 32 bit indices (slightly overconservative) */
254   if (!PetscDefined(USE_64BIT_INDICES)) {
255     if (((PetscInt64) stag->n[0])*((PetscInt64) stag->n[1])*((PetscInt64) stag->n[2])*((PetscInt64) stag->entriesPerElement) > (PetscInt64) PETSC_MPI_INT_MAX) SETERRQ4(PetscObjectComm((PetscObject)dm),PETSC_ERR_INT_OVERFLOW,"Mesh of %D x %D x %D with %D entries per (interior) element is likely too large for 32 bit indices",stag->n[0],stag->n[1],stag->n[2],stag->entriesPerElement);
256   }
257 
258   /* Compute offsets for each rank into global vectors
259 
260     This again requires O(P) storage, which could be replaced with some global
261     communication.
262   */
263   ierr = DMStagSetUpBuildGlobalOffsets_3d(dm,&globalOffsets);CHKERRQ(ierr);
264 
265   for (d=0; d<dim; ++d) if (stag->boundaryType[d] != DM_BOUNDARY_NONE && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->boundaryType[d] != DM_BOUNDARY_GHOSTED) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported boundary type");
266 
267   /* Define ghosted/local sizes */
268   for (d=0; d<dim; ++d) {
269     switch (stag->boundaryType[d]) {
270       case DM_BOUNDARY_NONE:
271         /* Note: for a elements-only DMStag, the extra elements on the edges aren't necessary but we include them anyway */
272         switch (stag->stencilType) {
273           case DMSTAG_STENCIL_NONE : /* only the extra one on the right/top edges */
274             stag->nGhost[d] = stag->n[d];
275             stag->startGhost[d] = stag->start[d];
276             if (stag->lastRank[d]) stag->nGhost[d] += 1;
277             break;
278           case DMSTAG_STENCIL_STAR : /* allocate the corners but don't use them */
279           case DMSTAG_STENCIL_BOX :
280             stag->nGhost[d] = stag->n[d];
281             stag->startGhost[d] = stag->start[d];
282             if (!stag->firstRank[d]) {
283               stag->nGhost[d]     += stag->stencilWidth; /* add interior ghost elements */
284               stag->startGhost[d] -= stag->stencilWidth;
285             }
286             if (!stag->lastRank[d]) {
287               stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
288             } else {
289               stag->nGhost[d] += 1; /* one element on the boundary to complete blocking */
290             }
291             break;
292           default :
293             SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unrecognized ghost stencil type %d",stag->stencilType);
294         }
295         break;
296       case DM_BOUNDARY_GHOSTED:
297         switch (stag->stencilType) {
298           case DMSTAG_STENCIL_NONE :
299             stag->startGhost[d] = stag->start[d];
300             stag->nGhost[d]     = stag->n[d] + (stag->lastRank[d] ? 1 : 0);
301             break;
302           case DMSTAG_STENCIL_STAR :
303           case DMSTAG_STENCIL_BOX :
304             stag->startGhost[d] = stag->start[d] - stag->stencilWidth; /* This value may be negative */
305             stag->nGhost[d]     = stag->n[d] + 2*stag->stencilWidth + (stag->lastRank[d] && stag->stencilWidth == 0 ? 1 : 0);
306             break;
307           default :
308             SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unrecognized ghost stencil type %d",stag->stencilType);
309         }
310         break;
311       case DM_BOUNDARY_PERIODIC:
312         switch (stag->stencilType) {
313           case DMSTAG_STENCIL_NONE : /* only the extra one on the right/top edges */
314             stag->nGhost[d] = stag->n[d];
315             stag->startGhost[d] = stag->start[d];
316             break;
317           case DMSTAG_STENCIL_STAR :
318           case DMSTAG_STENCIL_BOX :
319             stag->nGhost[d] = stag->n[d] + 2*stag->stencilWidth;
320             stag->startGhost[d] = stag->start[d] - stag->stencilWidth;
321             break;
322           default :
323             SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unrecognized ghost stencil type %d",stag->stencilType);
324         }
325         break;
326       default: SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Unsupported boundary type in dimension %D",d);
327     }
328   }
329   stag->entriesGhost = stag->nGhost[0]*stag->nGhost[1]*stag->nGhost[2]*stag->entriesPerElement;
330 
331   /* Create global-->local VecScatter and local->global ISLocalToGlobalMapping */
332   ierr = DMStagSetUpBuildScatter_3d(dm,globalOffsets);CHKERRQ(ierr);
333   ierr = DMStagSetUpBuildL2G_3d(dm,globalOffsets);CHKERRQ(ierr);
334 
335   /* In special cases, create a dedicated injective local-to-global map */
336   if ((stag->boundaryType[0] == DM_BOUNDARY_PERIODIC && stag->nRanks[0] == 1) ||
337       (stag->boundaryType[1] == DM_BOUNDARY_PERIODIC && stag->nRanks[1] == 1) ||
338       (stag->boundaryType[2] == DM_BOUNDARY_PERIODIC && stag->nRanks[2] == 1)) {
339     ierr = DMStagPopulateLocalToGlobalInjective(dm);CHKERRQ(ierr);
340   }
341 
342   /* Free global offsets */
343   ierr = PetscFree(globalOffsets);CHKERRQ(ierr);
344 
345   /* Precompute location offsets */
346   ierr = DMStagComputeLocationOffsets_3d(dm);CHKERRQ(ierr);
347 
348   /* View from Options */
349   ierr = DMViewFromOptions(dm,NULL,"-dm_view");CHKERRQ(ierr);
350 
351   PetscFunctionReturn(0);
352 }
353 
354 /* adapted from da3.c */
DMStagSetUpBuildRankGrid_3d(DM dm)355 static PetscErrorCode DMStagSetUpBuildRankGrid_3d(DM dm)
356 {
357   PetscErrorCode  ierr;
358   PetscMPIInt     rank,size;
359   PetscInt        m,n,p,pm;
360   DM_Stag * const stag = (DM_Stag*)dm->data;
361   const PetscInt M = stag->N[0];
362   const PetscInt N = stag->N[1];
363   const PetscInt P = stag->N[2];
364 
365   PetscFunctionBegin;
366   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)dm),&size);CHKERRQ(ierr);
367   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)dm),&rank);CHKERRQ(ierr);
368 
369   m = stag->nRanks[0];
370   n = stag->nRanks[1];
371   p = stag->nRanks[2];
372 
373   if (m != PETSC_DECIDE) {
374     if (m < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in X direction: %D",m);
375     else if (m > size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in X direction: %D %d",m,size);
376   }
377   if (n != PETSC_DECIDE) {
378     if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Y direction: %D",n);
379     else if (n > size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Y direction: %D %d",n,size);
380   }
381   if (p != PETSC_DECIDE) {
382     if (p < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Z direction: %D",p);
383     else if (p > size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Z direction: %D %d",p,size);
384   }
385   if ((m > 0) && (n > 0) && (p > 0) && (m*n*p != size)) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"m %D * n %D * p %D != size %d",m,n,p,size);
386 
387   /* Partition the array among the processors */
388   if (m == PETSC_DECIDE && n != PETSC_DECIDE && p != PETSC_DECIDE) {
389     m = size/(n*p);
390   } else if (m != PETSC_DECIDE && n == PETSC_DECIDE && p != PETSC_DECIDE) {
391     n = size/(m*p);
392   } else if (m != PETSC_DECIDE && n != PETSC_DECIDE && p == PETSC_DECIDE) {
393     p = size/(m*n);
394   } else if (m == PETSC_DECIDE && n == PETSC_DECIDE && p != PETSC_DECIDE) {
395     /* try for squarish distribution */
396     m = (int)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N*p)));
397     if (!m) m = 1;
398     while (m > 0) {
399       n = size/(m*p);
400       if (m*n*p == size) break;
401       m--;
402     }
403     if (!m) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"bad p value: p = %D",p);
404     if (M > N && m < n) {PetscInt _m = m; m = n; n = _m;}
405   } else if (m == PETSC_DECIDE && n != PETSC_DECIDE && p == PETSC_DECIDE) {
406     /* try for squarish distribution */
407     m = (int)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)P*n)));
408     if (!m) m = 1;
409     while (m > 0) {
410       p = size/(m*n);
411       if (m*n*p == size) break;
412       m--;
413     }
414     if (!m) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"bad n value: n = %D",n);
415     if (M > P && m < p) {PetscInt _m = m; m = p; p = _m;}
416   } else if (m != PETSC_DECIDE && n == PETSC_DECIDE && p == PETSC_DECIDE) {
417     /* try for squarish distribution */
418     n = (int)(0.5 + PetscSqrtReal(((PetscReal)N)*((PetscReal)size)/((PetscReal)P*m)));
419     if (!n) n = 1;
420     while (n > 0) {
421       p = size/(m*n);
422       if (m*n*p == size) break;
423       n--;
424     }
425     if (!n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"bad m value: m = %D",n);
426     if (N > P && n < p) {PetscInt _n = n; n = p; p = _n;}
427   } else if (m == PETSC_DECIDE && n == PETSC_DECIDE && p == PETSC_DECIDE) {
428     /* try for squarish distribution */
429     n = (PetscInt)(0.5 + PetscPowReal(((PetscReal)N*N)*((PetscReal)size)/((PetscReal)P*M),(PetscReal)(1./3.)));
430     if (!n) n = 1;
431     while (n > 0) {
432       pm = size/n;
433       if (n*pm == size) break;
434       n--;
435     }
436     if (!n) n = 1;
437     m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)P*n)));
438     if (!m) m = 1;
439     while (m > 0) {
440       p = size/(m*n);
441       if (m*n*p == size) break;
442       m--;
443     }
444     if (M > P && m < p) {PetscInt _m = m; m = p; p = _m;}
445   } else if (m*n*p != size) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Given Bad partition");
446 
447   if (m*n*p != size) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_PLIB,"Could not find good partition");
448   if (M < m) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m);
449   if (N < n) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n);
450   if (P < p) SETERRQ2(PetscObjectComm((PetscObject)dm),PETSC_ERR_ARG_OUTOFRANGE,"Partition in z direction is too fine! %D %D",P,p);
451 
452   stag->nRanks[0] = m;
453   stag->nRanks[1] = n;
454   stag->nRanks[2] = p;
455   PetscFunctionReturn(0);
456 }
457 
458 /* Determine ranks of neighbors, using DMDA's convention
459 
460         n24 n25 n26
461         n21 n22 n23
462         n18 n19 n20 (Front, bigger z)
463 
464         n15 n16 n17
465         n12     n14   ^ y
466         n9  n10 n11   |
467                       +--> x
468         n6  n7  n8
469         n3  n4  n5
470         n0  n1  n2 (Back, smaller z) */
DMStagSetUpBuildNeighbors_3d(DM dm)471 static PetscErrorCode DMStagSetUpBuildNeighbors_3d(DM dm)
472 {
473   PetscErrorCode  ierr;
474   DM_Stag * const stag = (DM_Stag*)dm->data;
475   PetscInt        d,i;
476   PetscBool       per[3],first[3],last[3];
477   PetscInt        neighborRank[27][3],r[3],n[3];
478   const PetscInt  dim = 3;
479 
480   PetscFunctionBegin;
481   for (d=0; d<dim; ++d) if (stag->boundaryType[d] != DM_BOUNDARY_NONE && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->boundaryType[d] != DM_BOUNDARY_GHOSTED) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Neighbor determination not implemented for %s",DMBoundaryTypes[stag->boundaryType[d]]);
482 
483   /* Assemble some convenience variables */
484   for (d=0; d<dim; ++d) {
485     per[d]   = (PetscBool)(stag->boundaryType[d] == DM_BOUNDARY_PERIODIC);
486     first[d] = stag->firstRank[d];
487     last[d]  = stag->lastRank[d];
488     r[d]     = stag->rank[d];
489     n[d]     = stag->nRanks[d];
490   }
491 
492   /* First, compute the position in the rank grid for all neighbors */
493 
494   neighborRank[0][0]  = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left  down back  */
495   neighborRank[0][1]  = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;
496   neighborRank[0][2]  = first[2] ? (per[2] ?  n[2]-1 : -1) : r[2] - 1;
497 
498   neighborRank[1][0]  =                                      r[0]    ; /*       down back  */
499   neighborRank[1][1]  = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;
500   neighborRank[1][2]  = first[2] ? (per[2] ?  n[2]-1 : -1) : r[2] - 1;
501 
502   neighborRank[2][0]  = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right down back  */
503   neighborRank[2][1]  = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;
504   neighborRank[2][2]  = first[2] ? (per[2] ?  n[2]-1 : -1) : r[2] - 1;
505 
506   neighborRank[3][0]  = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left       back  */
507   neighborRank[3][1]  =                                      r[1]    ;
508   neighborRank[3][2]  = first[2] ? (per[2] ?  n[2]-1 : -1) : r[2] - 1;
509 
510   neighborRank[4][0]  =                                      r[0]    ; /*            back  */
511   neighborRank[4][1]  =                                      r[1]    ;
512   neighborRank[4][2]  = first[2] ? (per[2] ?  n[2]-1 : -1) : r[2] - 1;
513 
514   neighborRank[5][0]  = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right      back  */
515   neighborRank[5][1]  =                                      r[1]    ;
516   neighborRank[5][2]  = first[2] ? (per[2] ?  n[2]-1 : -1) : r[2] - 1;
517 
518   neighborRank[6][0]  = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left  up   back  */
519   neighborRank[6][1]  = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;
520   neighborRank[6][2]  = first[2] ? (per[2] ?  n[2]-1 : -1) : r[2] - 1;
521 
522   neighborRank[7][0]  =                                      r[0]    ; /*       up   back  */
523   neighborRank[7][1]  = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;
524   neighborRank[7][2]  = first[2] ? (per[2] ?  n[2]-1 : -1) : r[2] - 1;
525 
526   neighborRank[8][0]  = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right up   back  */
527   neighborRank[8][1]  = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;
528   neighborRank[8][2]  = first[2] ? (per[2] ?  n[2]-1 : -1) : r[2] - 1;
529 
530   neighborRank[9][0]  = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left  down       */
531   neighborRank[9][1]  = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;
532   neighborRank[9][2]  =                                      r[2]    ;
533 
534   neighborRank[10][0] =                                      r[0]    ; /*       down       */
535   neighborRank[10][1] = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;
536   neighborRank[10][2] =                                      r[2]    ;
537 
538   neighborRank[11][0] = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right down       */
539   neighborRank[11][1] = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;
540   neighborRank[11][2] =                                      r[2]    ;
541 
542   neighborRank[12][0] = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left             */
543   neighborRank[12][1] =                                      r[1]    ;
544   neighborRank[12][2] =                                      r[2]    ;
545 
546   neighborRank[13][0] =                                      r[0]    ; /*                  */
547   neighborRank[13][1] =                                      r[1]    ;
548   neighborRank[13][2] =                                      r[2]    ;
549 
550   neighborRank[14][0] = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right            */
551   neighborRank[14][1] =                                      r[1]    ;
552   neighborRank[14][2] =                                      r[2]    ;
553 
554   neighborRank[15][0] = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left  up         */
555   neighborRank[15][1] = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;
556   neighborRank[15][2] =                                      r[2]    ;
557 
558   neighborRank[16][0] =                                      r[0]    ; /*       up         */
559   neighborRank[16][1] = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;
560   neighborRank[16][2] =                                      r[2]    ;
561 
562   neighborRank[17][0] = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right up         */
563   neighborRank[17][1] = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;
564   neighborRank[17][2] =                                      r[2]    ;
565 
566   neighborRank[18][0] = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left  down front */
567   neighborRank[18][1] = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;
568   neighborRank[18][2] = last[2]  ? (per[2] ?  0      : -1) : r[2] + 1;
569 
570   neighborRank[19][0] =                                      r[0]    ; /*       down front */
571   neighborRank[19][1] = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;
572   neighborRank[19][2] = last[2]  ? (per[2] ?  0      : -1) : r[2] + 1;
573 
574   neighborRank[20][0] = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right down front */
575   neighborRank[20][1] = first[1] ? (per[1] ?  n[1]-1 : -1) : r[1] - 1;
576   neighborRank[20][2] = last[2]  ? (per[2] ?  0      : -1) : r[2] + 1;
577 
578   neighborRank[21][0] = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left       front */
579   neighborRank[21][1] =                                      r[1]    ;
580   neighborRank[21][2] = last[2]  ? (per[2] ?  0      : -1) : r[2] + 1;
581 
582   neighborRank[22][0] =                                      r[0]    ; /*            front */
583   neighborRank[22][1] =                                      r[1]    ;
584   neighborRank[22][2] = last[2]  ? (per[2] ?  0      : -1) : r[2] + 1;
585 
586   neighborRank[23][0] = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right      front */
587   neighborRank[23][1] =                                      r[1]    ;
588   neighborRank[23][2] = last[2]  ? (per[2] ?  0      : -1) : r[2] + 1;
589 
590   neighborRank[24][0] = first[0] ? (per[0] ?  n[0]-1 : -1) : r[0] - 1; /* left  up   front */
591   neighborRank[24][1] = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;
592   neighborRank[24][2] = last[2]  ? (per[2] ?  0      : -1) : r[2] + 1;
593 
594   neighborRank[25][0] =                                      r[0]    ; /*       up   front */
595   neighborRank[25][1] = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;
596   neighborRank[25][2] = last[2]  ? (per[2] ?  0      : -1) : r[2] + 1;
597 
598   neighborRank[26][0] = last[0]  ? (per[0] ?  0      : -1) : r[0] + 1; /* right up   front */
599   neighborRank[26][1] = last[1]  ? (per[1] ?  0      : -1) : r[1] + 1;
600   neighborRank[26][2] = last[2]  ? (per[2] ?  0      : -1) : r[2] + 1;
601 
602   /* Then, compute the rank of each in the linear ordering */
603   ierr = PetscMalloc1(27,&stag->neighbors);CHKERRQ(ierr);
604   for (i=0; i<27; ++i){
605     if  (neighborRank[i][0] >= 0 && neighborRank[i][1] >=0 && neighborRank[i][2] >=0) {
606       stag->neighbors[i] = neighborRank[i][0] + n[0]*neighborRank[i][1] + n[0]*n[1]*neighborRank[i][2];
607     } else {
608       stag->neighbors[i] = -1;
609     }
610   }
611 
612   PetscFunctionReturn(0);
613 }
614 
DMStagSetUpBuildGlobalOffsets_3d(DM dm,PetscInt ** pGlobalOffsets)615 static PetscErrorCode DMStagSetUpBuildGlobalOffsets_3d(DM dm,PetscInt **pGlobalOffsets)
616 {
617   PetscErrorCode        ierr;
618   const DM_Stag * const stag = (DM_Stag*)dm->data;
619   PetscInt              *globalOffsets;
620   PetscInt              i,j,k,d,entriesPerEdge,entriesPerFace,count;
621   PetscMPIInt           size;
622   PetscBool             extra[3];
623 
624   PetscFunctionBegin;
625   for (d=0; d<3; ++d) extra[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC); /* Extra points in global rep */
626   entriesPerFace               = stag->dof[0] + 2*stag->dof[1] + stag->dof[2];
627   entriesPerEdge               = stag->dof[0] + stag->dof[1];
628   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)dm),&size);CHKERRQ(ierr);
629   ierr = PetscMalloc1(size,pGlobalOffsets);CHKERRQ(ierr);
630   globalOffsets = *pGlobalOffsets;
631   globalOffsets[0] = 0;
632   count = 1; /* note the count is offset by 1 here. We add the size of the previous rank */
633   for (k=0; k<stag->nRanks[2]-1; ++k){
634     const PetscInt nnk = stag->l[2][k];
635     for (j=0; j<stag->nRanks[1]-1; ++j) {
636       const PetscInt nnj = stag->l[1][j];
637       for (i=0; i<stag->nRanks[0]-1; ++i) {
638         const PetscInt nni = stag->l[0][i];
639         /* Interior : No right/top/front boundaries */
640         globalOffsets[count] = globalOffsets[count-1] + nni*nnj*nnk*stag->entriesPerElement;
641         ++count;
642       }
643       {
644         /* Right boundary - extra faces */
645         /* i = stag->nRanks[0]-1; */
646         const PetscInt nni = stag->l[0][i];
647         globalOffsets[count] = globalOffsets[count-1] + nnj*nni*nnk*stag->entriesPerElement
648                                + (extra[0] ? nnj*nnk*entriesPerFace : 0);
649         ++count;
650       }
651     }
652     {
653       /* j = stag->nRanks[1]-1; */
654       const PetscInt nnj = stag->l[1][j];
655       for (i=0; i<stag->nRanks[0]-1; ++i) {
656         const PetscInt nni = stag->l[0][i];
657         /* Up boundary - extra faces */
658         globalOffsets[count] = globalOffsets[count-1] + nnj*nni*nnk*stag->entriesPerElement
659                                + (extra[1] ? nni*nnk*entriesPerFace : 0);
660         ++count;
661       }
662       {
663         /* i = stag->nRanks[0]-1; */
664         const PetscInt nni = stag->l[0][i];
665         /* Up right boundary - 2x extra faces and extra edges */
666         globalOffsets[count] = globalOffsets[count-1] + nnj*nni*nnk*stag->entriesPerElement
667                                + (extra[0]             ? nnj*nnk*entriesPerFace : 0)
668                                + (extra[1]             ? nni*nnk*entriesPerFace : 0)
669                                + (extra[0] && extra[1] ? nnk*entriesPerEdge     : 0);
670         ++count;
671       }
672     }
673   }
674   {
675     /* k = stag->nRanks[2]-1; */
676     const PetscInt nnk = stag->l[2][k];
677     for (j=0; j<stag->nRanks[1]-1; ++j) {
678       const PetscInt nnj = stag->l[1][j];
679       for (i=0; i<stag->nRanks[0]-1; ++i) {
680         const PetscInt nni = stag->l[0][i];
681         /* Front boundary - extra faces */
682         globalOffsets[count] = globalOffsets[count-1] + nnj*nni*nnk*stag->entriesPerElement
683                                + (extra[2] ? nni*nnj*entriesPerFace : 0);
684         ++count;
685       }
686       {
687         /* i = stag->nRanks[0]-1; */
688         const PetscInt nni = stag->l[0][i];
689         /* Front right boundary - 2x extra faces and extra edges */
690         globalOffsets[count] = globalOffsets[count-1] + nnj*nni*nnk*stag->entriesPerElement
691                                + (extra[0]             ? nnk*nnj*entriesPerFace : 0)
692                                + (extra[2]             ? nni*nnj*entriesPerFace : 0)
693                                + (extra[0] && extra[2] ? nnj*entriesPerEdge     : 0);
694         ++count;
695       }
696     }
697     {
698       /* j = stag->nRanks[1]-1; */
699       const PetscInt nnj = stag->l[1][j];
700       for (i=0; i<stag->nRanks[0]-1; ++i) {
701         const PetscInt nni = stag->l[0][i];
702         /* Front up boundary - 2x extra faces and extra edges */
703         globalOffsets[count] = globalOffsets[count-1] + nnj*nni*nnk*stag->entriesPerElement
704                                + (extra[1]             ? nnk*nni*entriesPerFace : 0)
705                                + (extra[2]             ? nnj*nni*entriesPerFace : 0)
706                                + (extra[1] && extra[2] ? nni*entriesPerEdge     : 0);
707         ++count;
708       }
709       /* Don't need to compute entries in last element */
710     }
711   }
712 
713   PetscFunctionReturn(0);
714 }
715 
716 /* A helper function to reduce code duplication as we loop over various ranges.
717    i,j,k refer to element numbers on the rank where an element lives in the global
718    representation (without ghosts) to be offset by a global offset per rank.
719    ig,jg,kg refer to element numbers in the local (this rank) ghosted numbering.
720    Note that this function could easily be converted to a macro (it does not compute
721    anything except loop indices and the values of idxGlobal and idxLocal).  */
DMStagSetUpBuildScatterPopulateIdx_3d(DM_Stag * stag,PetscInt * count,PetscInt * idxLocal,PetscInt * idxGlobal,PetscInt entriesPerEdge,PetscInt entriesPerFace,PetscInt eprNeighbor,PetscInt eplNeighbor,PetscInt eprGhost,PetscInt eplGhost,PetscInt epFaceRow,PetscInt globalOffset,PetscInt startx,PetscInt starty,PetscInt startz,PetscInt startGhostx,PetscInt startGhosty,PetscInt startGhostz,PetscInt endGhostx,PetscInt endGhosty,PetscInt endGhostz,PetscBool extrax,PetscBool extray,PetscBool extraz)722 static PetscErrorCode DMStagSetUpBuildScatterPopulateIdx_3d(DM_Stag *stag,PetscInt *count,PetscInt *idxLocal,PetscInt *idxGlobal,PetscInt entriesPerEdge, PetscInt entriesPerFace,PetscInt eprNeighbor,PetscInt eplNeighbor,PetscInt eprGhost,PetscInt eplGhost,PetscInt epFaceRow,PetscInt globalOffset,PetscInt startx,PetscInt starty,PetscInt startz,PetscInt startGhostx,PetscInt startGhosty,PetscInt startGhostz,PetscInt endGhostx,PetscInt endGhosty,PetscInt endGhostz, PetscBool extrax,PetscBool extray,PetscBool extraz)
723 {
724   PetscInt ig,jg,kg,d,c;
725 
726   PetscFunctionBegin;
727   c = *count;
728   for (kg = startGhostz; kg < endGhostz; ++kg) {
729     const PetscInt k = kg - startGhostz + startz;
730     for (jg = startGhosty; jg < endGhosty; ++jg) {
731       const PetscInt j = jg - startGhosty + starty;
732       for (ig = startGhostx; ig<endGhostx; ++ig) {
733         const PetscInt i = ig - startGhostx + startx;
734         for (d=0; d<stag->entriesPerElement; ++d,++c) {
735           idxGlobal[c] = globalOffset + k *eplNeighbor + j *eprNeighbor + i *stag->entriesPerElement + d;
736           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost    + ig*stag->entriesPerElement + d;
737         }
738       }
739       if (extrax) {
740         PetscInt dLocal;
741         const PetscInt i = endGhostx - startGhostx + startx;
742         ig = endGhostx;
743         for (d=0,dLocal=0; d<stag->dof[0]; ++d, ++dLocal, ++c) { /* Vertex */
744           idxGlobal[c] = globalOffset + k *eplNeighbor + j*eprNeighbor + i *stag->entriesPerElement + d;
745           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost   + ig*stag->entriesPerElement + dLocal;
746         }
747         dLocal += stag->dof[1]; /* Skip back down edge */
748         for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++c) { /* Back left edge */
749           idxGlobal[c] = globalOffset + k *eplNeighbor + j *eprNeighbor + i *stag->entriesPerElement + d;
750           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost    + ig*stag->entriesPerElement + dLocal;
751         }
752         dLocal += stag->dof[2]; /* Skip back face */
753         for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++c) { /* Down left edge */
754           idxGlobal[c] = globalOffset + k *eplNeighbor + j *eprNeighbor + i *stag->entriesPerElement + d;
755           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost    + ig*stag->entriesPerElement + dLocal;
756         }
757         dLocal += stag->dof[2]; /* Skip down face */
758         for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++c) { /* Left face */
759           idxGlobal[c] = globalOffset + k *eplNeighbor + j *eprNeighbor + i *stag->entriesPerElement + d;
760           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost    + ig*stag->entriesPerElement + dLocal;
761         }
762         /* Skip element */
763       }
764     }
765     if (extray) {
766       const PetscInt j = endGhosty - startGhosty + starty;
767       jg = endGhosty;
768       for (ig = startGhostx; ig<endGhostx; ++ig) {
769         const PetscInt i = ig - startGhostx + startx;
770         /* Vertex and Back down edge */
771         PetscInt dLocal;
772         for (d=0,dLocal=0; d<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++c) { /* Vertex */
773           idxGlobal[c] = globalOffset + k *eplNeighbor + j *eprNeighbor + i *entriesPerFace          + d; /* Note face increment in  x */
774           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost    + ig*stag->entriesPerElement + dLocal;
775         }
776         /* Skip back left edge and back face */
777         dLocal += stag->dof[1] + stag->dof[2];
778         /* Down face and down left edge */
779         for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++c) { /* Back left edge */
780           idxGlobal[c] = globalOffset + k *eplNeighbor + j*eprNeighbor + i *entriesPerFace          + d; /* Note face increment in x */
781           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost   + ig*stag->entriesPerElement + dLocal;
782         }
783         /* Skip left face and element */
784       }
785       if (extrax) {
786         PetscInt dLocal;
787         const PetscInt i = endGhostx - startGhostx + startx;
788         ig = endGhostx;
789         for (d=0,dLocal=0; d<stag->dof[0]; ++d, ++dLocal, ++c) { /* Vertex */
790           idxGlobal[c] = globalOffset + k *eplNeighbor + j *eprNeighbor + i *entriesPerFace          + d; /* Note face increment in x */
791           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost    + ig*stag->entriesPerElement + dLocal;
792         }
793         dLocal += 2*stag->dof[1]+stag->dof[2]; /* Skip back down edge, back face, and back left edge */
794         for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++c) { /* Down left edge */
795           idxGlobal[c] = globalOffset + k *eplNeighbor + j *eprNeighbor + i *entriesPerFace          + d; /* Note face increment in x */
796           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost    + ig*stag->entriesPerElement + dLocal;
797         }
798         /* Skip remaining entries */
799       }
800     }
801   }
802   if (extraz) {
803     const PetscInt k = endGhostz - startGhostz + startz;
804     kg = endGhostz;
805     for (jg = startGhosty; jg < endGhosty; ++jg) {
806       const PetscInt j = jg - startGhosty + starty;
807       for (ig = startGhostx; ig<endGhostx; ++ig) {
808         const PetscInt i = ig - startGhostx + startx;
809         for (d=0; d<entriesPerFace; ++d, ++c) {
810           idxGlobal[c] = globalOffset + k*eplNeighbor  + j *epFaceRow + i *entriesPerFace          + d; /* Note face-based x and y increments */
811           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost  + ig*stag->entriesPerElement + d;
812         }
813       }
814       if (extrax) {
815         PetscInt dLocal;
816         const PetscInt i = endGhostx - startGhostx + startx;
817         ig = endGhostx;
818         for (d=0,dLocal=0; d<stag->dof[0]; ++d, ++dLocal, ++c) { /* Vertex */
819           idxGlobal[c] = globalOffset + k*eplNeighbor + j *epFaceRow + i *entriesPerFace          + d; /* Note face-based x and y increments */
820           idxLocal[c]  =                kg*eplGhost   + jg*eprGhost  + ig*stag->entriesPerElement + dLocal;
821         }
822         dLocal += stag->dof[1]; /* Skip back down edge */
823         for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++c) { /* Back left edge */
824           idxGlobal[c] = globalOffset + k*eplNeighbor  + j *epFaceRow + i*entriesPerFace           + d; /* Note face-based x and y increments */
825           idxLocal[c]  =                kg*eplGhost    + jg*eprGhost  + ig*stag->entriesPerElement + dLocal;
826         }
827         /* Skip the rest */
828       }
829     }
830     if (extray) {
831       const PetscInt j = endGhosty - startGhosty + starty;
832       jg = endGhosty;
833       for (ig = startGhostx; ig<endGhostx; ++ig) {
834         const PetscInt i = ig - startGhostx + startx;
835         for (d=0; d<entriesPerEdge; ++d,++c) {
836           idxGlobal[c] = globalOffset + k*eplNeighbor + j *epFaceRow + i *entriesPerEdge          + d; /* Note face-based y increment and edge-based x increment */
837           idxLocal[c]  =                kg*eplGhost   + jg*eprGhost  + ig*stag->entriesPerElement + d;
838         }
839       }
840       if (extrax) {
841         const PetscInt i = endGhostx - startGhostx + startx;
842         ig = endGhostx;
843         for (d=0; d<stag->dof[0]; ++d, ++c) { /* Vertex (only) */
844           idxGlobal[c] = globalOffset + k*eplNeighbor + j *epFaceRow + i *entriesPerEdge          + d; /* Note face-based y increment and edge-based x increment */
845           idxLocal[c]  =                kg*eplGhost   + jg*eprGhost  + ig*stag->entriesPerElement + d;
846         }
847       }
848     }
849   }
850   *count = c;
851   PetscFunctionReturn(0);
852 }
853 
DMStagSetUpBuildScatter_3d(DM dm,const PetscInt * globalOffsets)854 static PetscErrorCode DMStagSetUpBuildScatter_3d(DM dm,const PetscInt *globalOffsets)
855 {
856   PetscErrorCode ierr;
857   DM_Stag * const stag = (DM_Stag*)dm->data;
858   PetscInt       d,ghostOffsetStart[3],ghostOffsetEnd[3],entriesPerCorner,entriesPerEdge,entriesPerFace,entriesToTransferTotal,count,eprGhost,eplGhost;
859   PetscInt       *idxLocal,*idxGlobal;
860   PetscMPIInt    rank;
861   PetscInt       nNeighbors[27][3];
862   PetscBool      star,nextToDummyEnd[3],dummyStart[3],dummyEnd[3];
863 
864   PetscFunctionBegin;
865   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)dm),&rank);CHKERRQ(ierr);
866   if (stag->n[0] < stag->stencilWidth || stag->n[1] < stag->stencilWidth || stag->n[2] < stag->stencilWidth) {
867     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"DMStag 3d setup does not support local sizes (%D x %D x %D) smaller than the elementwise stencil width (%D)",stag->n[0],stag->n[1],stag->n[2],stag->stencilWidth);
868   }
869 
870   /* Check stencil type */
871   if (stag->stencilType != DMSTAG_STENCIL_NONE && stag->stencilType != DMSTAG_STENCIL_BOX && stag->stencilType != DMSTAG_STENCIL_STAR) SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unsupported stencil type %s",DMStagStencilTypes[stag->stencilType]);
872   if (stag->stencilType == DMSTAG_STENCIL_NONE && stag->stencilWidth != 0) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"DMStag 3d setup requires stencil width 0 with stencil type none");
873   star = (PetscBool)(stag->stencilType == DMSTAG_STENCIL_STAR);
874 
875   /* Compute numbers of elements on each neighbor */
876   {
877     PetscInt i;
878     for (i=0; i<27; ++i) {
879       const PetscInt neighborRank = stag->neighbors[i];
880       if (neighborRank >= 0) { /* note we copy the values for our own rank (neighbor 13) */
881         nNeighbors[i][0] =  stag->l[0][neighborRank % stag->nRanks[0]];
882         nNeighbors[i][1] =  stag->l[1][neighborRank % (stag->nRanks[0] * stag->nRanks[1]) / stag->nRanks[0]];
883         nNeighbors[i][2] =  stag->l[2][neighborRank / (stag->nRanks[0] * stag->nRanks[1])];
884       } /* else leave uninitialized - error if accessed */
885     }
886   }
887 
888   /* These offsets should always be non-negative, and describe how many
889      ghost elements exist at each boundary. These are not always equal to the stencil width,
890      because we may have different numbers of ghost elements at the boundaries. In particular,
891      in the non-periodic casewe always have at least one ghost (dummy) element at the right/top/front. */
892   for (d=0; d<3; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
893   for (d=0; d<3; ++d) ghostOffsetEnd[d]   = stag->startGhost[d]+stag->nGhost[d] - (stag->start[d]+stag->n[d]);
894 
895   /* Determine whether the ghost region includes dummies or not. This is currently
896      equivalent to having a non-periodic boundary. If not, then
897      ghostOffset{Start,End}[d] elements correspond to elements on the neighbor.
898      If true, then
899      - at the start, there are ghostOffsetStart[d] ghost elements
900      - at the end, there is a layer of extra "physical" points inside a layer of
901        ghostOffsetEnd[d] ghost elements
902      Note that this computation should be updated if any boundary types besides
903      NONE, GHOSTED, and PERIODIC are supported.  */
904   for (d=0; d<3; ++d) dummyStart[d] = (PetscBool)(stag->firstRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
905   for (d=0; d<3; ++d) dummyEnd[d]   = (PetscBool)(stag->lastRank[d]  && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
906 
907   /* Convenience variables */
908   entriesPerFace                      = stag->dof[0] + 2*stag->dof[1] + stag->dof[2];
909   entriesPerEdge                      = stag->dof[0] + stag->dof[1];
910   entriesPerCorner                    = stag->dof[0];
911   for (d=0;d<3;++d) nextToDummyEnd[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->rank[d] == stag->nRanks[d]-2);
912   eprGhost                            = stag->nGhost[0]*stag->entriesPerElement; /* epr = entries per (element) row   */
913   eplGhost                            = stag->nGhost[1]*eprGhost;                /* epl = entries per (element) layer */
914 
915   /* Compute the number of local entries which correspond to any global entry */
916   {
917     PetscInt nNonDummyGhost[3];
918     for (d=0; d<3; ++d) nNonDummyGhost[d] = stag->nGhost[d] - (dummyStart[d] ? ghostOffsetStart[d] : 0) - (dummyEnd[d] ? ghostOffsetEnd[d] : 0);
919     if (star) {
920       entriesToTransferTotal = (
921           nNonDummyGhost[0] * stag->n[1]        * stag->n[2]
922         + stag->n[0]        * nNonDummyGhost[1] * stag->n[2]
923         + stag->n[0]        * stag->n[1]        * nNonDummyGhost[2]
924         - 2 * (stag->n[0] * stag->n[1] * stag->n[2])) * stag->entriesPerElement
925         + (dummyEnd[0]                               ? (nNonDummyGhost[1] * stag->n[2] + stag->n[1] * nNonDummyGhost[2] - stag->n[1] * stag->n[2]) * entriesPerFace   : 0)
926         + (dummyEnd[1]                               ? (nNonDummyGhost[0] * stag->n[2] + stag->n[0] * nNonDummyGhost[2] - stag->n[0] * stag->n[2]) * entriesPerFace   : 0)
927         + (dummyEnd[2]                               ? (nNonDummyGhost[0] * stag->n[1] + stag->n[0] * nNonDummyGhost[1] - stag->n[0] * stag->n[1]) * entriesPerFace   : 0)
928         + (dummyEnd[0] && dummyEnd[1]                ? nNonDummyGhost[2]                     * entriesPerEdge   : 0)
929         + (dummyEnd[2] && dummyEnd[0]                ? nNonDummyGhost[1]                     * entriesPerEdge   : 0)
930         + (dummyEnd[1] && dummyEnd[2]                ? nNonDummyGhost[0]                     * entriesPerEdge   : 0)
931         + (dummyEnd[0] && dummyEnd[1] && dummyEnd[2] ?                                         entriesPerCorner : 0);
932     } else {
933       entriesToTransferTotal  =  nNonDummyGhost[0] * nNonDummyGhost[1] * nNonDummyGhost[2] * stag->entriesPerElement
934         + (dummyEnd[0]                               ? nNonDummyGhost[1] * nNonDummyGhost[2] * entriesPerFace   : 0)
935         + (dummyEnd[1]                               ? nNonDummyGhost[2] * nNonDummyGhost[0] * entriesPerFace   : 0)
936         + (dummyEnd[2]                               ? nNonDummyGhost[0] * nNonDummyGhost[1] * entriesPerFace   : 0)
937         + (dummyEnd[0] && dummyEnd[1]                ? nNonDummyGhost[2]                     * entriesPerEdge   : 0)
938         + (dummyEnd[2] && dummyEnd[0]                ? nNonDummyGhost[1]                     * entriesPerEdge   : 0)
939         + (dummyEnd[1] && dummyEnd[2]                ? nNonDummyGhost[0]                     * entriesPerEdge   : 0)
940         + (dummyEnd[0] && dummyEnd[1] && dummyEnd[2] ?                                         entriesPerCorner : 0);
941     }
942   }
943 
944   /* Allocate arrays to populate */
945   ierr = PetscMalloc1(entriesToTransferTotal,&idxLocal);CHKERRQ(ierr);
946   ierr = PetscMalloc1(entriesToTransferTotal,&idxGlobal);CHKERRQ(ierr);
947 
948   /* Counts into idxLocal/idxGlobal */
949   count = 0;
950 
951   /*  Loop over each of the 27 neighbor, populating idxLocal and idxGlobal. These
952       cases are principally distinguished by
953 
954       1. The loop bounds (i/ighost,j/jghost,k/kghost)
955       2. the strides used in the loop body, which depend on whether the current
956       rank and/or its neighbor are a non-periodic right/top/front boundary, which has additional
957       points in the global representation.
958       - If the neighboring rank is a right/top/front boundary,
959       then eprNeighbor (entries per element row on the neighbor) and/or eplNeighbor (entries per element layer on the neighbor)
960       are different.
961       - If this rank is a non-periodic right/top/front boundary (checking entries of stag->lastRank),
962       there is an extra loop over 1-3 boundary faces)
963 
964       Here, we do not include "dummy" dof (points in the local representation which
965       do not correspond to any global dof). This, and the fact that we iterate over points in terms of
966       increasing global ordering, are the main two differences from the construction of
967       the Local-to-global map, which iterates over points in local order, and does include dummy points. */
968 
969   /* LEFT DOWN BACK */
970   if (!star && !dummyStart[0] && !dummyStart[1] && !dummyStart[2]) {
971     const PetscInt  neighbor     = 0;
972     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0];
973     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1];
974     const PetscInt  epFaceRow    = -1;
975     const PetscInt  start0       = nNeighbors[neighbor][0] - ghostOffsetStart[0];
976     const PetscInt  start1       = nNeighbors[neighbor][1] - ghostOffsetStart[1];
977     const PetscInt  start2       = nNeighbors[neighbor][2] - ghostOffsetStart[2];
978     const PetscInt  startGhost0  = 0;
979     const PetscInt  startGhost1  = 0;
980     const PetscInt  startGhost2  = 0;
981     const PetscInt  endGhost0    = ghostOffsetStart[0];
982     const PetscInt  endGhost1    = ghostOffsetStart[1];
983     const PetscInt  endGhost2    = ghostOffsetStart[2];
984     const PetscBool extra0       = PETSC_FALSE;
985     const PetscBool extra1       = PETSC_FALSE;
986     const PetscBool extra2       = PETSC_FALSE;
987     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
988   }
989 
990   /* DOWN BACK */
991   if (!star && !dummyStart[1] && !dummyStart[2]) {
992     const PetscInt  neighbor     = 1;
993     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
994     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1];
995     const PetscInt  epFaceRow    = -1;
996     const PetscInt  start0       = 0;
997     const PetscInt  start1       = nNeighbors[neighbor][1] - ghostOffsetStart[1];
998     const PetscInt  start2       = nNeighbors[neighbor][2] - ghostOffsetStart[2];
999     const PetscInt  startGhost0  = ghostOffsetStart[0];
1000     const PetscInt  startGhost1  = 0;
1001     const PetscInt  startGhost2  = 0;
1002     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
1003     const PetscInt  endGhost1    = ghostOffsetStart[1];
1004     const PetscInt  endGhost2    = ghostOffsetStart[2];
1005     const PetscBool extra0       = dummyEnd[0];
1006     const PetscBool extra1       = PETSC_FALSE;
1007     const PetscBool extra2       = PETSC_FALSE;
1008     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1009   }
1010 
1011   /* RIGHT DOWN BACK */
1012   if (!star && !dummyEnd[0] && !dummyStart[1] && !dummyStart[2]) {
1013     const PetscInt  neighbor     = 2;
1014     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1015     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1];
1016     const PetscInt  epFaceRow    = -1;
1017     const PetscInt  start0       = 0;
1018     const PetscInt  start1       = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1019     const PetscInt  start2       = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1020     const PetscInt  startGhost0  = stag->nGhost[0] - ghostOffsetEnd[0];
1021     const PetscInt  startGhost1  = 0;
1022     const PetscInt  startGhost2  = 0;
1023     const PetscInt  endGhost0    = stag->nGhost[0];
1024     const PetscInt  endGhost1    = ghostOffsetStart[1];
1025     const PetscInt  endGhost2    = ghostOffsetStart[2];
1026     const PetscBool extra0       = PETSC_FALSE;
1027     const PetscBool extra1       = PETSC_FALSE;
1028     const PetscBool extra2       = PETSC_FALSE;
1029     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1030   }
1031 
1032   /* LEFT BACK */
1033   if (!star && !dummyStart[0] && !dummyStart[2]) {
1034     const PetscInt  neighbor     = 3;
1035     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0];
1036     const PetscInt  eplNeighbor  = eprNeighbor  * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0);  /* May be a top boundary */
1037     const PetscInt  epFaceRow    = -1;
1038     const PetscInt  start0       = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1039     const PetscInt  start1       = 0;
1040     const PetscInt  start2       = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1041     const PetscInt  startGhost0  = 0;
1042     const PetscInt  startGhost1  = ghostOffsetStart[1];
1043     const PetscInt  startGhost2  = 0;
1044     const PetscInt  endGhost0    = ghostOffsetStart[0];
1045     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
1046     const PetscInt  endGhost2    = ghostOffsetStart[2];
1047     const PetscBool extra0       = PETSC_FALSE;
1048     const PetscBool extra1       = dummyEnd[1];
1049     const PetscBool extra2       = PETSC_FALSE;
1050     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1051   }
1052 
1053   /* BACK */
1054   if (!dummyStart[2]) {
1055     const PetscInt  neighbor     = 4;
1056     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may  be a right boundary */
1057     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We+neighbor may be a top boundary */
1058     const PetscInt  epFaceRow    = -1;
1059     const PetscInt  start0       = 0;
1060     const PetscInt  start1       = 0;
1061     const PetscInt  start2       = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1062     const PetscInt  startGhost0  = ghostOffsetStart[0];
1063     const PetscInt  startGhost1  = ghostOffsetStart[1];
1064     const PetscInt  startGhost2  = 0;
1065     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
1066     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
1067     const PetscInt  endGhost2    = ghostOffsetStart[2];
1068     const PetscBool extra0       = dummyEnd[0];
1069     const PetscBool extra1       = dummyEnd[1];
1070     const PetscBool extra2       = PETSC_FALSE;
1071     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1072   }
1073 
1074   /* RIGHT BACK */
1075   if (!star && !dummyEnd[0] && !dummyStart[2]) {
1076     const PetscInt  neighbor     = 5;
1077     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1078     const PetscInt  eplNeighbor  = eprNeighbor  * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1079     const PetscInt  epFaceRow    = -1;
1080     const PetscInt  start0       = 0;
1081     const PetscInt  start1       = 0;
1082     const PetscInt  start2       = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1083     const PetscInt  startGhost0  = stag->nGhost[0] - ghostOffsetEnd[0];
1084     const PetscInt  startGhost1  = ghostOffsetStart[1];
1085     const PetscInt  startGhost2  = 0;
1086     const PetscInt  endGhost0    = stag->nGhost[0];
1087     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
1088     const PetscInt  endGhost2    = ghostOffsetStart[2];
1089     const PetscBool extra0       = PETSC_FALSE;
1090     const PetscBool extra1       = dummyEnd[1];
1091     const PetscBool extra2       = PETSC_FALSE;
1092     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1093   }
1094 
1095   /* LEFT UP BACK */
1096   if (!star && !dummyStart[0] && !dummyEnd[1] && !dummyStart[2]) {
1097     const PetscInt  neighbor     = 6;
1098     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0];
1099     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1100     const PetscInt  epFaceRow    = -1;
1101     const PetscInt  start0       = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1102     const PetscInt  start1       = 0;
1103     const PetscInt  start2       = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1104     const PetscInt  startGhost0  = 0;
1105     const PetscInt  startGhost1  = stag->nGhost[1] - ghostOffsetEnd[1];
1106     const PetscInt  startGhost2  = 0;
1107     const PetscInt  endGhost0    = ghostOffsetStart[0];
1108     const PetscInt  endGhost1    = stag->nGhost[1];
1109     const PetscInt  endGhost2    = ghostOffsetStart[2];
1110     const PetscBool extra0       = PETSC_FALSE;
1111     const PetscBool extra1       = PETSC_FALSE;
1112     const PetscBool extra2       = PETSC_FALSE;
1113     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1114   }
1115 
1116   /* UP BACK */
1117   if (!star && !dummyEnd[1] && !dummyStart[2]) {
1118     const PetscInt  neighbor     = 7;
1119     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1120     const PetscInt  eplNeighbor  = eprNeighbor  * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1121     const PetscInt  epFaceRow    = -1;
1122     const PetscInt  start0       = 0;
1123     const PetscInt  start1       = 0;
1124     const PetscInt  start2       = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1125     const PetscInt  startGhost0  = ghostOffsetStart[0];
1126     const PetscInt  startGhost1  = stag->nGhost[1]-ghostOffsetEnd[1];
1127     const PetscInt  startGhost2  = 0;
1128     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
1129     const PetscInt  endGhost1    = stag->nGhost[1];
1130     const PetscInt  endGhost2    = ghostOffsetStart[2];
1131     const PetscBool extra0       = dummyEnd[0];
1132     const PetscBool extra1       = PETSC_FALSE;
1133     const PetscBool extra2       = PETSC_FALSE;
1134     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1135   }
1136 
1137   /* RIGHT UP BACK */
1138   if (!star && !dummyEnd[0] && !dummyEnd[1] && !dummyStart[2]) {
1139     const PetscInt  neighbor     = 8;
1140     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1141     const PetscInt  eplNeighbor  = eprNeighbor  * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1142     const PetscInt  epFaceRow    = -1;
1143     const PetscInt  start0       = 0;
1144     const PetscInt  start1       = 0;
1145     const PetscInt  start2       = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1146     const PetscInt  startGhost0  = stag->nGhost[0] - ghostOffsetEnd[0];
1147     const PetscInt  startGhost1  = stag->nGhost[1] - ghostOffsetEnd[1];
1148     const PetscInt  startGhost2  = 0;
1149     const PetscInt  endGhost0    = stag->nGhost[0];
1150     const PetscInt  endGhost1    = stag->nGhost[1];
1151     const PetscInt  endGhost2    = ghostOffsetStart[2];
1152     const PetscBool extra0       = PETSC_FALSE;
1153     const PetscBool extra1       = PETSC_FALSE;
1154     const PetscBool extra2       = PETSC_FALSE;
1155     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1156   }
1157 
1158   /* LEFT DOWN */
1159   if (!star && !dummyStart[0] && !dummyStart[1]) {
1160     const PetscInt  neighbor     = 9;
1161     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0];
1162     const PetscInt  eplNeighbor  = eprNeighbor  * nNeighbors[neighbor][1];
1163     const PetscInt  epFaceRow    = entriesPerFace * nNeighbors[neighbor][0]; /* Note that we can't be a right boundary */
1164     const PetscInt  start0       = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1165     const PetscInt  start1       = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1166     const PetscInt  start2       = 0;
1167     const PetscInt  startGhost0  = 0;
1168     const PetscInt  startGhost1  = 0;
1169     const PetscInt  startGhost2  = ghostOffsetStart[2];
1170     const PetscInt  endGhost0    = ghostOffsetStart[0];
1171     const PetscInt  endGhost1    = ghostOffsetStart[1];
1172     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
1173     const PetscBool extra0       = PETSC_FALSE;
1174     const PetscBool extra1       = PETSC_FALSE;
1175     const PetscBool extra2       = dummyEnd[2];
1176     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1177   }
1178 
1179   /* DOWN */
1180   if (!dummyStart[1]) {
1181     const PetscInt  neighbor     = 10;
1182     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1183     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1];
1184     const PetscInt  epFaceRow    = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
1185     const PetscInt  start0       = 0;
1186     const PetscInt  start1       = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1187     const PetscInt  start2       = 0;
1188     const PetscInt  startGhost0  = ghostOffsetStart[0];
1189     const PetscInt  startGhost1  = 0;
1190     const PetscInt  startGhost2  = ghostOffsetStart[2];
1191     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
1192     const PetscInt  endGhost1    = ghostOffsetStart[1];
1193     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
1194     const PetscBool extra0       = dummyEnd[0];
1195     const PetscBool extra1       = PETSC_FALSE;
1196     const PetscBool extra2       = dummyEnd[2];
1197     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1198   }
1199 
1200   /* RIGHT DOWN */
1201   if (!star && !dummyEnd[0] && !dummyStart[1]) {
1202     const PetscInt  neighbor     = 11;
1203     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1204     const PetscInt  eplNeighbor  = eprNeighbor  * nNeighbors[neighbor][1];
1205     const PetscInt  epFaceRow    = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
1206     const PetscInt  start0       = 0;
1207     const PetscInt  start1       = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1208     const PetscInt  start2       = 0;
1209     const PetscInt  startGhost0  = stag->nGhost[0] - ghostOffsetEnd[0];
1210     const PetscInt  startGhost1  = 0;
1211     const PetscInt  startGhost2  = ghostOffsetStart[2];
1212     const PetscInt  endGhost0    = stag->nGhost[0];
1213     const PetscInt  endGhost1    = ghostOffsetStart[1];
1214     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
1215     const PetscBool extra0       = PETSC_FALSE;
1216     const PetscBool extra1       = PETSC_FALSE;
1217     const PetscBool extra2       = dummyEnd[2];
1218     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1219   }
1220 
1221   /* LEFT */
1222   if (!dummyStart[0]) {
1223     const PetscInt  neighbor     = 12;
1224     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0];
1225     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0);  /* We+neighbor may be a top boundary */
1226     const PetscInt  epFaceRow    = entriesPerFace * nNeighbors[neighbor][0];
1227     const PetscInt  start0       = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1228     const PetscInt  start1       = 0;
1229     const PetscInt  start2       = 0;
1230     const PetscInt  startGhost0  = 0;
1231     const PetscInt  startGhost1  = ghostOffsetStart[1];
1232     const PetscInt  startGhost2  = ghostOffsetStart[2];
1233     const PetscInt  endGhost0    = ghostOffsetStart[0];
1234     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
1235     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
1236     const PetscBool extra0       = PETSC_FALSE;
1237     const PetscBool extra1       = dummyEnd[1];
1238     const PetscBool extra2       = dummyEnd[2];
1239     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1240   }
1241 
1242   /* (HERE) */
1243   {
1244     const PetscInt  neighbor     = 13;
1245     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We may be a right boundary */
1246     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We may be a top boundary */
1247     const PetscInt  epFaceRow    = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
1248     const PetscInt  start0       = 0;
1249     const PetscInt  start1       = 0;
1250     const PetscInt  start2       = 0;
1251     const PetscInt  startGhost0  = ghostOffsetStart[0];
1252     const PetscInt  startGhost1  = ghostOffsetStart[1];
1253     const PetscInt  startGhost2  = ghostOffsetStart[2];
1254     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
1255     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
1256     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
1257     const PetscBool extra0       = dummyEnd[0];
1258     const PetscBool extra1       = dummyEnd[1];
1259     const PetscBool extra2       = dummyEnd[2];
1260     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1261   }
1262 
1263   /* RIGHT */
1264   if (!dummyEnd[0]) {
1265     const PetscInt  neighbor     = 14;
1266     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1267     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1268     const PetscInt  epFaceRow    = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
1269     const PetscInt  start0       = 0;
1270     const PetscInt  start1       = 0;
1271     const PetscInt  start2       = 0;
1272     const PetscInt  startGhost0  = stag->nGhost[0] - ghostOffsetEnd[0];
1273     const PetscInt  startGhost1  = ghostOffsetStart[1];
1274     const PetscInt  startGhost2  = ghostOffsetStart[2];
1275     const PetscInt  endGhost0    = stag->nGhost[0];
1276     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
1277     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
1278     const PetscBool extra0       = PETSC_FALSE;
1279     const PetscBool extra1       = dummyEnd[1];
1280     const PetscBool extra2       = dummyEnd[2];
1281     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1282   }
1283 
1284   /* LEFT UP */
1285   if (!star && !dummyStart[0] && !dummyEnd[1]) {
1286     const PetscInt  neighbor     = 15;
1287     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0];
1288     const PetscInt  eplNeighbor  = eprNeighbor  * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1289     const PetscInt  epFaceRow    = entriesPerFace * nNeighbors[neighbor][0];
1290     const PetscInt  start0       = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1291     const PetscInt  start1       = 0;
1292     const PetscInt  start2       = 0;
1293     const PetscInt  startGhost0  = 0;
1294     const PetscInt  startGhost1  = stag->nGhost[1] - ghostOffsetEnd[1];
1295     const PetscInt  startGhost2  = ghostOffsetStart[2];
1296     const PetscInt  endGhost0    = ghostOffsetStart[0];
1297     const PetscInt  endGhost1    = stag->nGhost[1];
1298     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
1299     const PetscBool extra0       = PETSC_FALSE;
1300     const PetscBool extra1       = PETSC_FALSE;
1301     const PetscBool extra2       = dummyEnd[2];
1302     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1303   }
1304 
1305   /* UP */
1306   if (!dummyEnd[1]) {
1307     const PetscInt  neighbor     = 16;
1308     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1309     const PetscInt  eplNeighbor  = eprNeighbor  * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1310     const PetscInt  epFaceRow    = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
1311     const PetscInt  start0       = 0;
1312     const PetscInt  start1       = 0;
1313     const PetscInt  start2       = 0;
1314     const PetscInt  startGhost0  = ghostOffsetStart[0];
1315     const PetscInt  startGhost1  = stag->nGhost[1] - ghostOffsetEnd[1];
1316     const PetscInt  startGhost2  = ghostOffsetStart[2];
1317     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
1318     const PetscInt  endGhost1    = stag->nGhost[1];
1319     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
1320     const PetscBool extra0       = dummyEnd[0];
1321     const PetscBool extra1       = PETSC_FALSE;
1322     const PetscBool extra2       = dummyEnd[2];
1323     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1324   }
1325 
1326   /* RIGHT UP */
1327   if (!star && !dummyEnd[0] && !dummyEnd[1]) {
1328     const PetscInt  neighbor     = 17;
1329     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1330     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1331     const PetscInt  epFaceRow    = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
1332     const PetscInt  start0       = 0;
1333     const PetscInt  start1       = 0;
1334     const PetscInt  start2       = 0;
1335     const PetscInt  startGhost0  = stag->nGhost[0] - ghostOffsetEnd[0];
1336     const PetscInt  startGhost1  = stag->nGhost[1] - ghostOffsetEnd[1];
1337     const PetscInt  startGhost2  = ghostOffsetStart[2];
1338     const PetscInt  endGhost0    = stag->nGhost[0];
1339     const PetscInt  endGhost1    = stag->nGhost[1];
1340     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
1341     const PetscBool extra0       = PETSC_FALSE;
1342     const PetscBool extra1       = PETSC_FALSE;
1343     const PetscBool extra2       = dummyEnd[2];
1344     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1345   }
1346 
1347   /* LEFT DOWN FRONT */
1348   if (!star && !dummyStart[0] && !dummyStart[1] && !dummyEnd[2]) {
1349     const PetscInt  neighbor     = 18;
1350     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0];
1351     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1];
1352     const PetscInt  epFaceRow    = -1;
1353     const PetscInt  start0       = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1354     const PetscInt  start1       = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1355     const PetscInt  start2       = 0;
1356     const PetscInt  startGhost0  = 0;
1357     const PetscInt  startGhost1  = 0;
1358     const PetscInt  startGhost2  = stag->nGhost[2] - ghostOffsetEnd[2];
1359     const PetscInt  endGhost0    = ghostOffsetStart[0];
1360     const PetscInt  endGhost1    = ghostOffsetStart[1];
1361     const PetscInt  endGhost2    = stag->nGhost[2];
1362     const PetscBool extra0       = PETSC_FALSE;
1363     const PetscBool extra1       = PETSC_FALSE;
1364     const PetscBool extra2       = PETSC_FALSE;
1365     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1366   }
1367 
1368   /* DOWN FRONT */
1369   if (!star && !dummyStart[1] && !dummyEnd[2]) {
1370     const PetscInt  neighbor     = 19;
1371     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1372     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1];
1373     const PetscInt  epFaceRow    = -1;
1374     const PetscInt  start0       = 0;
1375     const PetscInt  start1       = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1376     const PetscInt  start2       = 0;
1377     const PetscInt  startGhost0  = ghostOffsetStart[0];
1378     const PetscInt  startGhost1  = 0;
1379     const PetscInt  startGhost2  = stag->nGhost[2] - ghostOffsetEnd[2];
1380     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
1381     const PetscInt  endGhost1    = ghostOffsetStart[1];
1382     const PetscInt  endGhost2    = stag->nGhost[2];
1383     const PetscBool extra0       = dummyEnd[0];
1384     const PetscBool extra1       = PETSC_FALSE;
1385     const PetscBool extra2       = PETSC_FALSE;
1386     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1387   }
1388 
1389   /* RIGHT DOWN FRONT */
1390   if (!star && !dummyEnd[0] && !dummyStart[1] && !dummyEnd[2]) {
1391     const PetscInt  neighbor     = 20;
1392     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1393     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1];
1394     const PetscInt  epFaceRow    = -1;
1395     const PetscInt  start0       = 0;
1396     const PetscInt  start1       = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1397     const PetscInt  start2       = 0;
1398     const PetscInt  startGhost0  = stag->nGhost[0] - ghostOffsetEnd[0];
1399     const PetscInt  startGhost1  = 0;
1400     const PetscInt  startGhost2  = stag->nGhost[2] - ghostOffsetEnd[2];
1401     const PetscInt  endGhost0    = stag->nGhost[0];
1402     const PetscInt  endGhost1    = ghostOffsetStart[1];
1403     const PetscInt  endGhost2    = stag->nGhost[2];
1404     const PetscBool extra0       = PETSC_FALSE;
1405     const PetscBool extra1       = PETSC_FALSE;
1406     const PetscBool extra2       = PETSC_FALSE;
1407     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1408   }
1409 
1410   /* LEFT FRONT */
1411   if (!star && !dummyStart[0] && !dummyEnd[2]) {
1412     const PetscInt  neighbor     = 21;
1413     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0];
1414     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0);  /* We+neighbor may be a top boundary */
1415     const PetscInt  epFaceRow    = -1;
1416     const PetscInt  start0       = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1417     const PetscInt  start1       = 0;
1418     const PetscInt  start2       = 0;
1419     const PetscInt  startGhost0  = 0;
1420     const PetscInt  startGhost1  = ghostOffsetStart[1];
1421     const PetscInt  startGhost2  = stag->nGhost[2] - ghostOffsetEnd[2];
1422     const PetscInt  endGhost0    = ghostOffsetStart[0];
1423     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
1424     const PetscInt  endGhost2    = stag->nGhost[2];
1425     const PetscBool extra0       = PETSC_FALSE;
1426     const PetscBool extra1       = dummyEnd[1];
1427     const PetscBool extra2       = PETSC_FALSE;
1428     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1429   }
1430 
1431   /* FRONT */
1432   if (!dummyEnd[2]) {
1433     const PetscInt  neighbor     = 22;
1434     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* neighbor is a right boundary if we are*/
1435     const PetscInt  eplNeighbor  = eprNeighbor  * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* May be a top boundary */
1436     const PetscInt  epFaceRow    = -1;
1437     const PetscInt  start0       = 0;
1438     const PetscInt  start1       = 0;
1439     const PetscInt  start2       = 0;
1440     const PetscInt  startGhost0  = ghostOffsetStart[0];
1441     const PetscInt  startGhost1  = ghostOffsetStart[1];
1442     const PetscInt  startGhost2  = stag->nGhost[2] - ghostOffsetEnd[2];
1443     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
1444     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
1445     const PetscInt  endGhost2    = stag->nGhost[2];
1446     const PetscBool extra0       = dummyEnd[0];
1447     const PetscBool extra1       = dummyEnd[1];
1448     const PetscBool extra2       = PETSC_FALSE;
1449     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1450   }
1451 
1452   /* RIGHT FRONT */
1453   if (!star && !dummyEnd[0] && !dummyEnd[2]) {
1454     const PetscInt  neighbor     = 23;
1455     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1456     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1457     const PetscInt  epFaceRow    = -1;
1458     const PetscInt  start0       = 0;
1459     const PetscInt  start1       = 0;
1460     const PetscInt  start2       = 0;
1461     const PetscInt  startGhost0  = stag->nGhost[0] - ghostOffsetEnd[0];
1462     const PetscInt  startGhost1  = ghostOffsetStart[1];
1463     const PetscInt  startGhost2  = stag->nGhost[2] - ghostOffsetEnd[2];
1464     const PetscInt  endGhost0    = stag->nGhost[0];
1465     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
1466     const PetscInt  endGhost2    = stag->nGhost[2];
1467     const PetscBool extra0       = PETSC_FALSE;
1468     const PetscBool extra1       = dummyEnd[1];
1469     const PetscBool extra2       = PETSC_FALSE;
1470     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1471   }
1472 
1473   /* LEFT UP FRONT */
1474   if (!star && !dummyStart[0] && !dummyEnd[1] && !dummyEnd[2]) {
1475     const PetscInt  neighbor     = 24;
1476     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0];
1477     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1478     const PetscInt  epFaceRow    = -1;
1479     const PetscInt  start0       = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1480     const PetscInt  start1       = 0;
1481     const PetscInt  start2       = 0;
1482     const PetscInt  startGhost0  = 0;
1483     const PetscInt  startGhost1  = stag->nGhost[1] - ghostOffsetEnd[1];
1484     const PetscInt  startGhost2  = stag->nGhost[2] - ghostOffsetEnd[2];
1485     const PetscInt  endGhost0    = ghostOffsetStart[0];
1486     const PetscInt  endGhost1    = stag->nGhost[1];
1487     const PetscInt  endGhost2    = stag->nGhost[2];
1488     const PetscBool extra0       = PETSC_FALSE;
1489     const PetscBool extra1       = PETSC_FALSE;
1490     const PetscBool extra2       = PETSC_FALSE;
1491     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1492   }
1493 
1494   /* UP FRONT */
1495   if (!star && !dummyEnd[1] && !dummyEnd[2]) {
1496     const PetscInt  neighbor     = 25;
1497     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1498     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1499     const PetscInt  epFaceRow    = -1;
1500     const PetscInt  start0       = 0;
1501     const PetscInt  start1       = 0;
1502     const PetscInt  start2       = 0;
1503     const PetscInt  startGhost0  = ghostOffsetStart[0];
1504     const PetscInt  startGhost1  = stag->nGhost[1] - ghostOffsetEnd[1];
1505     const PetscInt  startGhost2  = stag->nGhost[2] - ghostOffsetEnd[2];
1506     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
1507     const PetscInt  endGhost1    = stag->nGhost[1];
1508     const PetscInt  endGhost2    = stag->nGhost[2];
1509     const PetscBool extra0       = dummyEnd[0];
1510     const PetscBool extra1       = PETSC_FALSE;
1511     const PetscBool extra2       = PETSC_FALSE;
1512     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1513   }
1514 
1515   /* RIGHT UP FRONT */
1516   if (!star && !dummyEnd[0] && !dummyEnd[1] && !dummyEnd[2]) {
1517     const PetscInt  neighbor     = 26;
1518     const PetscInt  eprNeighbor  = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1519     const PetscInt  eplNeighbor  = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1520     const PetscInt  epFaceRow    = -1;
1521     const PetscInt  start0       = 0;
1522     const PetscInt  start1       = 0;
1523     const PetscInt  start2       = 0;
1524     const PetscInt  startGhost0  = stag->nGhost[0] - ghostOffsetEnd[0];
1525     const PetscInt  startGhost1  = stag->nGhost[1] - ghostOffsetEnd[1];
1526     const PetscInt  startGhost2  = stag->nGhost[2] - ghostOffsetEnd[2];
1527     const PetscInt  endGhost0    = stag->nGhost[0];
1528     const PetscInt  endGhost1    = stag->nGhost[1];
1529     const PetscInt  endGhost2    = stag->nGhost[2];
1530     const PetscBool extra0       = PETSC_FALSE;
1531     const PetscBool extra1       = PETSC_FALSE;
1532     const PetscBool extra2       = PETSC_FALSE;
1533     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,eprNeighbor,eplNeighbor,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
1534   }
1535 
1536   if (count != entriesToTransferTotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of entries computed in gtol (%d) is not as expected (%d)",count,entriesToTransferTotal);
1537 
1538   /* Create stag->gtol. The order is computed as PETSc ordering, and doesn't include dummy entries */
1539   {
1540     Vec local,global;
1541     IS isLocal,isGlobal;
1542     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)dm),entriesToTransferTotal,idxLocal,PETSC_OWN_POINTER,&isLocal);CHKERRQ(ierr);
1543     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)dm),entriesToTransferTotal,idxGlobal,PETSC_OWN_POINTER,&isGlobal);CHKERRQ(ierr);
1544     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm),1,stag->entries,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr);
1545     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,stag->entriesPerElement,stag->entriesGhost,NULL,&local);CHKERRQ(ierr);
1546     ierr = VecScatterCreate(global,isGlobal,local,isLocal,&stag->gtol);CHKERRQ(ierr);
1547     ierr = VecDestroy(&global);CHKERRQ(ierr);
1548     ierr = VecDestroy(&local);CHKERRQ(ierr);
1549     ierr = ISDestroy(&isLocal);CHKERRQ(ierr);  /* frees idxLocal */
1550     ierr = ISDestroy(&isGlobal);CHKERRQ(ierr); /* free idxGlobal */
1551   }
1552 
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 /* Note: this function assumes that DMBoundary types of none, ghosted, and periodic are the only ones of interest.
1557 Adding support for others should be done very carefully.  */
DMStagSetUpBuildL2G_3d(DM dm,const PetscInt * globalOffsets)1558 static PetscErrorCode DMStagSetUpBuildL2G_3d(DM dm,const PetscInt *globalOffsets)
1559 {
1560   PetscErrorCode        ierr;
1561   const DM_Stag * const stag = (DM_Stag*)dm->data;
1562   PetscInt              *idxGlobalAll;
1563   PetscInt              d,count,ighost,jghost,kghost,ghostOffsetStart[3],ghostOffsetEnd[3],entriesPerFace,entriesPerEdge;
1564   PetscInt              nNeighbors[27][3],eprNeighbor[27],eplNeighbor[27],globalOffsetNeighbor[27];
1565   PetscBool             nextToDummyEnd[3],dummyStart[3],dummyEnd[3],star;
1566 
1567   PetscFunctionBegin;
1568 
1569   /* Check stencil type */
1570   if (stag->stencilType != DMSTAG_STENCIL_NONE && stag->stencilType != DMSTAG_STENCIL_BOX && stag->stencilType != DMSTAG_STENCIL_STAR) SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unsupported stencil type %s",DMStagStencilTypes[stag->stencilType]);
1571   if (stag->stencilType == DMSTAG_STENCIL_NONE && stag->stencilWidth != 0) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"DMStag 3d setup requires stencil width 0 with stencil type none");
1572   star = (PetscBool)(stag->stencilType == DMSTAG_STENCIL_STAR);
1573 
1574   /* Convenience variables */
1575   entriesPerFace                      = stag->dof[0] + 2*stag->dof[1] + stag->dof[2];
1576   entriesPerEdge                      = stag->dof[0] + stag->dof[1];
1577   for (d=0;d<3;++d) nextToDummyEnd[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->rank[d] == stag->nRanks[d]-2);
1578 
1579   /* Ghost offsets (may not be the ghost width, since we always have at least one ghost element on the right/top/front) */
1580   for (d=0; d<3; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
1581   for (d=0; d<3; ++d) ghostOffsetEnd[d]   = stag->startGhost[d]+stag->nGhost[d] - (stag->start[d]+stag->n[d]);
1582 
1583   /* Whether the ghost region includes dummies. Currently equivalent to being a non-periodic boundary. */
1584   for (d=0; d<3; ++d) dummyStart[d] = (PetscBool)(stag->firstRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1585   for (d=0; d<3; ++d) dummyEnd[d]   = (PetscBool)(stag->lastRank[d]  && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1586 
1587   /* Compute numbers of elements on each neighbor  and offset*/
1588   {
1589     PetscInt i;
1590     for (i=0; i<27; ++i) {
1591       const PetscInt neighborRank = stag->neighbors[i];
1592       if (neighborRank >= 0) { /* note we copy the values for our own rank (neighbor 13) */
1593         nNeighbors[i][0] =  stag->l[0][neighborRank % stag->nRanks[0]];
1594         nNeighbors[i][1] =  stag->l[1][neighborRank % (stag->nRanks[0] * stag->nRanks[1]) / stag->nRanks[0]];
1595         nNeighbors[i][2] =  stag->l[2][neighborRank / (stag->nRanks[0] * stag->nRanks[1])];
1596         globalOffsetNeighbor[i] = globalOffsets[stag->neighbors[i]];
1597       } /* else leave uninitialized - error if accessed */
1598     }
1599   }
1600 
1601   /* Precompute elements per row and layer on neighbor (zero unused) */
1602   ierr = PetscMemzero(eprNeighbor,sizeof(eprNeighbor));CHKERRQ(ierr);
1603   ierr = PetscMemzero(eplNeighbor,sizeof(eplNeighbor));CHKERRQ(ierr);
1604   if (stag->neighbors[0] >= 0) {
1605     eprNeighbor[0] = stag->entriesPerElement * nNeighbors[0][0];
1606     eplNeighbor[0] = eprNeighbor[0] * nNeighbors[0][1];
1607   }
1608   if (stag->neighbors[1] >= 0) {
1609     eprNeighbor[1] = stag->entriesPerElement * nNeighbors[1][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1610     eplNeighbor[1] = eprNeighbor[1] * nNeighbors[1][1];
1611   }
1612   if (stag->neighbors[2] >= 0) {
1613     eprNeighbor[2] = stag->entriesPerElement * nNeighbors[2][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1614     eplNeighbor[2] = eprNeighbor[2] * nNeighbors[2][1];
1615   }
1616   if (stag->neighbors[3] >= 0) {
1617     eprNeighbor[3] = stag->entriesPerElement * nNeighbors[3][0];
1618     eplNeighbor[3] = eprNeighbor[3] * nNeighbors[3][1] + (dummyEnd[1] ? nNeighbors[3][0] * entriesPerFace : 0);  /* May be a top boundary */
1619   }
1620   if (stag->neighbors[4] >= 0) {
1621     eprNeighbor[4] = stag->entriesPerElement * nNeighbors[4][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may  be a right boundary */
1622     eplNeighbor[4] = eprNeighbor[4] * nNeighbors[4][1] + (dummyEnd[1] ? nNeighbors[4][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We+neighbor may be a top boundary */
1623   }
1624   if (stag->neighbors[5] >= 0) {
1625     eprNeighbor[5] = stag->entriesPerElement * nNeighbors[5][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1626     eplNeighbor[5] = eprNeighbor[5]  * nNeighbors[5][1] + (dummyEnd[1] ? nNeighbors[5][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1627   }
1628   if (stag->neighbors[6] >= 0) {
1629     eprNeighbor[6] = stag->entriesPerElement * nNeighbors[6][0];
1630     eplNeighbor[6] = eprNeighbor[6] * nNeighbors[6][1] + (nextToDummyEnd[1] ? nNeighbors[6][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1631   }
1632   if (stag->neighbors[7] >= 0) {
1633     eprNeighbor[7] = stag->entriesPerElement * nNeighbors[7][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1634     eplNeighbor[7] = eprNeighbor[7] * nNeighbors[7][1] + (nextToDummyEnd[1] ? nNeighbors[7][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1635   }
1636   if (stag->neighbors[8] >= 0) {
1637     eprNeighbor[8] = stag->entriesPerElement * nNeighbors[8][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1638     eplNeighbor[8] = eprNeighbor[8] * nNeighbors[8][1] + (nextToDummyEnd[1] ? nNeighbors[8][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1639   }
1640   if (stag->neighbors[9] >= 0) {
1641     eprNeighbor[9] = stag->entriesPerElement * nNeighbors[9][0];
1642     eplNeighbor[9] = eprNeighbor[9] * nNeighbors[9][1];
1643   }
1644   if (stag->neighbors[10] >= 0) {
1645     eprNeighbor[10] = stag->entriesPerElement * nNeighbors[10][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1646     eplNeighbor[10] = eprNeighbor[10] * nNeighbors[10][1];
1647   }
1648   if (stag->neighbors[11] >= 0) {
1649     eprNeighbor[11] = stag->entriesPerElement * nNeighbors[11][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1650     eplNeighbor[11] = eprNeighbor[11] * nNeighbors[11][1];
1651   }
1652   if (stag->neighbors[12] >= 0) {
1653     eprNeighbor[12] = stag->entriesPerElement * nNeighbors[12][0];
1654     eplNeighbor[12] = eprNeighbor[12] * nNeighbors[12][1] + (dummyEnd[1] ? nNeighbors[12][0] * entriesPerFace : 0);  /* We+neighbor may be a top boundary */
1655   }
1656   if (stag->neighbors[13] >= 0) {
1657     eprNeighbor[13] = stag->entriesPerElement * nNeighbors[13][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We may be a right boundary */
1658     eplNeighbor[13] = eprNeighbor[13] * nNeighbors[13][1] + (dummyEnd[1] ? nNeighbors[13][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We may be a top boundary */
1659   }
1660   if (stag->neighbors[14] >= 0) {
1661     eprNeighbor[14] = stag->entriesPerElement * nNeighbors[14][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1662     eplNeighbor[14] = eprNeighbor[14] * nNeighbors[14][1] + (dummyEnd[1] ? nNeighbors[14][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1663   }
1664   if (stag->neighbors[15] >= 0) {
1665     eprNeighbor[15] = stag->entriesPerElement * nNeighbors[15][0];
1666     eplNeighbor[15] = eprNeighbor[15] * nNeighbors[15][1] + (nextToDummyEnd[1] ? nNeighbors[15][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1667   }
1668   if (stag->neighbors[16] >= 0) {
1669     eprNeighbor[16] = stag->entriesPerElement * nNeighbors[16][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1670     eplNeighbor[16] = eprNeighbor[16] * nNeighbors[16][1] + (nextToDummyEnd[1] ? nNeighbors[16][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1671   }
1672   if (stag->neighbors[17] >= 0) {
1673     eprNeighbor[17] = stag->entriesPerElement * nNeighbors[17][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1674     eplNeighbor[17] = eprNeighbor[17] * nNeighbors[17][1] + (nextToDummyEnd[1] ? nNeighbors[17][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1675   }
1676   if (stag->neighbors[18] >= 0) {
1677     eprNeighbor[18] = stag->entriesPerElement * nNeighbors[18][0];
1678     eplNeighbor[18] = eprNeighbor[18] * nNeighbors[18][1];
1679   }
1680   if (stag->neighbors[19] >= 0) {
1681     eprNeighbor[19] = stag->entriesPerElement * nNeighbors[19][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1682     eplNeighbor[19] = eprNeighbor[19] * nNeighbors[19][1];
1683   }
1684   if (stag->neighbors[20] >= 0) {
1685     eprNeighbor[20] = stag->entriesPerElement * nNeighbors[20][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1686     eplNeighbor[20] = eprNeighbor[20] * nNeighbors[20][1];
1687   }
1688   if (stag->neighbors[21] >= 0) {
1689     eprNeighbor[21] = stag->entriesPerElement * nNeighbors[21][0];
1690     eplNeighbor[21] = eprNeighbor[21] * nNeighbors[21][1] + (dummyEnd[1] ? nNeighbors[21][0] * entriesPerFace : 0);  /* We+neighbor may be a top boundary */
1691   }
1692   if (stag->neighbors[22] >= 0) {
1693     eprNeighbor[22] = stag->entriesPerElement * nNeighbors[22][0] + (dummyEnd[0] ? entriesPerFace : 0); /* neighbor is a right boundary if we are*/
1694     eplNeighbor[22] = eprNeighbor[22] * nNeighbors[22][1] + (dummyEnd[1] ? nNeighbors[22][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* May be a top boundary */
1695   }
1696   if (stag->neighbors[23] >= 0) {
1697     eprNeighbor[23] = stag->entriesPerElement * nNeighbors[23][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1698     eplNeighbor[23] = eprNeighbor[23] * nNeighbors[23][1] + (dummyEnd[1] ? nNeighbors[23][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1699   }
1700   if (stag->neighbors[24] >= 0) {
1701     eprNeighbor[24] = stag->entriesPerElement * nNeighbors[24][0];
1702     eplNeighbor[24] = eprNeighbor[24] * nNeighbors[24][1] + (nextToDummyEnd[1] ? nNeighbors[24][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1703   }
1704   if (stag->neighbors[25] >= 0) {
1705     eprNeighbor[25] = stag->entriesPerElement * nNeighbors[25][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1706     eplNeighbor[25] = eprNeighbor[25] * nNeighbors[25][1] + (nextToDummyEnd[1] ? nNeighbors[25][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1707   }
1708   if (stag->neighbors[26] >= 0) {
1709     eprNeighbor[26] = stag->entriesPerElement * nNeighbors[26][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1710     eplNeighbor[26] = eprNeighbor[26] * nNeighbors[26][1] + (nextToDummyEnd[1] ? nNeighbors[26][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1711   }
1712 
1713   /* Populate idxGlobalAll */
1714   ierr = PetscMalloc1(stag->entriesGhost,&idxGlobalAll);CHKERRQ(ierr);
1715   count = 0;
1716 
1717   /* Loop over layers 1/3 : Back */
1718   if (!dummyStart[2]) {
1719     for (kghost = 0; kghost<ghostOffsetStart[2]; ++kghost) {
1720       const PetscInt k = nNeighbors[4][2] - ghostOffsetStart[2] + kghost; /* Note: this is the same value for all neighbors in this layer (use neighbor 4 which will always exist if we're lookng at this layer) */
1721 
1722       /* Loop over rows 1/3: Down Back*/
1723       if (!star && !dummyStart[1]) {
1724         for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
1725           const PetscInt j = nNeighbors[1][1] - ghostOffsetStart[1] + jghost; /* Note: this is the same value for all neighbors in this row (use neighbor 1, down back)*/
1726 
1727           /* Loop over columns 1/3: Left Back Down*/
1728           if (!dummyStart[0]) {
1729             const PetscInt neighbor = 0;
1730             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
1731               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
1732               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1733                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1734               }
1735             }
1736           } else {
1737             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
1738               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1739                 idxGlobalAll[count] = -1;
1740               }
1741             }
1742           }
1743 
1744           /* Loop over columns 2/3: (Middle) Down Back */
1745           {
1746             const PetscInt neighbor = 1;
1747             for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
1748               const PetscInt i = ighost - ghostOffsetStart[0];
1749               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1750                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1751               }
1752             }
1753           }
1754 
1755           /* Loop over columns 3/3: Right Down Back */
1756           if (!dummyEnd[0]) {
1757             const PetscInt neighbor = 2;
1758             PetscInt       i;
1759             for (i=0; i<ghostOffsetEnd[0]; ++i) {
1760               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1761                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1762               }
1763             }
1764           } else {
1765             /* Partial dummy entries on (Middle) Down Back neighbor */
1766             const PetscInt neighbor          = 1;
1767             PetscInt       i,dLocal;
1768             i = stag->n[0];
1769             for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
1770               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1771             }
1772             for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
1773               idxGlobalAll[count] = -1;
1774             }
1775             for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
1776               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1777             }
1778             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face */
1779               idxGlobalAll[count] = -1;
1780             }
1781             for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
1782               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1783             }
1784             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
1785               idxGlobalAll[count] = -1;
1786             }
1787             for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
1788               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1789             }
1790             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
1791               idxGlobalAll[count] = -1;
1792             }
1793             ++i;
1794             for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
1795               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1796                 idxGlobalAll[count] = -1;
1797               }
1798             }
1799           }
1800         }
1801       } else {
1802         for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
1803           for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
1804             for (d=0; d<stag->entriesPerElement; ++d,++count) {
1805               idxGlobalAll[count] = -1;
1806             }
1807           }
1808         }
1809       }
1810 
1811       /* Loop over rows 2/3: (Middle) Back */
1812       {
1813         for (jghost = ghostOffsetStart[1]; jghost<stag->nGhost[1]-ghostOffsetEnd[1]; ++jghost) {
1814           const PetscInt j = jghost - ghostOffsetStart[1];
1815 
1816           /* Loop over columns 1/3: Left (Middle) Back */
1817           if (!star && !dummyStart[0]) {
1818             const PetscInt neighbor = 3;
1819             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
1820               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
1821               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1822                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1823               }
1824             }
1825           } else {
1826             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
1827               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1828                 idxGlobalAll[count] = -1;
1829               }
1830             }
1831           }
1832 
1833           /* Loop over columns 2/3: (Middle) (Middle) Back */
1834           {
1835             const PetscInt neighbor = 4;
1836             for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
1837               const PetscInt i = ighost - ghostOffsetStart[0];
1838               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1839                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1840               }
1841             }
1842           }
1843 
1844           /* Loop over columns 3/3: Right (Middle) Back */
1845           if (!star && !dummyEnd[0]) {
1846             const PetscInt neighbor = 5;
1847             PetscInt       i;
1848             for (i=0; i<ghostOffsetEnd[0]; ++i) {
1849               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1850                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1851               }
1852             }
1853           } else if (dummyEnd[0]) {
1854             /* Partial dummy entries on (Middle) (Middle) Back rank */
1855             const PetscInt neighbor = 4;
1856             PetscInt       i,dLocal;
1857             i = stag->n[0];
1858             for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
1859               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1860             }
1861             for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
1862               idxGlobalAll[count] = -1;
1863             }
1864             for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
1865               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor]+ j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1866             }
1867             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face */
1868               idxGlobalAll[count] = -1;
1869             }
1870             for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
1871               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1872             }
1873             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
1874               idxGlobalAll[count] = -1;
1875             }
1876             for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
1877               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1878             }
1879             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
1880               idxGlobalAll[count] = -1;
1881             }
1882             ++i;
1883             for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
1884               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1885                 idxGlobalAll[count] = -1;
1886               }
1887             }
1888           } else {
1889             /* Right (Middle) Back dummies */
1890             PetscInt       i;
1891             for (i=0; i<ghostOffsetEnd[0]; ++i) {
1892               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1893                 idxGlobalAll[count] = -1;
1894               }
1895             }
1896           }
1897         }
1898       }
1899 
1900       /* Loop over rows 3/3: Up Back */
1901       if (!star && !dummyEnd[1]) {
1902         PetscInt j;
1903         for (j=0; j<ghostOffsetEnd[1]; ++j) {
1904           /* Loop over columns 1/3: Left Up Back*/
1905           if (!dummyStart[0]) {
1906             const PetscInt neighbor = 6;
1907             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
1908               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
1909               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1910                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1911               }
1912             }
1913           } else {
1914             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
1915               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1916                 idxGlobalAll[count] = -1;
1917               }
1918             }
1919           }
1920 
1921           /* Loop over columns 2/3: (Middle) Up Back*/
1922           {
1923             const PetscInt neighbor = 7;
1924             for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
1925               const PetscInt i = ighost - ghostOffsetStart[0];
1926               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1927                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1928               }
1929             }
1930           }
1931 
1932           /* Loop over columns 3/3: Right Up Back*/
1933           if (!dummyEnd[0]) {
1934             const PetscInt neighbor = 8;
1935             PetscInt       i;
1936             for (i=0; i<ghostOffsetEnd[0]; ++i) {
1937               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1938                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1939               }
1940             }
1941           } else {
1942             /* Partial dummies on (Middle) Up Back */
1943             const PetscInt neighbor = 7;
1944             PetscInt       i,dLocal;
1945             i = stag->n[0];
1946             for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
1947               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1948             }
1949             for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
1950               idxGlobalAll[count] = -1;
1951             }
1952             for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
1953               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1954             }
1955             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face */
1956               idxGlobalAll[count] = -1;
1957             }
1958             for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
1959               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1960             }
1961             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
1962               idxGlobalAll[count] = -1;
1963             }
1964             for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
1965               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
1966             }
1967             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
1968               idxGlobalAll[count] = -1;
1969             }
1970             ++i;
1971             for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
1972               for (d=0; d<stag->entriesPerElement; ++d,++count) {
1973                 idxGlobalAll[count] = -1;
1974               }
1975             }
1976           }
1977         }
1978       } else if (dummyEnd[1]) {
1979         /* Up Back partial dummy row */
1980         PetscInt j = stag->n[1];
1981 
1982         if (!star && !dummyStart[0]) {
1983           /* Up Back partial dummy row: Loop over columns 1/3: Left Up Back, on Left (Middle) Back rank */
1984           const PetscInt neighbor = 3;
1985           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
1986             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
1987             PetscInt       dLocal;
1988             for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex and back down edge */
1989               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
1990             }
1991 
1992             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face and back left edge */
1993               idxGlobalAll[count] = -1;
1994             }
1995             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge and down face */
1996               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
1997             }
1998             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
1999               idxGlobalAll[count] = -1;
2000             }
2001           }
2002         } else {
2003           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2004             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2005               idxGlobalAll[count] = -1;
2006             }
2007           }
2008         }
2009 
2010         /* Up Back partial dummy row: Loop over columns 2/3: (Middle) Up Back, on (Middle) (Middle) Back rank */
2011         {
2012           const PetscInt neighbor = 4;
2013           for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2014             const PetscInt i = ighost - ghostOffsetStart[0];
2015             PetscInt       dLocal;
2016             for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex and back down edge */
2017               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2018             }
2019             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face and back left edge */
2020               idxGlobalAll[count] = -1;
2021             }
2022             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge and down face */
2023               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2024             }
2025             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2026               idxGlobalAll[count] = -1;
2027             }
2028           }
2029         }
2030 
2031         /* Up Back partial dummy row: Loop over columns 3/3: Right Up Back, on Right (Middle) Back rank */
2032         if (!star && !dummyEnd[0]) {
2033           const PetscInt neighbor = 5;
2034           PetscInt       i;
2035           for (i=0; i<ghostOffsetEnd[0]; ++i) {
2036             PetscInt       dLocal;
2037             for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex and back down edge */
2038               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2039             }
2040             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face and back left edge */
2041               idxGlobalAll[count] = -1;
2042             }
2043             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge and down face */
2044               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2045             }
2046             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2047               idxGlobalAll[count] = -1;
2048             }
2049           }
2050         } else if (dummyEnd[0]) {
2051           /* Up Right Back partial dummy element, on (Middle) (Middle) Back rank */
2052           const PetscInt neighbor = 4;
2053           PetscInt       i,dLocal;
2054           i = stag->n[0];
2055           for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2056             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2057           }
2058           for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back down edge, back face and back left edge */
2059             idxGlobalAll[count] = -1;
2060           }
2061           for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2062             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2063           }
2064           for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies down face, left face and element */
2065             idxGlobalAll[count] = -1;
2066           }
2067           ++i;
2068           for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2069             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2070               idxGlobalAll[count] = -1;
2071             }
2072           }
2073         } else {
2074           /* Up Right Back dummies */
2075           PetscInt i;
2076           for (i=0; i<ghostOffsetEnd[0]; ++i) {
2077             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2078               idxGlobalAll[count] = -1;
2079             }
2080           }
2081         }
2082         ++j;
2083         /* Up Back additional dummy rows */
2084         for (; j<stag->n[1] + ghostOffsetEnd[1]; ++j) {
2085           for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
2086             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2087               idxGlobalAll[count] = -1;
2088             }
2089           }
2090         }
2091       } else {
2092         /* Up Back dummy rows */
2093         PetscInt j;
2094         for (j=0; j<ghostOffsetEnd[1]; ++j) {
2095           for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
2096             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2097               idxGlobalAll[count] = -1;
2098             }
2099           }
2100         }
2101       }
2102     }
2103   } else {
2104     for (kghost = 0; kghost<ghostOffsetStart[2]; ++kghost) {
2105       for (jghost = 0; jghost<stag->nGhost[1]; ++jghost) {
2106         for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
2107           for (d=0; d<stag->entriesPerElement; ++d,++count) {
2108             idxGlobalAll[count] = -1;
2109           }
2110         }
2111       }
2112     }
2113   }
2114 
2115   /* Loop over layers 2/3 : (Middle)  */
2116   {
2117     for (kghost = ghostOffsetStart[2]; kghost<stag->nGhost[2]-ghostOffsetEnd[2]; ++kghost) {
2118       const PetscInt k = kghost - ghostOffsetStart[2];
2119 
2120       /* Loop over rows 1/3: Down (Middle) */
2121       if (!dummyStart[1]) {
2122         for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
2123           const PetscInt j = nNeighbors[10][1] - ghostOffsetStart[1] + jghost; /* down neighbor (10) always exists */
2124 
2125           /* Loop over columns 1/3: Left Down (Middle) */
2126           if (!star && !dummyStart[0]) {
2127             const PetscInt neighbor = 9;
2128             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2129               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2130               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2131                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2132               }
2133             }
2134           } else {
2135             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2136               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2137                 idxGlobalAll[count] = -1;
2138               }
2139             }
2140           }
2141 
2142           /* Loop over columns 2/3: (Middle) Down (Middle) */
2143           {
2144             const PetscInt neighbor = 10;
2145             for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2146               const PetscInt i = ighost - ghostOffsetStart[0];
2147               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2148                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2149               }
2150             }
2151           }
2152 
2153           /* Loop over columns 3/3: Right Down (Middle) */
2154           if (!star && !dummyEnd[0]) {
2155             const PetscInt neighbor = 11;
2156             PetscInt       i;
2157             for (i=0; i<ghostOffsetEnd[0]; ++i) {
2158               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2159                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2160               }
2161             }
2162           } else if (dummyEnd[0]) {
2163             /* Partial dummies on (Middle) Down (Middle) neighbor */
2164             const PetscInt neighbor = 10;
2165             PetscInt       i,dLocal;
2166             i = stag->n[0];
2167             for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2168               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2169             }
2170             for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
2171               idxGlobalAll[count] = -1;
2172             }
2173             for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2174               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor]+ j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2175             }
2176             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face */
2177               idxGlobalAll[count] = -1;
2178             }
2179             for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2180               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2181             }
2182             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2183               idxGlobalAll[count] = -1;
2184             }
2185             for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2186               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2187             }
2188             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2189               idxGlobalAll[count] = -1;
2190             }
2191             ++i;
2192             for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2193               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2194                 idxGlobalAll[count] = -1;
2195               }
2196             }
2197           } else {
2198             /* Right Down (Middle) dummies */
2199             PetscInt       i;
2200             for (i=0; i<ghostOffsetEnd[0]; ++i) {
2201               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2202                 idxGlobalAll[count] = -1;
2203               }
2204             }
2205           }
2206         }
2207       } else {
2208         for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
2209           for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
2210             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2211               idxGlobalAll[count] = -1;
2212             }
2213           }
2214         }
2215       }
2216 
2217       /* Loop over rows 2/3: (Middle) (Middle) */
2218       {
2219         for (jghost = ghostOffsetStart[1]; jghost<stag->nGhost[1]-ghostOffsetEnd[1]; ++jghost) {
2220           const PetscInt j = jghost - ghostOffsetStart[1];
2221 
2222           /* Loop over columns 1/3: Left (Middle) (Middle) */
2223           if (!dummyStart[0]) {
2224             const PetscInt neighbor = 12;
2225             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2226               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2227               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2228                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2229               }
2230             }
2231           } else {
2232             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2233               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2234                 idxGlobalAll[count] = -1;
2235               }
2236             }
2237           }
2238 
2239           /* Loop over columns 2/3: (Middle) (Middle) (Middle) aka (Here) */
2240           {
2241             const PetscInt neighbor = 13;
2242             for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2243               const PetscInt i = ighost - ghostOffsetStart[0];
2244               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2245                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2246               }
2247             }
2248           }
2249 
2250           /* Loop over columns 3/3: Right (Middle) (Middle) */
2251           if (!dummyEnd[0]) {
2252             const PetscInt neighbor = 14;
2253             PetscInt       i;
2254             for (i=0; i<ghostOffsetEnd[0]; ++i) {
2255               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2256                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2257               }
2258             }
2259           } else {
2260             /* Partial dummies on this rank */
2261             const PetscInt neighbor = 13;
2262             PetscInt       i,dLocal;
2263             i = stag->n[0];
2264             for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2265               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2266             }
2267             for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
2268               idxGlobalAll[count] = -1;
2269             }
2270             for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2271               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor]+ j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2272             }
2273             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face */
2274               idxGlobalAll[count] = -1;
2275             }
2276             for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2277               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2278             }
2279             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2280               idxGlobalAll[count] = -1;
2281             }
2282             for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2283               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2284             }
2285             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2286               idxGlobalAll[count] = -1;
2287             }
2288             ++i;
2289             for (;i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2290               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2291                 idxGlobalAll[count] = -1;
2292               }
2293             }
2294           }
2295         }
2296       }
2297 
2298       /* Loop over rows 3/3: Up (Middle) */
2299       if (!dummyEnd[1]) {
2300         PetscInt j;
2301         for (j=0; j<ghostOffsetEnd[1]; ++j) {
2302 
2303           /* Loop over columns 1/3: Left Up (Middle) */
2304           if (!star && !dummyStart[0]) {
2305             const PetscInt neighbor = 15;
2306             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2307               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2308               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2309                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2310               }
2311             }
2312           } else {
2313             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2314               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2315                 idxGlobalAll[count] = -1;
2316               }
2317             }
2318           }
2319 
2320           /* Loop over columns 2/3: (Middle) Up (Middle) */
2321           {
2322             const PetscInt neighbor = 16;
2323             for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2324               const PetscInt i = ighost - ghostOffsetStart[0];
2325               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2326                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2327               }
2328             }
2329           }
2330 
2331           /* Loop over columns 3/3: Right Up (Middle) */
2332           if (!star && !dummyEnd[0]) {
2333             const PetscInt neighbor = 17;
2334             PetscInt       i;
2335             for (i=0; i<ghostOffsetEnd[0]; ++i) {
2336               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2337                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2338               }
2339             }
2340           } else if (dummyEnd[0]) {
2341             /* Partial dummies on (Middle) Up (Middle) rank */
2342             const PetscInt neighbor = 16;
2343             PetscInt       i,dLocal;
2344             i = stag->n[0];
2345             for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2346               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2347             }
2348             for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
2349               idxGlobalAll[count] = -1;
2350             }
2351             for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2352               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2353             }
2354             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face */
2355               idxGlobalAll[count] = -1;
2356             }
2357             for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2358               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2359             }
2360             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2361               idxGlobalAll[count] = -1;
2362             }
2363             for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2364               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2365             }
2366             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2367               idxGlobalAll[count] = -1;
2368             }
2369             ++i;
2370             for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2371               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2372                 idxGlobalAll[count] = -1;
2373               }
2374             }
2375           } else {
2376             /* Right Up (Middle) dumies */
2377             PetscInt       i;
2378             for (i=0; i<ghostOffsetEnd[0]; ++i) {
2379               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2380                 idxGlobalAll[count] = -1;
2381               }
2382             }
2383           }
2384         }
2385       } else {
2386         /* Up (Middle) partial dummy row */
2387         PetscInt j = stag->n[1];
2388 
2389         /* Up (Middle) partial dummy row: colums 1/3: Left Up (Middle), on Left (Middle) (Middle) rank */
2390         if (!dummyStart[0]) {
2391           const PetscInt neighbor = 12;
2392           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2393             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2394             PetscInt       dLocal;
2395             for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex and back down edge */
2396               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2397             }
2398             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face and back left edge */
2399               idxGlobalAll[count] = -1;
2400             }
2401             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge and down face */
2402               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2403             }
2404             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2405               idxGlobalAll[count] = -1;
2406             }
2407           }
2408         } else {
2409           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2410             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2411               idxGlobalAll[count] = -1;
2412             }
2413           }
2414         }
2415 
2416         /* Up (Middle) partial dummy row: columns 2/3: (Middle) Up (Middle), on (Middle) (Middle) (Middle) = here rank */
2417         {
2418           const PetscInt neighbor = 13;
2419           for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2420             const PetscInt i = ighost - ghostOffsetStart[0];
2421             PetscInt       dLocal;
2422             for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex and back down edge */
2423               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2424             }
2425             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face and back left edge */
2426               idxGlobalAll[count] = -1;
2427             }
2428             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge and down face */
2429               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2430             }
2431             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2432               idxGlobalAll[count] = -1;
2433             }
2434           }
2435         }
2436 
2437         if (!dummyEnd[0]) {
2438           /* Up (Middle) partial dummy row: columns 3/3: Right Up (Middle), on Right (Middle) (Middle) rank */
2439           const PetscInt neighbor = 14;
2440           PetscInt       i;
2441           for (i=0; i<ghostOffsetEnd[0]; ++i) {
2442             PetscInt       dLocal;
2443             for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex and back down edge */
2444               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2445             }
2446             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face and back left edge */
2447               idxGlobalAll[count] = -1;
2448             }
2449             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge and down face */
2450               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2451             }
2452             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2453               idxGlobalAll[count] = -1;
2454             }
2455           }
2456         } else {
2457           /* Up (Middle) Right partial dummy element, on (Middle) (Middle) (Middle) = here rank */
2458           const PetscInt neighbor = 13;
2459           PetscInt       i,dLocal;
2460           i = stag->n[0];
2461           for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2462             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2463           }
2464           for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back down edge, back face and back left edge */
2465             idxGlobalAll[count] = -1;
2466           }
2467           for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2468             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2469           }
2470           for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies down face, left face and element */
2471             idxGlobalAll[count] = -1;
2472           }
2473           ++i;
2474           for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2475             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2476               idxGlobalAll[count] = -1;
2477             }
2478           }
2479         }
2480         ++j;
2481         /* Up (Middle) additional dummy rows */
2482         for (; j<stag->n[1] + ghostOffsetEnd[1]; ++j) {
2483           for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
2484             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2485               idxGlobalAll[count] = -1;
2486             }
2487           }
2488         }
2489       }
2490     }
2491   }
2492 
2493   /* Loop over layers 3/3 : Front */
2494   if (!dummyEnd[2]) {
2495     PetscInt k;
2496     for (k=0; k<ghostOffsetEnd[2]; ++k) {
2497 
2498       /* Loop over rows 1/3: Down Front */
2499       if (!star && !dummyStart[1]) {
2500         for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
2501           const PetscInt j = nNeighbors[19][1] - ghostOffsetStart[1] + jghost; /* Constant for whole row (use down front neighbor) */
2502 
2503           /* Loop over columns 1/3: Left Down Front */
2504           if (!dummyStart[0]) {
2505             const PetscInt neighbor = 18;
2506             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2507               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2508               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2509                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2510               }
2511             }
2512           } else {
2513             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2514               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2515                 idxGlobalAll[count] = -1;
2516               }
2517             }
2518           }
2519 
2520           /* Loop over columns 2/3: (Middle) Down Front */
2521           {
2522             const PetscInt neighbor = 19;
2523             for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2524               const PetscInt i = ighost - ghostOffsetStart[0];
2525               for (d=0; d<stag->entriesPerElement; ++d,++count) { /* vertex, 2 edges, and face associated with back face */
2526                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2527               }
2528             }
2529           }
2530 
2531           /* Loop over columns 3/3: Right Down Front */
2532           if (!dummyEnd[0]) {
2533             const PetscInt neighbor = 20;
2534             PetscInt       i;
2535             for (i=0; i<ghostOffsetEnd[0]; ++i) {
2536               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2537                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2538               }
2539             }
2540           } else {
2541             /* Partial dummy element on (Middle) Down Front rank */
2542             const PetscInt neighbor = 19;
2543             PetscInt       i,dLocal;
2544             i = stag->n[0];
2545             for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2546               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2547             }
2548             for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
2549               idxGlobalAll[count] = -1;
2550             }
2551             for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2552               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2553             }
2554             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face */
2555               idxGlobalAll[count] = -1;
2556             }
2557             for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2558               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2559             }
2560             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2561               idxGlobalAll[count] = -1;
2562             }
2563             for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2564               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2565             }
2566             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2567               idxGlobalAll[count] = -1;
2568             }
2569             ++i;
2570             for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2571               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2572                 idxGlobalAll[count] = -1;
2573               }
2574             }
2575           }
2576         }
2577       } else {
2578         for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
2579           for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
2580             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2581               idxGlobalAll[count] = -1;
2582             }
2583           }
2584         }
2585       }
2586 
2587       /* Loop over rows 2/3: (Middle) Front */
2588       {
2589         for (jghost = ghostOffsetStart[1]; jghost<stag->nGhost[1]-ghostOffsetEnd[1]; ++jghost) {
2590           const PetscInt j = jghost - ghostOffsetStart[1];
2591 
2592           /* Loop over columns 1/3: Left (Middle) Front*/
2593           if (!star && !dummyStart[0]) {
2594             const PetscInt neighbor = 21;
2595             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2596               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2597               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2598                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2599               }
2600             }
2601           } else {
2602             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2603               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2604                 idxGlobalAll[count] = -1;
2605               }
2606             }
2607           }
2608 
2609           /* Loop over columns 2/3: (Middle) (Middle) Front*/
2610           {
2611             const PetscInt neighbor = 22;
2612             for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2613               const PetscInt i = ighost - ghostOffsetStart[0];
2614               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2615                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2616               }
2617             }
2618           }
2619 
2620           /* Loop over columns 3/3: Right (Middle) Front*/
2621           if (!star && !dummyEnd[0]) {
2622             const PetscInt neighbor = 23;
2623             PetscInt       i;
2624             for (i=0; i<ghostOffsetEnd[0]; ++i) {
2625               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2626                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2627               }
2628             }
2629           } else if (dummyEnd[0]) {
2630             /* Partial dummy element on (Middle) (Middle) Front element */
2631             const PetscInt neighbor = 22;
2632             PetscInt       i,dLocal;
2633             i = stag->n[0];
2634             for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2635               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2636             }
2637             for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
2638               idxGlobalAll[count] = -1;
2639             }
2640             for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2641               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2642             }
2643             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face */
2644               idxGlobalAll[count] = -1;
2645             }
2646             for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2647               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2648             }
2649             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2650               idxGlobalAll[count] = -1;
2651             }
2652             for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2653               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2654             }
2655             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2656               idxGlobalAll[count] = -1;
2657             }
2658             ++i;
2659             for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2660               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2661                 idxGlobalAll[count] = -1;
2662               }
2663             }
2664           } else {
2665             /* Right (Middle) Front dummies */
2666             PetscInt       i;
2667             for (i=0; i<ghostOffsetEnd[0]; ++i) {
2668               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2669                 idxGlobalAll[count] = -1;
2670               }
2671             }
2672           }
2673         }
2674       }
2675 
2676       /* Loop over rows 3/3: Up Front */
2677       if (!star && !dummyEnd[1]) {
2678         PetscInt j;
2679         for (j=0; j<ghostOffsetEnd[1]; ++j) {
2680 
2681           /* Loop over columns 1/3: Left Up Front */
2682           if (!dummyStart[0]) {
2683             const PetscInt neighbor = 24;
2684             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2685               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2686               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2687                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2688               }
2689             }
2690           } else {
2691             for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2692               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2693                 idxGlobalAll[count] = -1;
2694               }
2695             }
2696           }
2697 
2698           /* Loop over columns 2/3: (Middle) Up Front */
2699           {
2700             const PetscInt neighbor = 25;
2701             for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2702               const PetscInt i = ighost - ghostOffsetStart[0];
2703               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2704                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2705               }
2706             }
2707           }
2708 
2709           /* Loop over columns 3/3: Right Up Front */
2710           if (!dummyEnd[0]) {
2711             const PetscInt neighbor = 26;
2712             PetscInt     i;
2713             for (i=0; i<ghostOffsetEnd[0]; ++i) {
2714               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2715                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2716               }
2717             }
2718           } else {
2719             /* Partial dummy element on (Middle) Up Front rank */
2720             const PetscInt neighbor = 25;
2721             PetscInt       i,dLocal;
2722             i = stag->n[0];
2723             for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2724               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2725             }
2726             for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
2727               idxGlobalAll[count] = -1;
2728             }
2729             for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2730               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2731             }
2732             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face */
2733               idxGlobalAll[count] = -1;
2734             }
2735             for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2736               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2737             }
2738             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2739               idxGlobalAll[count] = -1;
2740             }
2741             for (; dLocal<stag->dof[0]+3*stag->dof[1]+3*stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2742               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*stag->entriesPerElement + d;
2743             }
2744             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2745               idxGlobalAll[count] = -1;
2746             }
2747             ++i;
2748             for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2749               for (d=0; d<stag->entriesPerElement; ++d,++count) {
2750                 idxGlobalAll[count] = -1;
2751               }
2752             }
2753           }
2754         }
2755       } else if (dummyEnd[1]) {
2756         /* Up Front partial dummy row */
2757         PetscInt j = stag->n[1];
2758 
2759         /* Up Front partial dummy row: columns 1/3: Left Up Front, on Left (Middle) Front rank */
2760         if (!star && !dummyStart[0]) {
2761           const PetscInt neighbor = 21;
2762           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2763             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2764             PetscInt       dLocal;
2765             for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex and back down edge */
2766               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2767             }
2768             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face and back left edge */
2769               idxGlobalAll[count] = -1;
2770             }
2771             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge and down face */
2772               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2773             }
2774             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2775               idxGlobalAll[count] = -1;
2776             }
2777           }
2778         } else {
2779           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2780             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2781               idxGlobalAll[count] = -1;
2782             }
2783           }
2784         }
2785 
2786         /* Up Front partial dummy row: columns 2/3: (Middle) Up Front, on (Middle) (Middle) Front rank */
2787         {
2788           const PetscInt neighbor = 22;
2789           for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2790             const PetscInt i = ighost - ghostOffsetStart[0];
2791             PetscInt       dLocal;
2792             for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex and back down edge */
2793               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2794             }
2795             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face and back left edge */
2796               idxGlobalAll[count] = -1;
2797             }
2798             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge and down face */
2799               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2800             }
2801             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2802               idxGlobalAll[count] = -1;
2803             }
2804           }
2805         }
2806 
2807         if (!star && !dummyEnd[0]) {
2808           /* Up Front partial dummy row: columns 3/3: Right Up Front, on Right (Middle) Front rank */
2809           const PetscInt neighbor = 23;
2810           PetscInt       i;
2811           for (i=0; i<ghostOffsetEnd[0]; ++i) {
2812             PetscInt       dLocal;
2813             for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex and back down edge */
2814               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2815             }
2816             for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back face and back left edge */
2817               idxGlobalAll[count] = -1;
2818             }
2819             for (; dLocal<stag->dof[0]+3*stag->dof[1]+2*stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge and down face */
2820               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2821             }
2822             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2823               idxGlobalAll[count] = -1;
2824             }
2825           }
2826 
2827         } else if (dummyEnd[0]) {
2828           /* Partial Right Up Front dummy element, on (Middle) (Middle) Front rank */
2829           const PetscInt neighbor = 22;
2830           PetscInt       i,dLocal;
2831           i = stag->n[0];
2832           for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2833             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2834           }
2835           for (; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++dLocal,++count) { /* Dummies on back down edge, back face and back left edge */
2836             idxGlobalAll[count] = -1;
2837           }
2838           for (; dLocal<stag->dof[0]+3*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2839             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*eprNeighbor[neighbor] + i*entriesPerFace + d; /* Note i increment by face */
2840           }
2841           for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies down face, left face and element */
2842             idxGlobalAll[count] = -1;
2843           }
2844           ++i;
2845           for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2846             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2847               idxGlobalAll[count] = -1;
2848             }
2849           }
2850         } else {
2851           /* Right Up Front dummies */
2852           PetscInt i;
2853           for (i=0; i<ghostOffsetEnd[0]; ++i) {
2854             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2855               idxGlobalAll[count] = -1;
2856             }
2857           }
2858         }
2859         ++j;
2860         /* Up Front additional dummy rows */
2861         for (; j<stag->n[1] + ghostOffsetEnd[1]; ++j) {
2862           for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
2863             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2864               idxGlobalAll[count] = -1;
2865             }
2866           }
2867         }
2868       } else {
2869         /* Up Front dummies */
2870         PetscInt j;
2871         for (j=0; j<ghostOffsetEnd[1]; ++j) {
2872           for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
2873             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2874               idxGlobalAll[count] = -1;
2875             }
2876           }
2877         }
2878       }
2879     }
2880   } else {
2881     PetscInt k = stag->n[2];
2882 
2883     /* Front partial ghost layer: rows 1/3: Down Front, on Down (Middle) */
2884     if (!dummyStart[1]) {
2885       for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
2886         const PetscInt j = nNeighbors[10][1] - ghostOffsetStart[1] + jghost; /* wrt down neighbor (10) */
2887 
2888         /* Down Front partial ghost row: colums 1/3: Left Down Front, on  Left Down (Middle) */
2889         if (!star && !dummyStart[0]) {
2890           const PetscInt neighbor = 9;
2891           const PetscInt epFaceRow         = entriesPerFace * nNeighbors[neighbor][0]; /* Note that we can't be a right boundary */
2892           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2893             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2894             PetscInt  dLocal;
2895             for (d=0,dLocal=0; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Vertex, back down edge, back face, back left edge */
2896               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note j increment by face */
2897             }
2898             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
2899               idxGlobalAll[count] = -1;
2900             }
2901           }
2902         } else {
2903           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2904             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2905               idxGlobalAll[count] = -1;
2906             }
2907           }
2908         }
2909 
2910         /* Down Front partial ghost row: columns 2/3: (Middle) Down Front, on (Middle) Down (Middle) */
2911         {
2912           const PetscInt neighbor = 10;
2913           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
2914           for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
2915             const PetscInt i = ighost - ghostOffsetStart[0];
2916             PetscInt  dLocal;
2917             for (d=0,dLocal=0; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Vertex, back down edge, back face, back left edge */
2918               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note j increment by face */
2919             }
2920             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
2921               idxGlobalAll[count] = -1;
2922             }
2923           }
2924         }
2925 
2926         if (!star && !dummyEnd[0]) {
2927           /* Down Front partial dummy row: columns 3/3: Right Down Front, on Right Down (Middle) */
2928           const PetscInt neighbor = 11;
2929           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
2930           PetscInt       i;
2931           for (i=0; i<ghostOffsetEnd[0]; ++i) {
2932             PetscInt  dLocal;
2933             for (d=0,dLocal=0; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Vertex, back down edge, back face, back left edge */
2934               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note j increment by face */
2935             }
2936             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
2937               idxGlobalAll[count] = -1;
2938             }
2939           }
2940         } else if (dummyEnd[0]) {
2941           /* Right Down Front partial dummy element, living on (Middle) Down (Middle) rank */
2942           const PetscInt neighbor = 10;
2943           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
2944           PetscInt       i,dLocal;
2945           i = stag->n[0];
2946           for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2947             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note i increment by face */
2948           }
2949           for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
2950             idxGlobalAll[count] = -1;
2951           }
2952           for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* left back edge */
2953             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note i increment by face */
2954           }
2955           for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
2956             idxGlobalAll[count] = -1;
2957           }
2958           ++i;
2959           for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
2960             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2961               idxGlobalAll[count] = -1;
2962             }
2963           }
2964         } else {
2965           PetscInt i;
2966           /* Right Down Front dummies */
2967           for (i=0; i<ghostOffsetEnd[0]; ++i) {
2968             for (d=0; d<stag->entriesPerElement; ++d,++count) {
2969               idxGlobalAll[count] = -1;
2970             }
2971           }
2972         }
2973       }
2974     } else {
2975       for (jghost = 0; jghost<ghostOffsetStart[1]; ++jghost) {
2976         for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
2977           for (d=0; d<stag->entriesPerElement; ++d,++count) {
2978             idxGlobalAll[count] = -1;
2979           }
2980         }
2981       }
2982     }
2983 
2984     /* Front partial ghost layer: rows 2/3: (Middle) Front, on (Middle) (Middle) */
2985     {
2986       for (jghost = ghostOffsetStart[1]; jghost<stag->nGhost[1]-ghostOffsetEnd[1]; ++jghost) {
2987         const PetscInt j = jghost - ghostOffsetStart[1];
2988 
2989         /* (Middle) Front partial dummy row: columns 1/3: Left (Middle) Front, on Left (Middle) (Middle) */
2990         if (!dummyStart[0]) {
2991           const PetscInt neighbor = 12;
2992           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0];
2993           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
2994             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2995             PetscInt  dLocal;
2996             for (d=0,dLocal=0; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Vertex, back down edge, back face, back left edge */
2997               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note j increment by face */
2998             }
2999             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
3000               idxGlobalAll[count] = -1;
3001             }
3002           }
3003         } else {
3004           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
3005             for (d=0; d<stag->entriesPerElement; ++d,++count) {
3006               idxGlobalAll[count] = -1;
3007             }
3008           }
3009         }
3010 
3011         /* (Middle) Front partial dummy row: columns 2/3: (Middle) (Middle) Front, on (Middle) (Middle) (Middle) */
3012         {
3013           const PetscInt neighbor = 13;
3014           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
3015           for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
3016             const PetscInt i = ighost - ghostOffsetStart[0];
3017             PetscInt  dLocal;
3018             for (d=0,dLocal=0; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Vertex, back down edge, back face, back left edge */
3019               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note j increment by face */
3020             }
3021             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
3022               idxGlobalAll[count] = -1;
3023             }
3024           }
3025         }
3026 
3027         if (!dummyEnd[0]) {
3028           /* (Middle) Front partial dummy row: columns 3/3: Right (Middle) Front, on Right (Middle) (Middle) */
3029           const PetscInt neighbor = 14;
3030           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
3031           PetscInt i;
3032           for (i=0; i<ghostOffsetEnd[0]; ++i) {
3033             PetscInt  dLocal;
3034             for (d=0,dLocal=0; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Vertex, back down edge, back face, back left edge */
3035               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note j increment by face */
3036             }
3037             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
3038               idxGlobalAll[count] = -1;
3039             }
3040           }
3041         } else {
3042           /* Right (Middle) Front partial dummy element, on (Middle) (Middle) (Middle) rank */
3043           const PetscInt neighbor = 13;
3044           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
3045           PetscInt       i,dLocal;
3046           i = stag->n[0];
3047           for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
3048             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note i increment by face */
3049           }
3050           for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
3051             idxGlobalAll[count] = -1;
3052           }
3053           for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* left back edge */
3054             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note i increment by face */
3055           }
3056           for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3057             idxGlobalAll[count] = -1;
3058           }
3059           ++i;
3060           for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
3061             for (d=0; d<stag->entriesPerElement; ++d,++count) {
3062               idxGlobalAll[count] = -1;
3063             }
3064           }
3065         }
3066       }
3067     }
3068 
3069     /* Front partial ghost layer: rows 3/3: Up Front, on Up (Middle) */
3070     if (!dummyEnd[1]) {
3071       PetscInt j;
3072       for (j=0; j<ghostOffsetEnd[1]; ++j) {
3073 
3074         /* Up Front partial dummy row: columns 1/3: Left Up Front, on Left Up (Middle) */
3075         if (!star && !dummyStart[0]) {
3076           const PetscInt neighbor = 15;
3077           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0];
3078           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
3079             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
3080             PetscInt  dLocal;
3081             for (d=0,dLocal=0; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Vertex, back down edge, back face, back left edge */
3082               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note j increment by face */
3083             }
3084             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
3085               idxGlobalAll[count] = -1;
3086             }
3087           }
3088         } else {
3089           for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
3090             for (d=0; d<stag->entriesPerElement; ++d,++count) {
3091               idxGlobalAll[count] = -1;
3092             }
3093           }
3094         }
3095 
3096         /* Up Front partial dummy row: columns 2/3: (Middle) Up Front, on (Middle) Up (Middle) */
3097         {
3098           const PetscInt neighbor = 16;
3099           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
3100           for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
3101             const PetscInt i = ighost - ghostOffsetStart[0];
3102             PetscInt  dLocal;
3103             for (d=0,dLocal=0; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Vertex, back down edge, back face, back left edge */
3104               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note j increment by face */
3105             }
3106             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
3107               idxGlobalAll[count] = -1;
3108             }
3109           }
3110         }
3111 
3112         if (!star && !dummyEnd[0]) {
3113           /* Up Front partial dummy row: columsn 3/3: Right Up Front, on Right Up (Middle) */
3114           const PetscInt neighbor = 17;
3115           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
3116           PetscInt       i;
3117           for (i=0; i<ghostOffsetEnd[0]; ++i) {
3118             PetscInt  dLocal;
3119             for (d=0,dLocal=0; dLocal<stag->dof[0]+2*stag->dof[1]+stag->dof[2]; ++d, ++dLocal, ++count) { /* Vertex, back down edge, back face, back left edge */
3120               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note j increment by face */
3121             }
3122             for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
3123               idxGlobalAll[count] = -1;
3124             }
3125           }
3126         } else if (dummyEnd[0]) {
3127           /* Right Up Front partial dummy element, on (Middle) Up (Middle) */
3128           const PetscInt neighbor = 16;
3129           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
3130           PetscInt       i,dLocal;
3131           i = stag->n[0];
3132           for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
3133             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note i increment by face */
3134           }
3135           for (; dLocal<stag->dof[0]+stag->dof[1]; ++dLocal,++count) { /* Dummies on back down edge */
3136             idxGlobalAll[count] = -1;
3137           }
3138           for (; dLocal<stag->dof[0]+2*stag->dof[1]; ++d, ++dLocal, ++count) { /* left back edge */
3139             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerFace + d; /* Note i increment by face */
3140           }
3141           for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3142             idxGlobalAll[count] = -1;
3143           }
3144           ++i;
3145           for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
3146             for (d=0; d<stag->entriesPerElement; ++d,++count) {
3147               idxGlobalAll[count] = -1;
3148             }
3149           }
3150         } else {
3151           /* Right Up Front dummies */
3152           PetscInt i;
3153           /* Right Down Front dummies */
3154           for (i=0; i<ghostOffsetEnd[0]; ++i) {
3155             for (d=0; d<stag->entriesPerElement; ++d,++count) {
3156               idxGlobalAll[count] = -1;
3157             }
3158           }
3159         }
3160       }
3161     } else {
3162       /* Up Front partial dummy row */
3163       PetscInt j = stag->n[1];
3164 
3165       /* Up Front partial dummy row: columns 1/3: Left Up Front, on Left (Middle) (Middle) */
3166       if (!dummyStart[0]) {
3167         const PetscInt neighbor = 12;
3168         const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0];
3169         for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
3170           const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
3171           PetscInt       dLocal;
3172           for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex  and down back edge */
3173             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerEdge + d; /* Note increments */
3174           }
3175           for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3176             idxGlobalAll[count] = -1;
3177           }
3178         }
3179       } else {
3180         for (ighost = 0; ighost<ghostOffsetStart[0]; ++ighost) {
3181           for (d=0; d<stag->entriesPerElement; ++d,++count) {
3182             idxGlobalAll[count] = -1;
3183           }
3184         }
3185       }
3186 
3187       /* Up Front partial dummy row: columns 2/3: (Middle) Up Front, on (Middle) (Middle) (Middle) */
3188       {
3189         const PetscInt neighbor = 13;
3190         const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
3191         for (ighost = ghostOffsetStart[0]; ighost<stag->nGhost[0]-ghostOffsetEnd[0]; ++ighost) {
3192           const PetscInt i = ighost - ghostOffsetStart[0];
3193           PetscInt       dLocal;
3194           for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex  and down back edge */
3195             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerEdge + d; /* Note increments */
3196           }
3197           for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3198             idxGlobalAll[count] = -1;
3199           }
3200         }
3201       }
3202 
3203       if (!dummyEnd[0]) {
3204         /* Up Front partial dummy row: columns 3/3: Right Up Front, on Right (Middle) (Middle) */
3205         const PetscInt neighbor = 14;
3206         const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
3207         PetscInt       i;
3208         for (i=0; i<ghostOffsetEnd[0]; ++i) {
3209           PetscInt       dLocal;
3210           for (d=0,dLocal=0; dLocal<stag->dof[0]+stag->dof[1]; ++d, ++dLocal, ++count) { /* Vertex  and down back edge */
3211             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerEdge + d; /* Note increments */
3212           }
3213           for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3214             idxGlobalAll[count] = -1;
3215           }
3216         }
3217       } else {
3218         /* Right Up Front partial dummy element, on this rank */
3219         const PetscInt neighbor = 13;
3220         const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
3221         PetscInt       i,dLocal;
3222         i = stag->n[0];
3223         for (d=0,dLocal=0; dLocal<stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
3224           idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k*eplNeighbor[neighbor] + j*epFaceRow + i*entriesPerEdge + d; /* Note increments */
3225         }
3226         for (; dLocal<stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3227           idxGlobalAll[count] = -1;
3228         }
3229         ++i;
3230         for (; i<stag->n[0] + ghostOffsetEnd[0]; ++i) {
3231           for (d=0; d<stag->entriesPerElement; ++d,++count) {
3232             idxGlobalAll[count] = -1;
3233           }
3234         }
3235       }
3236       ++j;
3237       /* Up Front additional dummy rows */
3238       for (; j<stag->n[1] + ghostOffsetEnd[1]; ++j) {
3239         for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
3240           for (d=0; d<stag->entriesPerElement; ++d,++count) {
3241             idxGlobalAll[count] = -1;
3242           }
3243         }
3244       }
3245     }
3246     /* Front additional dummy layers */
3247     ++k;
3248     for (; k<stag->n[2] + ghostOffsetEnd[2]; ++k) {
3249       for (jghost = 0; jghost<stag->nGhost[1]; ++jghost) {
3250         for (ighost = 0; ighost<stag->nGhost[0]; ++ighost) {
3251           for (d=0; d<stag->entriesPerElement; ++d,++count) {
3252             idxGlobalAll[count] = -1;
3253           }
3254         }
3255       }
3256     }
3257   }
3258 
3259   /* Create local-to-global map (in local ordering, includes maps to -1 for dummy points) */
3260   ierr = ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)dm),1,stag->entriesGhost,idxGlobalAll,PETSC_OWN_POINTER,&dm->ltogmap);CHKERRQ(ierr);
3261   ierr = PetscLogObjectParent((PetscObject)dm,(PetscObject)dm->ltogmap);CHKERRQ(ierr);
3262   PetscFunctionReturn(0);
3263 }
3264 
DMStagComputeLocationOffsets_3d(DM dm)3265 static PetscErrorCode DMStagComputeLocationOffsets_3d(DM dm)
3266 {
3267   PetscErrorCode  ierr;
3268   DM_Stag * const stag = (DM_Stag*)dm->data;
3269   const PetscInt epe = stag->entriesPerElement;
3270   const PetscInt epr = stag->nGhost[0]*epe;
3271   const PetscInt epl = stag->nGhost[1]*epr;
3272 
3273   PetscFunctionBegin;
3274   ierr = PetscMalloc1(DMSTAG_NUMBER_LOCATIONS,&stag->locationOffsets);CHKERRQ(ierr);
3275   stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT]   = 0;
3276   stag->locationOffsets[DMSTAG_BACK_DOWN]        = stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT]  + stag->dof[0];
3277   stag->locationOffsets[DMSTAG_BACK_DOWN_RIGHT]  = stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT]  + epe;
3278   stag->locationOffsets[DMSTAG_BACK_LEFT]        = stag->locationOffsets[DMSTAG_BACK_DOWN]       + stag->dof[1];
3279   stag->locationOffsets[DMSTAG_BACK]             = stag->locationOffsets[DMSTAG_BACK_LEFT]       + stag->dof[1];
3280   stag->locationOffsets[DMSTAG_BACK_RIGHT]       = stag->locationOffsets[DMSTAG_BACK_LEFT]       + epe;
3281   stag->locationOffsets[DMSTAG_BACK_UP_LEFT]     = stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT]  + epr;
3282   stag->locationOffsets[DMSTAG_BACK_UP]          = stag->locationOffsets[DMSTAG_BACK_DOWN]       + epr;
3283   stag->locationOffsets[DMSTAG_BACK_UP_RIGHT]    = stag->locationOffsets[DMSTAG_BACK_UP_LEFT]    + epe;
3284   stag->locationOffsets[DMSTAG_DOWN_LEFT]        = stag->locationOffsets[DMSTAG_BACK]            + stag->dof[2];
3285   stag->locationOffsets[DMSTAG_DOWN]             = stag->locationOffsets[DMSTAG_DOWN_LEFT]       + stag->dof[1];
3286   stag->locationOffsets[DMSTAG_DOWN_RIGHT]       = stag->locationOffsets[DMSTAG_DOWN_LEFT]       + epe;
3287   stag->locationOffsets[DMSTAG_LEFT]             = stag->locationOffsets[DMSTAG_DOWN]            + stag->dof[2];
3288   stag->locationOffsets[DMSTAG_ELEMENT]          = stag->locationOffsets[DMSTAG_LEFT]            + stag->dof[2];
3289   stag->locationOffsets[DMSTAG_RIGHT]            = stag->locationOffsets[DMSTAG_LEFT]            + epe;
3290   stag->locationOffsets[DMSTAG_UP_LEFT]          = stag->locationOffsets[DMSTAG_DOWN_LEFT]       + epr;
3291   stag->locationOffsets[DMSTAG_UP]               = stag->locationOffsets[DMSTAG_DOWN]            + epr;
3292   stag->locationOffsets[DMSTAG_UP_RIGHT]         = stag->locationOffsets[DMSTAG_UP_LEFT]         + epe;
3293   stag->locationOffsets[DMSTAG_FRONT_DOWN_LEFT]  = stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT]  + epl;
3294   stag->locationOffsets[DMSTAG_FRONT_DOWN]       = stag->locationOffsets[DMSTAG_BACK_DOWN]       + epl;
3295   stag->locationOffsets[DMSTAG_FRONT_DOWN_RIGHT] = stag->locationOffsets[DMSTAG_FRONT_DOWN_LEFT] + epe;
3296   stag->locationOffsets[DMSTAG_FRONT_LEFT]       = stag->locationOffsets[DMSTAG_BACK_LEFT]       + epl;
3297   stag->locationOffsets[DMSTAG_FRONT]            = stag->locationOffsets[DMSTAG_BACK]            + epl;
3298   stag->locationOffsets[DMSTAG_FRONT_RIGHT]      = stag->locationOffsets[DMSTAG_FRONT_LEFT]      + epe;
3299   stag->locationOffsets[DMSTAG_FRONT_UP_LEFT]    = stag->locationOffsets[DMSTAG_FRONT_DOWN_LEFT] + epr;
3300   stag->locationOffsets[DMSTAG_FRONT_UP]         = stag->locationOffsets[DMSTAG_FRONT_DOWN]      + epr;
3301   stag->locationOffsets[DMSTAG_FRONT_UP_RIGHT]   = stag->locationOffsets[DMSTAG_FRONT_UP_LEFT]   + epe;
3302   PetscFunctionReturn(0);
3303 }
3304 
DMStagPopulateLocalToGlobalInjective_3d(DM dm)3305 PETSC_INTERN PetscErrorCode DMStagPopulateLocalToGlobalInjective_3d(DM dm)
3306 {
3307   PetscErrorCode  ierr;
3308   DM_Stag * const stag = (DM_Stag*)dm->data;
3309   PetscInt        *idxLocal,*idxGlobal,*globalOffsetsRecomputed;
3310   const PetscInt  *globalOffsets;
3311   PetscInt        count,d,entriesPerEdge,entriesPerFace,eprGhost,eplGhost,ghostOffsetStart[3],ghostOffsetEnd[3];
3312   IS              isLocal,isGlobal;
3313   PetscBool       dummyEnd[3];
3314 
3315   PetscFunctionBegin;
3316   ierr = DMStagSetUpBuildGlobalOffsets_3d(dm,&globalOffsetsRecomputed);CHKERRQ(ierr); /* note that we don't actually use all of these. An available optimization is to pass them, when available */
3317   globalOffsets = globalOffsetsRecomputed;
3318   ierr = PetscMalloc1(stag->entries,&idxLocal);CHKERRQ(ierr);
3319   ierr = PetscMalloc1(stag->entries,&idxGlobal);CHKERRQ(ierr);
3320   for (d=0; d<3; ++d) dummyEnd[d]   = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
3321   entriesPerFace                      = stag->dof[0] + 2*stag->dof[1] + stag->dof[2];
3322   entriesPerEdge                      = stag->dof[0] + stag->dof[1];
3323   eprGhost                            = stag->nGhost[0]*stag->entriesPerElement; /* epr = entries per (element) row   */
3324   eplGhost                            = stag->nGhost[1]*eprGhost;                /* epl = entries per (element) layer */
3325   count = 0;
3326   for (d=0; d<3; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
3327   for (d=0; d<3; ++d) ghostOffsetEnd[d]   = stag->startGhost[d]+stag->nGhost[d] - (stag->start[d]+stag->n[d]);
3328   {
3329     const PetscInt  neighbor     = 13;
3330     const PetscInt  epr          = stag->entriesPerElement * stag->n[0] + (dummyEnd[0] ? entriesPerFace : 0); /* We may be a right boundary */
3331     const PetscInt  epl          = epr                     * stag->n[1] + (dummyEnd[1] ? stag->n[0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We may be a top boundary */
3332     const PetscInt  epFaceRow    = entriesPerFace          * stag->n[0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
3333     const PetscInt  start0       = 0;
3334     const PetscInt  start1       = 0;
3335     const PetscInt  start2       = 0;
3336     const PetscInt  startGhost0  = ghostOffsetStart[0];
3337     const PetscInt  startGhost1  = ghostOffsetStart[1];
3338     const PetscInt  startGhost2  = ghostOffsetStart[2];
3339     const PetscInt  endGhost0    = stag->nGhost[0] - ghostOffsetEnd[0];
3340     const PetscInt  endGhost1    = stag->nGhost[1] - ghostOffsetEnd[1];
3341     const PetscInt  endGhost2    = stag->nGhost[2] - ghostOffsetEnd[2];
3342     const PetscBool extra0       = dummyEnd[0];
3343     const PetscBool extra1       = dummyEnd[1];
3344     const PetscBool extra2       = dummyEnd[2];
3345     ierr = DMStagSetUpBuildScatterPopulateIdx_3d(stag,&count,idxLocal,idxGlobal,entriesPerEdge,entriesPerFace,epr,epl,eprGhost,eplGhost,epFaceRow,globalOffsets[stag->neighbors[neighbor]],start0,start1,start2,startGhost0,startGhost1,startGhost2,endGhost0,endGhost1,endGhost2,extra0,extra1,extra2);CHKERRQ(ierr);
3346   }
3347   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)dm),stag->entries,idxLocal,PETSC_OWN_POINTER,&isLocal);CHKERRQ(ierr);
3348   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)dm),stag->entries,idxGlobal,PETSC_OWN_POINTER,&isGlobal);CHKERRQ(ierr);
3349   {
3350     Vec local,global;
3351     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm),1,stag->entries,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr);
3352     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,stag->entriesPerElement,stag->entriesGhost,NULL,&local);CHKERRQ(ierr);
3353     ierr = VecScatterCreate(local,isLocal,global,isGlobal,&stag->ltog_injective);CHKERRQ(ierr);
3354     ierr = VecDestroy(&global);CHKERRQ(ierr);
3355     ierr = VecDestroy(&local);CHKERRQ(ierr);
3356   }
3357   ierr = ISDestroy(&isLocal);CHKERRQ(ierr);
3358   ierr = ISDestroy(&isGlobal);CHKERRQ(ierr);
3359   if (globalOffsetsRecomputed) {
3360     ierr = PetscFree(globalOffsetsRecomputed);CHKERRQ(ierr);
3361   }
3362   PetscFunctionReturn(0);
3363 }
3364