1 // Copyright (c) 2017-2021, Lawrence Livermore National Security, LLC and
2 // other Axom Project Developers. See the top-level LICENSE file for details.
3 //
4 // SPDX-License-Identifier: (BSD-3-Clause)
5 
6 #include "lulesh.hpp"
7 
8 #include <math.h>
9 
10 #ifdef AXOM_USE_MPI
11   #include <mpi.h>
12 #endif
13 #ifdef AXOM_USE_OPENMP
14   #include <omp.h>
15 #endif
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <limits.h>
21 #include <cstdlib>
22 #include <algorithm>
23 
24 
25 namespace slamLulesh {
26 
27 /////////////////////////////////////////////////////////////////////
Domain(Int_t numRanks,Index_t colLoc,Index_t rowLoc,Index_t planeLoc,Index_t nx,Int_t tp,Int_t nr,Int_t balance,Int_t cost)28   Domain::Domain(Int_t numRanks, Index_t colLoc,
29       Index_t rowLoc, Index_t planeLoc,
30       Index_t nx, Int_t tp, Int_t nr, Int_t balance, Int_t cost)
31       : m_e_cut(Real_t(1.0e-7)),
32         m_p_cut(Real_t(1.0e-7)),
33         m_q_cut(Real_t(1.0e-7)),
34         m_v_cut(Real_t(1.0e-10)),
35         m_u_cut(Real_t(1.0e-7)),
36         m_hgcoef(Real_t(3.0)),
37         m_ss4o3(Real_t(4.0) / Real_t(3.0)),
38         m_qstop(Real_t(1.0e+12)),
39         m_monoq_max_slope(Real_t(1.0)),
40         m_monoq_limiter_mult(Real_t(2.0)),
41         m_qlc_monoq(Real_t(0.5)),
42         m_qqc_monoq(Real_t(2.0) / Real_t(3.0)),
43         m_qqc(Real_t(2.0)),
44         m_eosvmax(Real_t(1.0e+9)),
45         m_eosvmin(Real_t(1.0e-9)),
46         m_pmin(Real_t(0.)),
47         m_emin(Real_t(-1.0e+15)),
48         m_dvovmax(Real_t(0.1)),
49         m_refdens(Real_t(1.0))
50   {
51 
52     Index_t edgeElems = nx;
53     Index_t edgeNodes = edgeElems + 1;
54 
55     this->cost() = cost;
56 
57     m_tp       = tp;
58     m_numRanks = numRanks;
59 
60     ///////////////////////////////
61     //   Initialize Sedov Mesh
62     ///////////////////////////////
63 
64     // construct a uniform box for this processor
65 
66     m_colLoc   =   colLoc;
67     m_rowLoc   =   rowLoc;
68     m_planeLoc = planeLoc;
69 
70     m_sizeX = edgeElems;
71     m_sizeY = edgeElems;
72     m_sizeZ = edgeElems;
73     m_elemSet = ElemSet(edgeElems * edgeElems * edgeElems);
74     m_nodeSet = NodeSet(edgeNodes * edgeNodes * edgeNodes);
75     m_cornerSet = CornerSet( m_elemSet.size() * 8);
76 
77     Int_t facesPerPlane = edgeElems * edgeElems;
78     Int_t numExtendedElems = m_elemSet.size()   // local elem
79         + 2 * facesPerPlane                     // plane ghosts
80         + 2 * facesPerPlane                     // row ghosts
81         + 2 * facesPerPlane                     // col ghosts
82     ;
83 
84     m_extendedElemSet = ExtendedElemSet( numExtendedElems );
85 
86 
87     // Elem-centered
88     AllocateElemPersistent(numElem());
89 
90     // Node-centered
91     AllocateNodePersistent(numNode());
92 
93     SetupCommBuffers();
94 
95     // Basic Field Initialization
96     for (Index_t i = 0; i<numElem(); ++i)
97     {
98       e(i) =  Real_t(0.0);
99       p(i) =  Real_t(0.0);
100       q(i) =  Real_t(0.0);
101       ss(i) = Real_t(0.0);
102     }
103 
104     // Note - v initializes to 1.0, not 0.0!
105     for (Index_t i = 0; i<numElem(); ++i)
106     {
107       v(i) = Real_t(1.0);
108     }
109 
110     for (Index_t i = 0; i<numNode(); ++i)
111     {
112       xd(i) = Real_t(0.0);
113       yd(i) = Real_t(0.0);
114       zd(i) = Real_t(0.0);
115     }
116 
117     for (Index_t i = 0; i<numNode(); ++i)
118     {
119       xdd(i) = Real_t(0.0);
120       ydd(i) = Real_t(0.0);
121       zdd(i) = Real_t(0.0);
122     }
123 
124     for (Index_t i = 0; i<numNode(); ++i)
125     {
126       nodalMass(i) = Real_t(0.0);
127     }
128 
129     BuildMesh(nx, edgeNodes, edgeElems);
130 
131 #ifdef AXOM_USE_OPENMP
132     SetupThreadSupportStructures();
133 #endif
134 
135     // Setup region index sets. For now, these are constant sized
136     // throughout the run, but could be changed every cycle to
137     // simulate effects of ALE on the lagrange solver
138     CreateRegionIndexSets(nr, balance);
139 
140     // Setup symmetry nodesets
141     SetupSymmetryPlanes(edgeNodes);
142 
143     // Setup element connectivities
144     SetupElementConnectivities(edgeElems);
145 
146     // Setup symmetry planes and free surface boundary arrays
147     SetupBoundaryConditions(edgeElems);
148 
149 
150     // Setup defaults
151 
152     // These can be changed (requires recompile) if you want to run
153     // with a fixed timestep, or to a different end time, but it's
154     // probably easier/better to just run a fixed number of timesteps
155     // using the -i flag in 2.x
156 
157     dtfixed() = Real_t(-1.0e-6); // Negative means use courant condition
158     stoptime()  = Real_t(1.0e-2);// *Real_t(edgeElems*tp/45.0) ;
159 
160     // Initial conditions
161     deltatimemultlb() = Real_t(1.1);
162     deltatimemultub() = Real_t(1.2);
163     dtcourant()       = Real_t(1.0e+20);
164     dthydro()         = Real_t(1.0e+20);
165     dtmax()           = Real_t(1.0e-2);
166     time()            = Real_t(0.);
167     cycle()           = Int_t(0);
168 
169     // initialize field data
170     for (Index_t i = 0; i<numElem(); ++i)
171     {
172       Real_t x_local[8], y_local[8], z_local[8];
173       ElemNodeSet elemToNode = nodelist(i);
174       for( Index_t lnode = 0; lnode< elemToNode.size(); ++lnode )
175       {
176         Index_t gnode = elemToNode[lnode];
177         x_local[lnode] = x(gnode);
178         y_local[lnode] = y(gnode);
179         z_local[lnode] = z(gnode);
180       }
181 
182       // volume calculations
183       Real_t volume = CalcElemVolume(x_local, y_local, z_local );
184       volo(i) = volume;
185       elemMass(i) = volume;
186       for (Index_t j = 0; j<8; ++j)
187       {
188         Index_t idx = elemToNode[j];
189         nodalMass(idx) += volume / Real_t(8.0);
190       }
191     }
192 
193     // deposit initial energy
194     // An energy of 3.948746e+7 is correct for a problem with
195     // 45 zones along a side - we need to scale it
196     const Real_t ebase = Real_t(3.948746e+7);
197     Real_t scale = (nx * m_tp) / Real_t(45.0);
198     Real_t einit = ebase * scale * scale * scale;
199     if (m_rowLoc + m_colLoc + m_planeLoc == 0)
200     {
201       // Dump into the first zone (which we know is in the corner)
202       // of the domain that sits at the origin
203       e(0) = einit;
204     }
205     //set initial deltatime base on analytic CFL calculation
206     deltatime() = (Real_t(.5) * cbrt(volo(0))) / sqrt(Real_t(2.0) * einit);
207 
208   } // End constructor
209 
210 
211 ////////////////////////////////////////////////////////////////////////////////
212   void
BuildMesh(Int_t nx,Int_t edgeNodes,Int_t edgeElems)213   Domain::BuildMesh(Int_t nx, Int_t edgeNodes, Int_t edgeElems)
214   {
215     Index_t meshEdgeElems = m_tp * nx;
216 
217     // initialize nodal coordinates
218     Index_t nidx = 0;
219     Real_t tz = Real_t(1.125) * Real_t(m_planeLoc * nx) / Real_t(meshEdgeElems);
220 
221     for (Index_t plane = 0; plane<edgeNodes; ++plane)
222     {
223       Real_t ty = Real_t(1.125) * Real_t(m_rowLoc * nx) / Real_t(meshEdgeElems);
224       for (Index_t row = 0; row<edgeNodes; ++row)
225       {
226         Real_t tx = Real_t(1.125) * Real_t(m_colLoc * nx) / Real_t(meshEdgeElems);
227         for (Index_t col = 0; col<edgeNodes; ++col)
228         {
229           x(nidx) = tx;
230           y(nidx) = ty;
231           z(nidx) = tz;
232           ++nidx;
233           // tx += ds ; // may accumulate roundoff...
234           tx = Real_t(1.125) * Real_t(m_colLoc * nx + col + 1) / Real_t(meshEdgeElems);
235         }
236         // ty += ds ;  // may accumulate roundoff...
237         ty = Real_t(1.125) * Real_t(m_rowLoc * nx + row + 1) / Real_t(meshEdgeElems);
238       }
239       // tz += ds ;  // may accumulate roundoff...
240       tz = Real_t(1.125) * Real_t(m_planeLoc * nx + plane + 1) / Real_t(meshEdgeElems);
241     }
242 
243 
244     // embed hexahedral elements in nodal point lattice
245     // SLAM NOTE: This could be represented using an implicit relation, when available
246     // SLAM NOTE: Alternatively, the underlying connectivity should be derivable from an implicit Cartesian grid.
247     IntsRegistry::BufferType& local_nodelist = m_intsRegistry.addBuffer("hex_node_boundary", 8 * numElem() );
248     Index_t zidx = 0;
249     nidx = 0;
250     const int planeNodes = edgeNodes * edgeNodes;
251     for (Index_t plane = 0; plane<edgeElems; ++plane)
252     {
253       for (Index_t row = 0; row<edgeElems; ++row)
254       {
255         for (Index_t col = 0; col<edgeElems; ++col)
256         {
257           Index_t *localNode = &local_nodelist[Index_t(8) * zidx];
258 
259           localNode[0] = nidx;
260           localNode[1] = nidx                          + 1;
261           localNode[2] = nidx              + edgeNodes + 1;
262           localNode[3] = nidx              + edgeNodes;
263           localNode[4] = nidx + planeNodes;
264           localNode[5] = nidx + planeNodes             + 1;
265           localNode[6] = nidx + planeNodes + edgeNodes + 1;
266           localNode[7] = nidx + planeNodes + edgeNodes;
267           ++zidx;
268           ++nidx;
269         }
270         ++nidx;
271       }
272       nidx += edgeNodes;
273     }
274 
275     m_nodelist.bindIndices(local_nodelist.size(), &local_nodelist);
276     SLIC_ASSERT( m_nodelist.isValid());
277   }
278 
279 
280 ////////////////////////////////////////////////////////////////////////////////
281   void
SetupThreadSupportStructures()282   Domain::SetupThreadSupportStructures()
283   {
284 #ifdef AXOM_USE_OPENMP
285     Index_t numthreads = omp_get_max_threads();
286 #else
287     Index_t numthreads = 1;
288 #endif
289 
290     if (numthreads > 1)
291     {
292       NodeIndexMap nodeCornerCount(&m_nodeSet);         // Keeps track of the number of corners per node
293 
294       for (Index_t i = 0; i<numElem(); ++i)               // foreach elem
295       {
296         ElemNodeSet nl = nodelist(i);                     //   grab the Elem2Node relation for the element
297         for (Index_t j = 0; j < nl.size(); ++j)           //   for each node of the element
298         {
299           ++(nodeCornerCount[ nl[j]] );                   //     increment the count for that node
300         }
301       }
302 
303       // Invert the zones to corner relation
304       IntsRegistry::BufferType& local_begins = m_intsRegistry.addBuffer("node_corner_begins", numNode() + 1 );
305       IntsRegistry::BufferType& local_relIndices = m_intsRegistry.addBuffer("node_corner_rel_indices", m_cornerSet.size() );
306 
307       local_begins[0] = 0;                                    // use the counts array to set the begins array
308       for (Index_t i = 1; i <= numNode(); ++i)
309       {
310         local_begins[i] = local_begins[i - 1] + nodeCornerCount[i - 1];
311         nodeCornerCount[i - 1] = 0;
312       }
313 
314       for (Index_t i = 0; i < numElem(); ++i)                    // foreach elem
315       {
316         ElemNodeSet nl = nodelist(i);                            //   grab the Elem2Node relation for the elem
317         for (Index_t j = 0; j < nl.size(); ++j)                  //   foreach node of the elem
318         {
319           Index_t m = nl[j];                                     //     m is the node index pointed to by this corner
320           Index_t k = i * 8 + j;                                 //     k is the corner index (elem*8+offset)
321           Index_t offset = local_begins[m] + nodeCornerCount[m]; //     offset is where this element belongs in the offsets array
322           local_relIndices[offset] = k;                             //     this is the offsets array of the node to corner relation
323           ++(nodeCornerCount[m]);                                //     increment the count for this node
324         }
325       }
326 
327       // Finally create the relation over these arrays and check validity
328       m_nodeCornerRelation = NodeToCornerRelation(&m_nodeSet, &m_cornerSet);
329       m_nodeCornerRelation.bindBeginOffsets(numNode(), &local_begins);
330       m_nodeCornerRelation.bindIndices(local_relIndices.size(), &local_relIndices);
331 
332       SLIC_ASSERT_MSG(m_nodeCornerRelation.isValid(), "Generating Node to Corner relation: Corner index out of range." );
333     }
334   }
335 
336 
337 ////////////////////////////////////////////////////////////////////////////////
338   void
SetupCommBuffers()339   Domain::SetupCommBuffers()
340   {
341     // allocate a buffer large enough for nodal ghost data
342     Index_t maxEdgeSize = MAX(this->sizeX(), MAX(this->sizeY(), this->sizeZ())) + 1;
343 
344     m_maxPlaneSize = CACHE_ALIGN_REAL(maxEdgeSize * maxEdgeSize);
345     m_maxEdgeSize = CACHE_ALIGN_REAL(maxEdgeSize);
346 
347     // assume communication to 6 neighbors by default
348     m_rowMin   = (m_rowLoc == 0)          ? 0 : 1;
349     m_rowMax   = (m_rowLoc == m_tp - 1)   ? 0 : 1;
350     m_colMin   = (m_colLoc == 0)          ? 0 : 1;
351     m_colMax   = (m_colLoc == m_tp - 1)   ? 0 : 1;
352     m_planeMin = (m_planeLoc == 0)        ? 0 : 1;
353     m_planeMax = (m_planeLoc == m_tp - 1) ? 0 : 1;
354 
355 #ifdef AXOM_USE_MPI
356     // account for face communication
357     Index_t comBufSize =
358         (m_rowMin + m_rowMax + m_colMin + m_colMax + m_planeMin + m_planeMax) *
359         m_maxPlaneSize * MAX_FIELDS_PER_MPI_COMM;
360 
361     // account for edge communication
362     comBufSize +=
363         ((m_rowMin & m_colMin) + (m_rowMin & m_planeMin) + (m_colMin & m_planeMin) +
364         (m_rowMax & m_colMax) + (m_rowMax & m_planeMax) + (m_colMax & m_planeMax) +
365         (m_rowMax & m_colMin) + (m_rowMin & m_planeMax) + (m_colMin & m_planeMax) +
366         (m_rowMin & m_colMax) + (m_rowMax & m_planeMin) + (m_colMax & m_planeMin)) *
367         m_maxEdgeSize * MAX_FIELDS_PER_MPI_COMM;
368 
369     // account for corner communication
370     // factor of 16 is so each buffer has its own cache line
371     comBufSize += ((m_rowMin & m_colMin & m_planeMin) +
372         (m_rowMin & m_colMin & m_planeMax) +
373         (m_rowMin & m_colMax & m_planeMin) +
374         (m_rowMin & m_colMax & m_planeMax) +
375         (m_rowMax & m_colMin & m_planeMin) +
376         (m_rowMax & m_colMin & m_planeMax) +
377         (m_rowMax & m_colMax & m_planeMin) +
378         (m_rowMax & m_colMax & m_planeMax)) * CACHE_COHERENCE_PAD_REAL;
379 
380     this->commDataSend = new Real_t[comBufSize];
381     this->commDataRecv = new Real_t[comBufSize];
382     // prevent floating point exceptions
383     memset( this->commDataSend, 0,  comBufSize * sizeof(Real_t));
384     memset( this->commDataRecv, 0,  comBufSize * sizeof(Real_t));
385 #endif
386 
387   }
388 
389 
390 ////////////////////////////////////////////////////////////////////////////////
391   void
CreateRegionIndexSets(Int_t nr,Int_t balance)392   Domain::CreateRegionIndexSets(Int_t nr, Int_t balance)
393   {
394     using RegionToElemDynamicRelation = axom::slam::DynamicVariableRelation<PositionType, ElementType>;
395 
396 #ifdef AXOM_USE_MPI
397     int myRank;
398     MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
399     srand(myRank);
400 #else
401     srand(0);
402     Index_t myRank = 0;
403 #endif
404 
405     // Create a region set over the number of regions in the mesh -- regions are offset by one
406     m_regionSet = RegionSet::SetBuilder().size(nr).offset(1);
407 
408     // Generate the region to element map as a dynamic relation.
409     // We will linearize this into a static relation below...
410     RegionToElemDynamicRelation reg2Elems(&m_regionSet, &m_elemSet);
411 
412     //if we only have one region just fill it
413     if(numReg() == 1)
414     {
415       // Create the region material number map over the elements
416       const Index_t regMatID = m_regionSet[0];
417       m_elemRegNum = ElemIntMap(&m_elemSet, regMatID);
418     }
419     //If we have more than one region distribute the elements.
420     else {
421       Int_t regionNum, regionIdx;
422       Int_t lastReg = -1;
423       Index_t elements;
424       Index_t runto = 0;
425 
426       // Create the region material map over the elements
427       m_elemRegNum = ElemIntMap(&m_elemSet);
428 
429       // Setup an unnormalized CDF to randomly choose the region
430       RegionIntMap regChooserProbs(&m_regionSet);
431 
432       // Determine the relative weights of all the regions as a CDF.
433       // This is based off the -b flag.  Balance is the value passed into b.
434       Int_t costDenominator = 0;
435       for (Index_t i = 0; i< numReg(); ++i)
436       {
437         costDenominator += pow((i + 1), balance); //Total sum of all regions weights
438         regChooserProbs[i] = costDenominator;   //Chance of hitting a given region is (regChooserProbs[i] - regChooserProbs[i-1])/costDenominator
439       }
440 
441       // Setup an unnormalized CDF to randomly choose the number of elements in each run
442       const int sizeChooserProbs[7] = {773, 937, 970, 974, 978, 981, 1000};
443       using Builder = axom::slam::RangeSet<>::SetBuilder;
444       axom::slam::RangeSet<> sizeChooser[7];
445       sizeChooser[0] = Builder().range(1,16);
446       sizeChooser[1] = Builder().range(16,32);
447       sizeChooser[2] = Builder().range(32,64);
448       sizeChooser[3] = Builder().range(64,128);
449       sizeChooser[4] = Builder().range(128,256);
450       sizeChooser[5] = Builder().range(256,512);
451       sizeChooser[6] = Builder().range(512,2048);
452 
453 
454       // Assign an element to each region
455       Index_t nextIndex = 0;
456       while (nextIndex < numElem())
457       {
458         //pick the region; make sure we don't pick the same region twice in a row
459         do {
460           Int_t regionVar = rand() % costDenominator;
461           Index_t i = 0;
462           while(regionVar >= regChooserProbs[i])
463             i++;
464 
465           // Rotate regions based on MPI rank to give each domain a different region with the highest representation
466           regionIdx = (i + myRank) % numReg();
467           regionNum = m_regionSet[ regionIdx ];
468         } while(regionNum == lastReg);
469 
470         // Determine the number of elements in this run using the sizeChooserProbs CDF and sizeChooser sets.
471         Int_t binSize = rand() % 1000;
472         Index_t binIndex = 0;
473         while(binSize >= sizeChooserProbs[binIndex])
474           ++binIndex;
475 
476         const axom::slam::RangeSet<>& rset = sizeChooser[ binIndex ];
477         elements = rset[ rand() % rset.size() ]; // gets a random number from this bin's sizeChooser set
478 
479         runto = elements + nextIndex;
480 
481         //Store the elements.  If we hit the end before we run out of elements then just stop.
482         for(; nextIndex < runto && nextIndex < numElem(); nextIndex++)
483         {
484           reg2Elems.insert(regionIdx, nextIndex);
485           m_elemRegNum[nextIndex] = regionNum;
486         }
487         lastReg = regionNum;
488       }
489     }
490 
491     SLIC_ASSERT(reg2Elems.isValid());    // Ensure that the dynamic relation is valid
492 
493     // Convert from a Dynamic to a Static relation
494     IntsRegistry::BufferType& local_begins = m_intsRegistry.addBuffer("reg_elem_begins", numReg() + 1 );
495     IntsRegistry::BufferType& local_relIndices = m_intsRegistry.addBuffer("reg_elem_rel_indices", numElem() );
496     Index_t curOffIdx = 0;
497     for(Index_t regionPos = 0; regionPos < numReg(); ++regionPos)
498     {
499       local_begins[ regionPos] = curOffIdx;
500       for(Index_t elemRelPos = 0; elemRelPos < reg2Elems.size( regionPos); ++elemRelPos)
501       {
502         local_relIndices[curOffIdx++] = reg2Elems[ regionPos][elemRelPos];
503       }
504     }
505     local_begins[ numReg()] = local_relIndices.size();
506 
507     /// We can finally create the region to elem relation
508     m_regionElementsRel = RegionToElemRelation(&m_regionSet, &m_elemSet);
509     m_regionElementsRel.bindBeginOffsets(numReg(), &local_begins);
510     m_regionElementsRel.bindIndices(local_relIndices.size(), &local_relIndices);
511 
512     SLIC_ASSERT(m_regionElementsRel.isValid(true));    // Ensure that the relation is valid
513   }
514 
515 /////////////////////////////////////////////////////////////
516   void
SetupSymmetryPlanes(Int_t edgeNodes)517   Domain::SetupSymmetryPlanes(Int_t edgeNodes)
518   {
519     Int_t numSymmNodesX = m_colLoc == 0   ? edgeNodes * edgeNodes : 0;
520     Int_t numSymmNodesY = m_rowLoc == 0   ? edgeNodes * edgeNodes : 0;
521     Int_t numSymmNodesZ = m_planeLoc == 0 ? edgeNodes * edgeNodes : 0;
522 
523     // Setup symmetry NodeSets            // TODO: Need to add the mesh's nodes as parent
524     m_symmX = SymmNodeSet(numSymmNodesX); // eparentSet = &m_nodeSet
525     m_symmY = SymmNodeSet(numSymmNodesY);
526     m_symmZ = SymmNodeSet(numSymmNodesZ);
527 
528     auto& loc_symmX = m_intsRegistry.addField("symmX", &m_symmX).data();
529     auto& loc_symmY = m_intsRegistry.addField("symmY", &m_symmY).data();
530     auto& loc_symmZ = m_intsRegistry.addField("symmZ", &m_symmZ).data();
531 
532     // SLAM Note: We should be able to compute these directory from a Cartesian product set defining a regular grid.
533     Index_t nidx = 0;
534     for (Index_t i = 0; i<edgeNodes; ++i)
535     {
536       Index_t planeInc = i * edgeNodes * edgeNodes;
537       Index_t rowInc   = i * edgeNodes;
538       for (Index_t j = 0; j<edgeNodes; ++j)
539       {
540         if (m_planeLoc == 0)
541         {
542           loc_symmZ[nidx] = rowInc   + j;
543         }
544         if (m_rowLoc == 0)
545         {
546           loc_symmY[nidx] = planeInc + j;
547         }
548         if (m_colLoc == 0)
549         {
550           loc_symmX[nidx] = planeInc + j * edgeNodes;
551         }
552         ++nidx;
553       }
554     }
555 
556     m_symmX.data() = &loc_symmX;
557     m_symmY.data() = &loc_symmY;
558     m_symmZ.data() = &loc_symmZ;
559 
560     // Verify validity of the sets.
561     SLIC_ASSERT(  m_symmX.isValid() && m_symmX.size() == numSymmNodesX && m_symmX.data() == &loc_symmX);
562     SLIC_ASSERT(  m_symmY.isValid() && m_symmY.size() == numSymmNodesY && m_symmY.data() == &loc_symmY);
563     SLIC_ASSERT(  m_symmZ.isValid() && m_symmZ.size() == numSymmNodesZ && m_symmZ.data() == &loc_symmZ);
564   }
565 
566 
567 
568 /////////////////////////////////////////////////////////////
569   void
SetupElementConnectivities(Int_t edgeElems)570   Domain::SetupElementConnectivities(Int_t edgeElems)
571   {
572     // Create temporary arrays to hold the data
573     IntsRegistry::BufferType& indices_xim = m_intsRegistry.addBuffer("zone_face_xi_m", numElem());
574     IntsRegistry::BufferType& indices_xip = m_intsRegistry.addBuffer("zone_face_xi_p", numElem());
575 
576     // Setup xi face adjacencies
577     indices_xim[0] = 0;
578     for (Index_t i = 1; i<numElem(); ++i)
579     {
580       indices_xim[i]   = i - 1;
581       indices_xip[i - 1] = i;
582     }
583     indices_xip[numElem() - 1] = numElem() - 1;
584     m_lxim.bindIndices(numElem(), &indices_xim);
585     m_lxip.bindIndices(numElem(), &indices_xip);
586 
587     // Setup eta face adjacencies
588     IntsRegistry::BufferType& indices_etam = m_intsRegistry.addBuffer("zone_face_eta_m", numElem());
589     IntsRegistry::BufferType& indices_etap = m_intsRegistry.addBuffer("zone_face_eta_p", numElem());
590     for (Index_t i = 0; i<edgeElems; ++i)
591     {
592       indices_etam[i] = i;
593       indices_etap[numElem() - edgeElems + i] = numElem() - edgeElems + i;
594     }
595     for (Index_t i = edgeElems; i<numElem(); ++i)
596     {
597       indices_etam[i] = i - edgeElems;
598       indices_etap[i - edgeElems] = i;
599     }
600     m_letam.bindIndices( numElem(), &indices_etam);
601     m_letap.bindIndices( numElem(), &indices_etap);
602 
603     // Setup zeta face adjacencies
604     IntsRegistry::BufferType& indices_zetam = m_intsRegistry.addBuffer("zone_face_zeta_m", numElem());
605     IntsRegistry::BufferType& indices_zetap = m_intsRegistry.addBuffer("zone_face_zeta_p", numElem());
606     for (Index_t i = 0; i<edgeElems * edgeElems; ++i)
607     {
608       indices_zetam[i] = i;
609       indices_zetap[numElem() - edgeElems * edgeElems + i] = numElem() - edgeElems * edgeElems + i;
610     }
611     for (Index_t i = edgeElems * edgeElems; i<numElem(); ++i)
612     {
613       indices_zetam[i] = i - edgeElems * edgeElems;
614       indices_zetap[i - edgeElems * edgeElems] = i;
615     }
616     m_lzetam.bindIndices(numElem(), &indices_zetam);
617     m_lzetap.bindIndices(numElem(), &indices_zetap);
618 
619     // Ensure that all the indices in the relations are valid
620     SLIC_ASSERT(  m_lxim.isValid() );
621     SLIC_ASSERT(  m_lxip.isValid() );
622     SLIC_ASSERT(  m_letam.isValid() );
623     SLIC_ASSERT(  m_letap.isValid() );
624     SLIC_ASSERT(  m_lzetam.isValid() );
625     SLIC_ASSERT(  m_lzetap.isValid() );
626   }
627 
628 /////////////////////////////////////////////////////////////
629   void
SetupBoundaryConditions(Int_t edgeElems)630   Domain::SetupBoundaryConditions(Int_t edgeElems)
631   {
632     Index_t ghostIdx[6]; // offsets to ghost locations
633 
634     // set up boundary condition information
635     for (Index_t i = 0; i<numElem(); ++i)
636     {
637       elemBC(i) = Int_t(0);
638     }
639 
640     for (Index_t i = 0; i<6; ++i)
641     {
642       ghostIdx[i] = INT_MIN;
643     }
644 
645     Int_t pidx = numElem();
646     if (m_planeMin != 0)
647     {
648       ghostIdx[0] = pidx;
649       pidx += sizeX() * sizeY();
650     }
651 
652     if (m_planeMax != 0)
653     {
654       ghostIdx[1] = pidx;
655       pidx += sizeX() * sizeY();
656     }
657 
658     if (m_rowMin != 0)
659     {
660       ghostIdx[2] = pidx;
661       pidx += sizeX() * sizeZ();
662     }
663 
664     if (m_rowMax != 0)
665     {
666       ghostIdx[3] = pidx;
667       pidx += sizeX() * sizeZ();
668     }
669 
670     if (m_colMin != 0)
671     {
672       ghostIdx[4] = pidx;
673       pidx += sizeY() * sizeZ();
674     }
675 
676     if (m_colMax != 0)
677     {
678       ghostIdx[5] = pidx;
679     }
680 
681 
682     // SLAM HACK: We want to modify the data of a StaticRelation (ElemFaceAdjacencyRelation).
683     //            This will be addressed in ATK-927.
684     //            For now, grab the underlying array from the registry
685 
686     IntsRegistry::BufferType& local_xi_m = m_intsRegistry.getBuffer("zone_face_xi_m");
687     IntsRegistry::BufferType& local_xi_p = m_intsRegistry.getBuffer("zone_face_xi_p");
688     IntsRegistry::BufferType& local_eta_m = m_intsRegistry.getBuffer("zone_face_eta_m");
689     IntsRegistry::BufferType& local_eta_p = m_intsRegistry.getBuffer("zone_face_eta_p");
690     IntsRegistry::BufferType& local_zeta_m = m_intsRegistry.getBuffer("zone_face_zeta_m");
691     IntsRegistry::BufferType& local_zeta_p = m_intsRegistry.getBuffer("zone_face_zeta_p");
692 
693     // symmetry plane or free surface BCs
694     for (Index_t i = 0; i<edgeElems; ++i)
695     {
696       Index_t planeInc = i * edgeElems * edgeElems;
697       Index_t rowInc   = i * edgeElems;
698       for (Index_t j = 0; j<edgeElems; ++j)
699       {
700         if (m_planeLoc == 0)
701         {
702           elemBC(rowInc + j) |= ZETA_M_SYMM;
703         }
704         else {
705           elemBC(rowInc + j) |= ZETA_M_COMM;
706           local_zeta_m[rowInc + j] = ghostIdx[0] + rowInc + j;
707         }
708 
709         if (m_planeLoc == m_tp - 1)
710         {
711           elemBC(rowInc + j + numElem() - edgeElems * edgeElems) |= ZETA_P_FREE;
712         }
713         else {
714           elemBC(rowInc + j + numElem() - edgeElems * edgeElems) |= ZETA_P_COMM;
715           local_zeta_p[rowInc + j + numElem() - edgeElems * edgeElems] = ghostIdx[1] + rowInc + j;
716         }
717 
718         if (m_rowLoc == 0)
719         {
720           elemBC(planeInc + j) |= ETA_M_SYMM;
721         }
722         else {
723           elemBC(planeInc + j) |= ETA_M_COMM;
724           local_eta_m[planeInc + j] = ghostIdx[2] + rowInc + j;
725         }
726 
727         if (m_rowLoc == m_tp - 1)
728         {
729           elemBC(planeInc + j + edgeElems * edgeElems - edgeElems) |= ETA_P_FREE;
730         }
731         else {
732           elemBC(planeInc + j + edgeElems * edgeElems - edgeElems) |= ETA_P_COMM;
733           local_eta_p[planeInc + j + edgeElems * edgeElems - edgeElems] = ghostIdx[3] +  rowInc + j;
734         }
735 
736         if (m_colLoc == 0)
737         {
738           elemBC(planeInc + j * edgeElems) |= XI_M_SYMM;
739         }
740         else {
741           elemBC(planeInc + j * edgeElems) |= XI_M_COMM;
742           local_xi_m[planeInc + j * edgeElems] = ghostIdx[4] + rowInc + j;
743         }
744 
745         if (m_colLoc == m_tp - 1)
746         {
747           elemBC(planeInc + j * edgeElems + edgeElems - 1) |= XI_P_FREE;
748         }
749         else {
750           elemBC(planeInc + j * edgeElems + edgeElems - 1) |= XI_P_COMM;
751           local_xi_p[planeInc + j * edgeElems + edgeElems - 1] = ghostIdx[5] + rowInc + j;
752         }
753       }
754     }
755 
756     // Ensure that all the indices in the element adjacency relations are still valid
757     SLIC_ASSERT(  m_lxim.isValid() );
758     SLIC_ASSERT(  m_lxip.isValid() );
759     SLIC_ASSERT(  m_letam.isValid() );
760     SLIC_ASSERT(  m_letap.isValid() );
761     SLIC_ASSERT(  m_lzetam.isValid() );
762     SLIC_ASSERT(  m_lzetap.isValid() );
763   }
764 
765 ///////////////////////////////////////////////////////////////////////////
InitMeshDecomp(Int_t numRanks,Int_t myRank,Int_t * col,Int_t * row,Int_t * plane,Int_t * side)766   void InitMeshDecomp(Int_t numRanks, Int_t myRank,
767       Int_t *col, Int_t *row, Int_t *plane, Int_t *side)
768   {
769     Int_t testProcs;
770     Int_t dx, dy, dz;
771     Int_t myDom;
772 
773     // Assume cube processor layout for now
774     testProcs = Int_t(cbrt(Real_t(numRanks)) + 0.5);
775     if (testProcs * testProcs * testProcs != numRanks)
776     {
777       SLIC_WARNING("Num processors must be a cube of an integer (1, 8, 27, ...)");
778 #ifdef AXOM_USE_MPI
779       MPI_Abort(MPI_COMM_WORLD, -1);
780 #else
781       exit(-1);
782 #endif
783     }
784     if (sizeof(Real_t) != 4 && sizeof(Real_t) != 8)
785     {
786       SLIC_WARNING("MPI operations only support float and double right now...");
787 #ifdef AXOM_USE_MPI
788       MPI_Abort(MPI_COMM_WORLD, -1);
789 #else
790       exit(-1);
791 #endif
792     }
793     if (MAX_FIELDS_PER_MPI_COMM > CACHE_COHERENCE_PAD_REAL)
794     {
795       SLIC_WARNING("Corner element comm buffers too small.  Fix code.");
796 #ifdef AXOM_USE_MPI
797       MPI_Abort(MPI_COMM_WORLD, -1);
798 #else
799       exit(-1);
800 #endif
801     }
802 
803     dx = testProcs;
804     dy = testProcs;
805     dz = testProcs;
806 
807     // temporary test
808     if (dx * dy * dz != numRanks)
809     {
810       SLIC_WARNING("Error -- must have as many domains as procs.");
811 #ifdef AXOM_USE_MPI
812       MPI_Abort(MPI_COMM_WORLD, -1);
813 #else
814       exit(-1);
815 #endif
816     }
817     Int_t remainder = dx * dy * dz % numRanks;
818     if (myRank < remainder)
819     {
820       myDom = myRank * ( 1 + (dx * dy * dz / numRanks));
821     }
822     else {
823       myDom = remainder * ( 1 + (dx * dy * dz / numRanks)) +
824           (myRank - remainder) * (dx * dy * dz / numRanks);
825     }
826 
827     *col = myDom % dx;
828     *row = (myDom / dx) % dy;
829     *plane = myDom / (dx * dy);
830     *side = testProcs;
831 
832     return;
833   }
834 
835 } // end namespace slamLulesh
836