1 #if !defined(USE_MPI)
2 # error "You should specify USE_MPI=0 or USE_MPI=1 on the compile line"
3 #endif
4 
5 
6 // OpenMP will be compiled in if this flag is set to 1 AND the compiler beging
7 // used supports it (i.e. the LULESH_USE_OPENMP symbol is defined)
8 #define USE_OMP 1
9 
10 #if USE_MPI
11 #include <mpi.h>
12 #endif
13 
14 #include <mpi.h>
15 
16 /*
17    define one of these three symbols:
18 
19    SEDOV_SYNC_POS_VEL_NONE
20    SEDOV_SYNC_POS_VEL_EARLY
21    SEDOV_SYNC_POS_VEL_LATE
22 */
23 
24 #define SEDOV_SYNC_POS_VEL_EARLY 1
25 
26 #include <math.h>
27 #include <vector>
28 
29 //**************************************************
30 // Allow flexibility for arithmetic representations
31 //**************************************************
32 
33 #define MAX(a, b) ( ((a) > (b)) ? (a) : (b))
34 
35 
36 // Precision specification
37 typedef float        real4 ;
38 typedef double       real8 ;
39 typedef long double  real10 ;  // 10 bytes on x86
40 
41 typedef int    Index_t ; // array subscript and loop index
42 typedef real8  Real_t ;  // floating point representation
43 typedef int    Int_t ;   // integer representation
44 
45 enum { VolumeError = -1, QStopError = -2 } ;
46 
SQRT(real4 arg)47 inline real4  SQRT(real4  arg) { return sqrtf(arg) ; }
SQRT(real8 arg)48 inline real8  SQRT(real8  arg) { return sqrt(arg) ; }
SQRT(real10 arg)49 inline real10 SQRT(real10 arg) { return sqrtl(arg) ; }
50 
CBRT(real4 arg)51 inline real4  CBRT(real4  arg) { return cbrtf(arg) ; }
CBRT(real8 arg)52 inline real8  CBRT(real8  arg) { return cbrt(arg) ; }
CBRT(real10 arg)53 inline real10 CBRT(real10 arg) { return cbrtl(arg) ; }
54 
FABS(real4 arg)55 inline real4  FABS(real4  arg) { return fabsf(arg) ; }
FABS(real8 arg)56 inline real8  FABS(real8  arg) { return fabs(arg) ; }
FABS(real10 arg)57 inline real10 FABS(real10 arg) { return fabsl(arg) ; }
58 
59 
60 // Stuff needed for boundary conditions
61 // 2 BCs on each of 6 hexahedral faces (12 bits)
62 #define XI_M        0x00007
63 #define XI_M_SYMM   0x00001
64 #define XI_M_FREE   0x00002
65 #define XI_M_COMM   0x00004
66 
67 #define XI_P        0x00038
68 #define XI_P_SYMM   0x00008
69 #define XI_P_FREE   0x00010
70 #define XI_P_COMM   0x00020
71 
72 #define ETA_M       0x001c0
73 #define ETA_M_SYMM  0x00040
74 #define ETA_M_FREE  0x00080
75 #define ETA_M_COMM  0x00100
76 
77 #define ETA_P       0x00e00
78 #define ETA_P_SYMM  0x00200
79 #define ETA_P_FREE  0x00400
80 #define ETA_P_COMM  0x00800
81 
82 #define ZETA_M      0x07000
83 #define ZETA_M_SYMM 0x01000
84 #define ZETA_M_FREE 0x02000
85 #define ZETA_M_COMM 0x04000
86 
87 #define ZETA_P      0x38000
88 #define ZETA_P_SYMM 0x08000
89 #define ZETA_P_FREE 0x10000
90 #define ZETA_P_COMM 0x20000
91 
92 // MPI Message Tags
93 #define MSG_COMM_SBN      1024
94 #define MSG_SYNC_POS_VEL  2048
95 #define MSG_MONOQ         3072
96 
97 #define MAX_FIELDS_PER_MPI_COMM 6
98 
99 // Assume 128 byte coherence
100 // Assume Real_t is an "integral power of 2" bytes wide
101 #define CACHE_COHERENCE_PAD_REAL (128 / sizeof(Real_t))
102 
103 #define CACHE_ALIGN_REAL(n) \
104    (((n) + (CACHE_COHERENCE_PAD_REAL - 1)) & ~(CACHE_COHERENCE_PAD_REAL-1))
105 
106 //////////////////////////////////////////////////////
107 // Primary data structure
108 //////////////////////////////////////////////////////
109 
110 /*
111  * The implementation of the data abstraction used for lulesh
112  * resides entirely in the Domain class below.  You can change
113  * grouping and interleaving of fields here to maximize data layout
114  * efficiency for your underlying architecture or compiler.
115  *
116  * For example, fields can be implemented as STL objects or
117  * raw array pointers.  As another example, individual fields
118  * m_x, m_y, m_z could be budled into
119  *
120  *    struct { Real_t x, y, z ; } *m_coord ;
121  *
122  * allowing accessor functions such as
123  *
124  *  "Real_t &x(Index_t idx) { return m_coord[idx].x ; }"
125  *  "Real_t &y(Index_t idx) { return m_coord[idx].y ; }"
126  *  "Real_t &z(Index_t idx) { return m_coord[idx].z ; }"
127  */
128 
129 class Domain {
130 
131    public:
132 
133    // Constructor
134    Domain(Int_t numRanks, Index_t colLoc,
135           Index_t rowLoc, Index_t planeLoc,
136           Index_t nx, Int_t tp, Int_t nr, Int_t balance, Int_t cost);
137 
138    //
139    // ALLOCATION
140    //
141 
AllocateNodePersistent(Int_t numNode)142    void AllocateNodePersistent(Int_t numNode) // Node-centered
143    {
144       m_coord.resize(numNode);  // coordinates
145 
146       m_vel.resize(numNode); // velocities
147 
148       m_acc.resize(numNode); // accelerations
149 
150       m_force.resize(numNode);  // forces
151 
152       m_nodalMass.resize(numNode);  // mass
153    }
154 
AllocateElemPersistent(Int_t numElem)155    void AllocateElemPersistent(Int_t numElem) // Elem-centered
156    {
157       m_nodelist.resize(8*numElem);
158 
159       // elem connectivities through face
160       m_faceToElem.resize(numElem);
161 
162       m_elemBC.resize(numElem);
163 
164       m_e.resize(numElem);
165 
166       m_pq.resize(numElem);
167 
168       m_qlqq.resize(numElem);
169 
170       m_vol.resize(numElem);
171 
172       m_delv.resize(numElem);
173       m_vdov.resize(numElem);
174 
175       m_arealg.resize(numElem);
176 
177       m_ss.resize(numElem);
178 
179       m_elemMass.resize(numElem);
180    }
181 
AllocateGradients(Int_t numElem,Int_t allElem)182    void AllocateGradients(Int_t numElem, Int_t allElem)
183    {
184       // Position gradients
185       m_delx_xi.resize(numElem) ;
186       m_delx_eta.resize(numElem) ;
187       m_delx_zeta.resize(numElem) ;
188 
189       // Velocity gradients
190       m_delv_xi.resize(allElem) ;
191       m_delv_eta.resize(allElem);
192       m_delv_zeta.resize(allElem) ;
193    }
194 
DeallocateGradients()195    void DeallocateGradients()
196    {
197       m_delx_zeta.clear() ;
198       m_delx_eta.clear() ;
199       m_delx_xi.clear() ;
200 
201       m_delv_zeta.clear() ;
202       m_delv_eta.clear() ;
203       m_delv_xi.clear() ;
204    }
205 
AllocateStrains(Int_t numElem)206    void AllocateStrains(Int_t numElem)
207    {
208       m_dxx.resize(numElem) ;
209       m_dyy.resize(numElem) ;
210       m_dzz.resize(numElem) ;
211    }
212 
DeallocateStrains()213    void DeallocateStrains()
214    {
215       m_dzz.clear() ;
216       m_dyy.clear() ;
217       m_dxx.clear() ;
218    }
219 
220    //
221    // ACCESSORS
222    //
223 
224    // Node-centered
225 
226    // Nodal coordinates
x(Index_t idx)227    Real_t& x(Index_t idx)    { return m_coord[idx].x ; }
y(Index_t idx)228    Real_t& y(Index_t idx)    { return m_coord[idx].y ; }
z(Index_t idx)229    Real_t& z(Index_t idx)    { return m_coord[idx].z ; }
230 
231    // Nodal velocities
xd(Index_t idx)232    Real_t& xd(Index_t idx)   { return m_vel[idx].x ; }
yd(Index_t idx)233    Real_t& yd(Index_t idx)   { return m_vel[idx].y ; }
zd(Index_t idx)234    Real_t& zd(Index_t idx)   { return m_vel[idx].z ; }
235 
236    // Nodal accelerations
xdd(Index_t idx)237    Real_t& xdd(Index_t idx)  { return m_acc[idx].x ; }
ydd(Index_t idx)238    Real_t& ydd(Index_t idx)  { return m_acc[idx].y ; }
zdd(Index_t idx)239    Real_t& zdd(Index_t idx)  { return m_acc[idx].z ; }
240 
241    // Nodal forces
fx(Index_t idx)242    Real_t& fx(Index_t idx)   { return m_force[idx].x ; }
fy(Index_t idx)243    Real_t& fy(Index_t idx)   { return m_force[idx].y ; }
fz(Index_t idx)244    Real_t& fz(Index_t idx)   { return m_force[idx].z ; }
245 
246    // Nodal mass
nodalMass(Index_t idx)247    Real_t& nodalMass(Index_t idx) { return m_nodalMass[idx] ; }
248 
249    // Nodes on symmertry planes
symmX(Index_t idx)250    Index_t symmX(Index_t idx) { return m_symmX[idx] ; }
symmY(Index_t idx)251    Index_t symmY(Index_t idx) { return m_symmY[idx] ; }
symmZ(Index_t idx)252    Index_t symmZ(Index_t idx) { return m_symmZ[idx] ; }
symmXempty()253    bool symmXempty()          { return m_symmX.empty(); }
symmYempty()254    bool symmYempty()          { return m_symmY.empty(); }
symmZempty()255    bool symmZempty()          { return m_symmZ.empty(); }
256 
257    //
258    // Element-centered
259    //
regElemSize(Index_t idx)260    Index_t&  regElemSize(Index_t idx) { return m_regElemSize[idx] ; }
regNumList(Index_t idx)261    Index_t&  regNumList(Index_t idx) { return m_regNumList[idx] ; }
regNumList()262    Index_t*  regNumList()            { return &m_regNumList[0] ; }
regElemlist(Int_t r)263    Index_t*  regElemlist(Int_t r)    { return m_regElemlist[r] ; }
regElemlist(Int_t r,Index_t idx)264    Index_t&  regElemlist(Int_t r, Index_t idx) { return m_regElemlist[r][idx] ; }
265 
nodelist(Index_t idx)266    Index_t*  nodelist(Index_t idx)    { return &m_nodelist[Index_t(8)*idx] ; }
267 
268    // elem connectivities through face
lxim(Index_t idx)269    Index_t&  lxim(Index_t idx) { return m_faceToElem[idx].lxim ; }
lxip(Index_t idx)270    Index_t&  lxip(Index_t idx) { return m_faceToElem[idx].lxip ; }
letam(Index_t idx)271    Index_t&  letam(Index_t idx) { return m_faceToElem[idx].letam ; }
letap(Index_t idx)272    Index_t&  letap(Index_t idx) { return m_faceToElem[idx].letap ; }
lzetam(Index_t idx)273    Index_t&  lzetam(Index_t idx) { return m_faceToElem[idx].lzetam ; }
lzetap(Index_t idx)274    Index_t&  lzetap(Index_t idx) { return m_faceToElem[idx].lzetap ; }
275 
276    // elem face symm/free-surface flag
elemBC(Index_t idx)277    Int_t&  elemBC(Index_t idx) { return m_elemBC[idx] ; }
278 
279    // Principal strains - temporary
dxx(Index_t idx)280    Real_t& dxx(Index_t idx)  { return m_dxx[idx] ; }
dyy(Index_t idx)281    Real_t& dyy(Index_t idx)  { return m_dyy[idx] ; }
dzz(Index_t idx)282    Real_t& dzz(Index_t idx)  { return m_dzz[idx] ; }
283 
284    // Velocity gradient - temporary
delv_xi(Index_t idx)285    Real_t& delv_xi(Index_t idx)    { return m_delv_xi[idx] ; }
delv_eta(Index_t idx)286    Real_t& delv_eta(Index_t idx)   { return m_delv_eta[idx] ; }
delv_zeta(Index_t idx)287    Real_t& delv_zeta(Index_t idx)  { return m_delv_zeta[idx] ; }
288 
289    // Position gradient - temporary
delx_xi(Index_t idx)290    Real_t& delx_xi(Index_t idx)    { return m_delx_xi[idx] ; }
delx_eta(Index_t idx)291    Real_t& delx_eta(Index_t idx)   { return m_delx_eta[idx] ; }
delx_zeta(Index_t idx)292    Real_t& delx_zeta(Index_t idx)  { return m_delx_zeta[idx] ; }
293 
294    // Energy
e(Index_t idx)295    Real_t& e(Index_t idx)          { return m_e[idx] ; }
296 
297    // Pressure
p(Index_t idx)298    Real_t& p(Index_t idx)          { return m_pq[idx].p ; }
299 
300    // Artificial viscosity
q(Index_t idx)301    Real_t& q(Index_t idx)          { return m_pq[idx].q ; }
302 
303    // Linear term for q
ql(Index_t idx)304    Real_t& ql(Index_t idx)         { return m_qlqq[idx].ql ; }
305    // Quadratic term for q
qq(Index_t idx)306    Real_t& qq(Index_t idx)         { return m_qlqq[idx].qq ; }
307 
delv(Index_t idx)308    Real_t& delv(Index_t idx)       { return m_delv[idx] ; }
309 
310    // Relative volume
v(Index_t idx)311    Real_t& v(Index_t idx)          { return m_vol[idx].v ; }
312    // Reference volume
volo(Index_t idx)313    Real_t& volo(Index_t idx)       { return m_vol[idx].volo ; }
314 
315    // volume derivative over volume
vdov(Index_t idx)316    Real_t& vdov(Index_t idx)       { return m_vdov[idx] ; }
317 
318    // Element characteristic length
arealg(Index_t idx)319    Real_t& arealg(Index_t idx)     { return m_arealg[idx] ; }
320 
321    // Sound speed
ss(Index_t idx)322    Real_t& ss(Index_t idx)         { return m_ss[idx] ; }
323 
324    // Element mass
elemMass(Index_t idx)325    Real_t& elemMass(Index_t idx)  { return m_elemMass[idx] ; }
326 
nodeElemCount(Index_t idx)327    Index_t nodeElemCount(Index_t idx)
328    { return m_nodeElemStart[idx+1] - m_nodeElemStart[idx] ; }
329 
nodeElemCornerList(Index_t idx)330    Index_t *nodeElemCornerList(Index_t idx)
331    { return &m_nodeElemCornerList[m_nodeElemStart[idx]] ; }
332 
333    // Parameters
334 
335    // Cutoffs
u_cut()336    Real_t u_cut() const               { return m_u_cut ; }
e_cut()337    Real_t e_cut() const               { return m_e_cut ; }
p_cut()338    Real_t p_cut() const               { return m_p_cut ; }
q_cut()339    Real_t q_cut() const               { return m_q_cut ; }
v_cut()340    Real_t v_cut() const               { return m_v_cut ; }
341 
342    // Other constants (usually are settable via input file in real codes)
hgcoef()343    Real_t hgcoef() const              { return m_hgcoef ; }
qstop()344    Real_t qstop() const               { return m_qstop ; }
monoq_max_slope()345    Real_t monoq_max_slope() const     { return m_monoq_max_slope ; }
monoq_limiter_mult()346    Real_t monoq_limiter_mult() const  { return m_monoq_limiter_mult ; }
ss4o3()347    Real_t ss4o3() const               { return m_ss4o3 ; }
qlc_monoq()348    Real_t qlc_monoq() const           { return m_qlc_monoq ; }
qqc_monoq()349    Real_t qqc_monoq() const           { return m_qqc_monoq ; }
qqc()350    Real_t qqc() const                 { return m_qqc ; }
351 
eosvmax()352    Real_t eosvmax() const             { return m_eosvmax ; }
eosvmin()353    Real_t eosvmin() const             { return m_eosvmin ; }
pmin()354    Real_t pmin() const                { return m_pmin ; }
emin()355    Real_t emin() const                { return m_emin ; }
dvovmax()356    Real_t dvovmax() const             { return m_dvovmax ; }
refdens()357    Real_t refdens() const             { return m_refdens ; }
358 
359    // Timestep controls, etc...
time()360    Real_t& time()                 { return m_time ; }
deltatime()361    Real_t& deltatime()            { return m_deltatime ; }
deltatimemultlb()362    Real_t& deltatimemultlb()      { return m_deltatimemultlb ; }
deltatimemultub()363    Real_t& deltatimemultub()      { return m_deltatimemultub ; }
stoptime()364    Real_t& stoptime()             { return m_stoptime ; }
dtcourant()365    Real_t& dtcourant()            { return m_dtcourant ; }
dthydro()366    Real_t& dthydro()              { return m_dthydro ; }
dtmax()367    Real_t& dtmax()                { return m_dtmax ; }
dtfixed()368    Real_t& dtfixed()              { return m_dtfixed ; }
369 
cycle()370    Int_t&  cycle()                { return m_cycle ; }
numRanks()371    Index_t&  numRanks()           { return m_numRanks ; }
372 
colLoc()373    Index_t&  colLoc()             { return m_colLoc ; }
rowLoc()374    Index_t&  rowLoc()             { return m_rowLoc ; }
planeLoc()375    Index_t&  planeLoc()           { return m_planeLoc ; }
tp()376    Index_t&  tp()                 { return m_tp ; }
377 
sizeX()378    Index_t&  sizeX()              { return m_sizeX ; }
sizeY()379    Index_t&  sizeY()              { return m_sizeY ; }
sizeZ()380    Index_t&  sizeZ()              { return m_sizeZ ; }
numReg()381    Index_t&  numReg()             { return m_numReg ; }
cost()382    Int_t&  cost()             { return m_cost ; }
numElem()383    Index_t&  numElem()            { return m_numElem ; }
numNode()384    Index_t&  numNode()            { return m_numNode ; }
385 
maxPlaneSize()386    Index_t&  maxPlaneSize()       { return m_maxPlaneSize ; }
maxEdgeSize()387    Index_t&  maxEdgeSize()        { return m_maxEdgeSize ; }
388 
389    //
390    // MPI-Related additional data
391    //
392 
393 #if USE_MPI
394    // Communication Work space
395    Real_t *commDataSend ;
396    Real_t *commDataRecv ;
397 
398    // Maximum number of block neighbors
399    MPI_Request recvRequest[26] ; // 6 faces + 12 edges + 8 corners
400    MPI_Request sendRequest[26] ; // 6 faces + 12 edges + 8 corners
401 #endif
402 
403   private:
404 
405    void BuildMesh(Int_t nx, Int_t edgeNodes, Int_t edgeElems);
406    void SetupThreadSupportStructures();
407    void CreateRegionIndexSets(Int_t nreg, Int_t balance);
408    void SetupCommBuffers(Int_t edgeNodes);
409    void SetupSymmetryPlanes(Int_t edgeNodes);
410    void SetupElementConnectivities(Int_t edgeElems);
411    void SetupBoundaryConditions(Int_t edgeElems);
412 
413    //
414    // IMPLEMENTATION
415    //
416 
417    /* Node-centered */
418 
419    struct Tuple3 {
420       Real_t x, y, z ;
421    } ;
422 
423    std::vector<Tuple3> m_coord ;  /* coordinates */
424 
425    std::vector<Tuple3> m_vel ; /* velocities */
426 
427    std::vector<Tuple3> m_acc ; /* accelerations */
428 
429    std::vector<Tuple3> m_force ;  /* forces */
430 
431    std::vector<Real_t> m_nodalMass ;  /* mass */
432 
433    std::vector<Index_t> m_symmX ;  /* symmetry plane nodesets */
434    std::vector<Index_t> m_symmY ;
435    std::vector<Index_t> m_symmZ ;
436 
437    // Element-centered
438 
439    // Region information
440    Int_t    m_numReg ;
441    Int_t    m_cost; //imbalance cost
442    Index_t *m_regElemSize ;   // Size of region sets
443    Index_t *m_regNumList ;    // Region number per domain element
444    Index_t **m_regElemlist ;  // region indexset
445 
446    std::vector<Index_t>  m_nodelist ;     /* elemToNode connectivity */
447 
448    struct FaceElemConn {
449       Index_t lxim, lxip, letam, letap, lzetam, lzetap ;
450    } ;
451 
452    std::vector<FaceElemConn> m_faceToElem ; /* element conn across faces */
453 
454    std::vector<Int_t>    m_elemBC ;  /* symmetry/free-surface flags for each elem face */
455 
456    std::vector<Real_t> m_dxx ;  /* principal strains -- temporary */
457    std::vector<Real_t> m_dyy ;
458    std::vector<Real_t> m_dzz ;
459 
460    std::vector<Real_t> m_delv_xi ;    /* velocity gradient -- temporary */
461    std::vector<Real_t> m_delv_eta ;
462    std::vector<Real_t> m_delv_zeta ;
463 
464    std::vector<Real_t> m_delx_xi ;    /* coordinate gradient -- temporary */
465    std::vector<Real_t> m_delx_eta ;
466    std::vector<Real_t> m_delx_zeta ;
467 
468    std::vector<Real_t> m_e ;   /* energy */
469 
470    struct Pcomponents {
471       Real_t p, q ;
472    } ;
473 
474    std::vector<Pcomponents> m_pq ;   /* pressure and artificial viscosity */
475 
476    struct Qcomponents {
477       Real_t ql, qq ;
478    } ;
479 
480    std::vector<Qcomponents> m_qlqq ;  /* linear and quadratic terms for q */
481 
482    struct Volume {
483       Real_t v, volo ;
484    } ;
485 
486    std::vector<Volume> m_vol ;     /* relative and reference volume */
487 
488    std::vector<Real_t> m_vnew ;  /* new relative volume -- temporary */
489    std::vector<Real_t> m_delv ;  /* m_vnew - m_v */
490    std::vector<Real_t> m_vdov ;  /* volume derivative over volume */
491 
492    std::vector<Real_t> m_arealg ;  /* characteristic length of an element */
493 
494    std::vector<Real_t> m_ss ;      /* "sound speed" */
495 
496    std::vector<Real_t> m_elemMass ;  /* mass */
497 
498    // Cutoffs (treat as constants)
499    const Real_t  m_e_cut ;             // energy tolerance
500    const Real_t  m_p_cut ;             // pressure tolerance
501    const Real_t  m_q_cut ;             // q tolerance
502    const Real_t  m_v_cut ;             // relative volume tolerance
503    const Real_t  m_u_cut ;             // velocity tolerance
504 
505    // Other constants (usually setable, but hardcoded in this proxy app)
506 
507    const Real_t  m_hgcoef ;            // hourglass control
508    const Real_t  m_ss4o3 ;
509    const Real_t  m_qstop ;             // excessive q indicator
510    const Real_t  m_monoq_max_slope ;
511    const Real_t  m_monoq_limiter_mult ;
512    const Real_t  m_qlc_monoq ;         // linear term coef for q
513    const Real_t  m_qqc_monoq ;         // quadratic term coef for q
514    const Real_t  m_qqc ;
515    const Real_t  m_eosvmax ;
516    const Real_t  m_eosvmin ;
517    const Real_t  m_pmin ;              // pressure floor
518    const Real_t  m_emin ;              // energy floor
519    const Real_t  m_dvovmax ;           // maximum allowable volume change
520    const Real_t  m_refdens ;           // reference density
521 
522    // Variables to keep track of timestep, simulation time, and cycle
523    Real_t  m_dtcourant ;         // courant constraint
524    Real_t  m_dthydro ;           // volume change constraint
525    Int_t   m_cycle ;             // iteration count for simulation
526    Real_t  m_dtfixed ;           // fixed time increment
527    Real_t  m_time ;              // current time
528    Real_t  m_deltatime ;         // variable time increment
529    Real_t  m_deltatimemultlb ;
530    Real_t  m_deltatimemultub ;
531    Real_t  m_dtmax ;             // maximum allowable time increment
532    Real_t  m_stoptime ;          // end time for simulation
533 
534 
535    Int_t   m_numRanks ;
536 
537    Index_t m_colLoc ;
538    Index_t m_rowLoc ;
539    Index_t m_planeLoc ;
540    Index_t m_tp ;
541 
542    Index_t m_sizeX ;
543    Index_t m_sizeY ;
544    Index_t m_sizeZ ;
545    Index_t m_numElem ;
546    Index_t m_numNode ;
547 
548    Index_t m_maxPlaneSize ;
549    Index_t m_maxEdgeSize ;
550 
551    // OMP hack
552    Index_t *m_nodeElemStart ;
553    Index_t *m_nodeElemCornerList ;
554 
555    // Used in setup
556    Index_t m_rowMin, m_rowMax;
557    Index_t m_colMin, m_colMax;
558    Index_t m_planeMin, m_planeMax ;
559 
560 } ;
561 
562 typedef Real_t &(Domain::* Domain_member )(Index_t) ;
563 
564 struct cmdLineOpts {
565    Int_t its; // -i
566    Int_t nx;  // -s
567    Int_t numReg; // -r
568    Int_t numFiles; // -f
569    Int_t showProg; // -p
570    Int_t quiet; // -q
571    Int_t viz; // -v
572    Int_t cost; // -c
573    Int_t balance; // -b
574 };
575 
576 
577 
578 // Function Prototypes
579 
580 // lulesh-par
581 Real_t CalcElemVolume( const Real_t x[8],
582                        const Real_t y[8],
583                        const Real_t z[8]);
584 
585 // lulesh-util
586 void ParseCommandLineOptions(int argc, char *argv[],
587                              Int_t myRank, struct cmdLineOpts *opts);
588 void VerifyAndWriteFinalOutput(Real_t elapsed_time,
589                                Domain& locDom,
590                                Int_t nx,
591                                Int_t numRanks);
592 
593 // lulesh-viz
594 void DumpToVisit(Domain& domain, int numFiles, int myRank, int numRanks);
595 
596 // lulesh-comm
597 void CommRecv(Domain& domain, Int_t msgType, Index_t xferFields,
598               Index_t dx, Index_t dy, Index_t dz,
599               bool doRecv, bool planeOnly);
600 void CommSend(Domain& domain, Int_t msgType,
601               Index_t xferFields, Domain_member *fieldData,
602               Index_t dx, Index_t dy, Index_t dz,
603               bool doSend, bool planeOnly);
604 void CommSBN(Domain& domain, Int_t xferFields, Domain_member *fieldData);
605 void CommSyncPosVel(Domain& domain);
606 void CommMonoQ(Domain& domain);
607 
608 // lulesh-init
609 void InitMeshDecomp(Int_t numRanks, Int_t myRank,
610                     Int_t *col, Int_t *row, Int_t *plane, Int_t *side);
611