1 /*
2 This is a Version 2.0 MPI + OpenMP implementation of LULESH
3
4 Copyright (c) 2010-2013.
5 Lawrence Livermore National Security, LLC.
6 Produced at the Lawrence Livermore National Laboratory.
7 LLNL-CODE-461231
8 All rights reserved.
9
10 This file is part of LULESH, Version 2.0.
11 Please also read this link -- http://www.opensource.org/licenses/index.php
12
13 //////////////
14 DIFFERENCES BETWEEN THIS VERSION (2.x) AND EARLIER VERSIONS:
15 * Addition of regions to make work more representative of multi-material codes
16 * Default size of each domain is 30^3 (27000 elem) instead of 45^3. This is
17 more representative of our actual working set sizes
18 * Single source distribution supports pure serial, pure OpenMP, MPI-only,
19 and MPI+OpenMP
20 * Addition of ability to visualize the mesh using VisIt
21 https://wci.llnl.gov/codes/visit/download.html
22 * Various command line options (see ./lulesh2.0 -h)
23 -q : quiet mode - suppress stdout
24 -i <iterations> : number of cycles to run
25 -s <size> : length of cube mesh along side
26 -r <numregions> : Number of distinct regions (def: 11)
27 -b <balance> : Load balance between regions of a domain (def: 1)
28 -c <cost> : Extra cost of more expensive regions (def: 1)
29 -f <filepieces> : Number of file parts for viz output (def: np/9)
30 -p : Print out progress
31 -v : Output viz file (requires compiling with -DVIZ_MESH
32 -h : This message
33
34 printf("Usage: %s [opts]\n", execname);
35 printf(" where [opts] is one or more of:\n");
36 printf(" -q : quiet mode - suppress all stdout\n");
37 printf(" -i <iterations> : number of cycles to run\n");
38 printf(" -s <size> : length of cube mesh along side\n");
39 printf(" -r <numregions> : Number of distinct regions (def: 11)\n");
40 printf(" -b <balance> : Load balance between regions of a domain (def: 1)\n");
41 printf(" -c <cost> : Extra cost of more expensive regions (def: 1)\n");
42 printf(" -f <numfiles> : Number of files to split viz dump into (def: (np+10)/9)\n");
43 printf(" -p : Print out progress\n");
44 printf(" -v : Output viz file (requires compiling with -DVIZ_MESH\n");
45 printf(" -h : This message\n");
46 printf("\n\n");
47
48 *Notable changes in LULESH 2.0
49
50 * Split functionality into different files
51 lulesh.cc - where most (all?) of the timed functionality lies
52 lulesh-comm.cc - MPI functionality
53 lulesh-init.cc - Setup code
54 lulesh-viz.cc - Support for visualization option
55 lulesh-util.cc - Non-timed functions
56 *
57 * The concept of "regions" was added, although every region is the same ideal
58 * gas material, and the same sedov blast wave problem is still the only
59 * problem its hardcoded to solve.
60 * Regions allow two things important to making this proxy app more representative:
61 * Four of the LULESH routines are now performed on a region-by-region basis,
62 * making the memory access patterns non-unit stride
63 * Artificial load imbalances can be easily introduced that could impact
64 * parallelization strategies.
65 * The load balance flag changes region assignment. Region number is raised to
66 * the power entered for assignment probability. Most likely regions changes
67 * with MPI process id.
68 * The cost flag raises the cost of ~45% of the regions to evaluate EOS by the
69 * entered multiple. The cost of 5% is 10x the entered multiple.
70 * MPI and OpenMP were added, and coalesced into a single version of the source
71 * that can support serial builds, MPI-only, OpenMP-only, and MPI+OpenMP
72 * Added support to write plot files using "poor mans parallel I/O" when linked
73 * with the silo library, which in turn can be read by VisIt.
74 * Enabled variable timestep calculation by default (courant condition), which
75 * results in an additional reduction.
76 * Default domain (mesh) size reduced from 45^3 to 30^3
77 * Command line options to allow numerous test cases without needing to recompile
78 * Performance optimizations and code cleanup beyond LULESH 1.0
79 * Added a "Figure of Merit" calculation (elements solved per microsecond) and
80 * output in support of using LULESH 2.0 for the 2017 CORAL procurement
81 *
82 * Possible Differences in Final Release (other changes possible)
83 *
84 * High Level mesh structure to allow data structure transformations
85 * Different default parameters
86 * Minor code performance changes and cleanup
87
88 TODO in future versions
89 * Add reader for (truly) unstructured meshes, probably serial only
90 * CMake based build system
91
92 //////////////
93
94 Redistribution and use in source and binary forms, with or without
95 modification, are permitted provided that the following conditions
96 are met:
97
98 * Redistributions of source code must retain the above copyright
99 notice, this list of conditions and the disclaimer below.
100
101 * Redistributions in binary form must reproduce the above copyright
102 notice, this list of conditions and the disclaimer (as noted below)
103 in the documentation and/or other materials provided with the
104 distribution.
105
106 * Neither the name of the LLNS/LLNL nor the names of its contributors
107 may be used to endorse or promote products derived from this software
108 without specific prior written permission.
109
110 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
111 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
112 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
113 ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
114 THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
115 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
116 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
117 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
118 OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
119 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
120 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
121
122
123 Additional BSD Notice
124
125 1. This notice is required to be provided under our contract with the U.S.
126 Department of Energy (DOE). This work was produced at Lawrence Livermore
127 National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
128
129 2. Neither the United States Government nor Lawrence Livermore National
130 Security, LLC nor any of their employees, makes any warranty, express
131 or implied, or assumes any liability or responsibility for the accuracy,
132 completeness, or usefulness of any information, apparatus, product, or
133 process disclosed, or represents that its use would not infringe
134 privately-owned rights.
135
136 3. Also, reference herein to any specific commercial products, process, or
137 services by trade name, trademark, manufacturer or otherwise does not
138 necessarily constitute or imply its endorsement, recommendation, or
139 favoring by the United States Government or Lawrence Livermore National
140 Security, LLC. The views and opinions of authors expressed herein do not
141 necessarily state or reflect those of the United States Government or
142 Lawrence Livermore National Security, LLC, and shall not be used for
143 advertising or product endorsement purposes.
144
145 */
146
147 #include <climits>
148 #include <vector>
149 #include <math.h>
150 #include <stdio.h>
151 #include <stdlib.h>
152 #include <string.h>
153 #include <ctype.h>
154 #include <time.h>
155 #include <sys/time.h>
156 #include <iostream>
157 #include <unistd.h>
158
159 #if LULESH_USE_OPENMP
160 # include <omp.h>
161 #endif
162
163 #include "lulesh.h"
164
165 #include "conduit.hpp"
166 #include "ascent.hpp"
167
168 using namespace conduit;
169 using namespace ascent;
170
171 /*********************************/
172 /* Data structure implementation */
173 /*********************************/
174
175 /* might want to add access methods so that memory can be */
176 /* better managed, as in luleshFT */
177
178 template <typename T>
Allocate(size_t size)179 T *Allocate(size_t size)
180 {
181 return static_cast<T *>(malloc(sizeof(T)*size)) ;
182 }
183
184 template <typename T>
Release(T ** ptr)185 void Release(T **ptr)
186 {
187 if (*ptr != NULL) {
188 free(*ptr) ;
189 *ptr = NULL ;
190 }
191 }
192
193
194
195 /******************************************/
196
197 /* Work Routines */
198
199 static inline
TimeIncrement(Domain & domain)200 void TimeIncrement(Domain& domain)
201 {
202 Real_t targetdt = domain.stoptime() - domain.time() ;
203
204 if ((domain.dtfixed() <= Real_t(0.0)) && (domain.cycle() != Int_t(0))) {
205 Real_t ratio ;
206 Real_t olddt = domain.deltatime() ;
207
208 /* This will require a reduction in parallel */
209 Real_t gnewdt = Real_t(1.0e+20) ;
210 Real_t newdt ;
211 if (domain.dtcourant() < gnewdt) {
212 gnewdt = domain.dtcourant() / Real_t(2.0) ;
213 }
214 if (domain.dthydro() < gnewdt) {
215 gnewdt = domain.dthydro() * Real_t(2.0) / Real_t(3.0) ;
216 }
217
218 #if USE_MPI
219 MPI_Allreduce(&gnewdt, &newdt, 1,
220 ((sizeof(Real_t) == 4) ? MPI_FLOAT : MPI_DOUBLE),
221 MPI_MIN, MPI_COMM_WORLD) ;
222 #else
223 newdt = gnewdt;
224 #endif
225
226 ratio = newdt / olddt ;
227 if (ratio >= Real_t(1.0)) {
228 if (ratio < domain.deltatimemultlb()) {
229 newdt = olddt ;
230 }
231 else if (ratio > domain.deltatimemultub()) {
232 newdt = olddt*domain.deltatimemultub() ;
233 }
234 }
235
236 if (newdt > domain.dtmax()) {
237 newdt = domain.dtmax() ;
238 }
239 domain.deltatime() = newdt ;
240 }
241
242 /* TRY TO PREVENT VERY SMALL SCALING ON THE NEXT CYCLE */
243 if ((targetdt > domain.deltatime()) &&
244 (targetdt < (Real_t(4.0) * domain.deltatime() / Real_t(3.0))) ) {
245 targetdt = Real_t(2.0) * domain.deltatime() / Real_t(3.0) ;
246 }
247
248 if (targetdt < domain.deltatime()) {
249 domain.deltatime() = targetdt ;
250 }
251
252 domain.time() += domain.deltatime() ;
253
254 ++domain.cycle() ;
255 }
256
257 /******************************************/
258
259 static inline
CollectDomainNodesToElemNodes(Domain & domain,const Index_t * elemToNode,Real_t elemX[8],Real_t elemY[8],Real_t elemZ[8])260 void CollectDomainNodesToElemNodes(Domain &domain,
261 const Index_t* elemToNode,
262 Real_t elemX[8],
263 Real_t elemY[8],
264 Real_t elemZ[8])
265 {
266 Index_t nd0i = elemToNode[0] ;
267 Index_t nd1i = elemToNode[1] ;
268 Index_t nd2i = elemToNode[2] ;
269 Index_t nd3i = elemToNode[3] ;
270 Index_t nd4i = elemToNode[4] ;
271 Index_t nd5i = elemToNode[5] ;
272 Index_t nd6i = elemToNode[6] ;
273 Index_t nd7i = elemToNode[7] ;
274
275 elemX[0] = domain.x(nd0i);
276 elemX[1] = domain.x(nd1i);
277 elemX[2] = domain.x(nd2i);
278 elemX[3] = domain.x(nd3i);
279 elemX[4] = domain.x(nd4i);
280 elemX[5] = domain.x(nd5i);
281 elemX[6] = domain.x(nd6i);
282 elemX[7] = domain.x(nd7i);
283
284 elemY[0] = domain.y(nd0i);
285 elemY[1] = domain.y(nd1i);
286 elemY[2] = domain.y(nd2i);
287 elemY[3] = domain.y(nd3i);
288 elemY[4] = domain.y(nd4i);
289 elemY[5] = domain.y(nd5i);
290 elemY[6] = domain.y(nd6i);
291 elemY[7] = domain.y(nd7i);
292
293 elemZ[0] = domain.z(nd0i);
294 elemZ[1] = domain.z(nd1i);
295 elemZ[2] = domain.z(nd2i);
296 elemZ[3] = domain.z(nd3i);
297 elemZ[4] = domain.z(nd4i);
298 elemZ[5] = domain.z(nd5i);
299 elemZ[6] = domain.z(nd6i);
300 elemZ[7] = domain.z(nd7i);
301
302 }
303
304 /******************************************/
305
306 static inline
InitStressTermsForElems(Domain & domain,Real_t * sigxx,Real_t * sigyy,Real_t * sigzz,Index_t numElem)307 void InitStressTermsForElems(Domain &domain,
308 Real_t *sigxx, Real_t *sigyy, Real_t *sigzz,
309 Index_t numElem)
310 {
311 //
312 // pull in the stresses appropriate to the hydro integration
313 //
314 #ifdef LULESH_USE_OPENMP
315 #pragma omp parallel for firstprivate(numElem)
316 #endif
317 for (Index_t i = 0 ; i < numElem ; ++i){
318 sigxx[i] = sigyy[i] = sigzz[i] = - domain.p(i) - domain.q(i) ;
319 }
320 }
321
322 /******************************************/
323
324 static inline
CalcElemShapeFunctionDerivatives(Real_t const x[],Real_t const y[],Real_t const z[],Real_t b[][8],Real_t * const volume)325 void CalcElemShapeFunctionDerivatives( Real_t const x[],
326 Real_t const y[],
327 Real_t const z[],
328 Real_t b[][8],
329 Real_t* const volume )
330 {
331 const Real_t x0 = x[0] ; const Real_t x1 = x[1] ;
332 const Real_t x2 = x[2] ; const Real_t x3 = x[3] ;
333 const Real_t x4 = x[4] ; const Real_t x5 = x[5] ;
334 const Real_t x6 = x[6] ; const Real_t x7 = x[7] ;
335
336 const Real_t y0 = y[0] ; const Real_t y1 = y[1] ;
337 const Real_t y2 = y[2] ; const Real_t y3 = y[3] ;
338 const Real_t y4 = y[4] ; const Real_t y5 = y[5] ;
339 const Real_t y6 = y[6] ; const Real_t y7 = y[7] ;
340
341 const Real_t z0 = z[0] ; const Real_t z1 = z[1] ;
342 const Real_t z2 = z[2] ; const Real_t z3 = z[3] ;
343 const Real_t z4 = z[4] ; const Real_t z5 = z[5] ;
344 const Real_t z6 = z[6] ; const Real_t z7 = z[7] ;
345
346 Real_t fjxxi, fjxet, fjxze;
347 Real_t fjyxi, fjyet, fjyze;
348 Real_t fjzxi, fjzet, fjzze;
349 Real_t cjxxi, cjxet, cjxze;
350 Real_t cjyxi, cjyet, cjyze;
351 Real_t cjzxi, cjzet, cjzze;
352
353 fjxxi = Real_t(.125) * ( (x6-x0) + (x5-x3) - (x7-x1) - (x4-x2) );
354 fjxet = Real_t(.125) * ( (x6-x0) - (x5-x3) + (x7-x1) - (x4-x2) );
355 fjxze = Real_t(.125) * ( (x6-x0) + (x5-x3) + (x7-x1) + (x4-x2) );
356
357 fjyxi = Real_t(.125) * ( (y6-y0) + (y5-y3) - (y7-y1) - (y4-y2) );
358 fjyet = Real_t(.125) * ( (y6-y0) - (y5-y3) + (y7-y1) - (y4-y2) );
359 fjyze = Real_t(.125) * ( (y6-y0) + (y5-y3) + (y7-y1) + (y4-y2) );
360
361 fjzxi = Real_t(.125) * ( (z6-z0) + (z5-z3) - (z7-z1) - (z4-z2) );
362 fjzet = Real_t(.125) * ( (z6-z0) - (z5-z3) + (z7-z1) - (z4-z2) );
363 fjzze = Real_t(.125) * ( (z6-z0) + (z5-z3) + (z7-z1) + (z4-z2) );
364
365 /* compute cofactors */
366 cjxxi = (fjyet * fjzze) - (fjzet * fjyze);
367 cjxet = - (fjyxi * fjzze) + (fjzxi * fjyze);
368 cjxze = (fjyxi * fjzet) - (fjzxi * fjyet);
369
370 cjyxi = - (fjxet * fjzze) + (fjzet * fjxze);
371 cjyet = (fjxxi * fjzze) - (fjzxi * fjxze);
372 cjyze = - (fjxxi * fjzet) + (fjzxi * fjxet);
373
374 cjzxi = (fjxet * fjyze) - (fjyet * fjxze);
375 cjzet = - (fjxxi * fjyze) + (fjyxi * fjxze);
376 cjzze = (fjxxi * fjyet) - (fjyxi * fjxet);
377
378 /* calculate partials :
379 this need only be done for l = 0,1,2,3 since , by symmetry ,
380 (6,7,4,5) = - (0,1,2,3) .
381 */
382 b[0][0] = - cjxxi - cjxet - cjxze;
383 b[0][1] = cjxxi - cjxet - cjxze;
384 b[0][2] = cjxxi + cjxet - cjxze;
385 b[0][3] = - cjxxi + cjxet - cjxze;
386 b[0][4] = -b[0][2];
387 b[0][5] = -b[0][3];
388 b[0][6] = -b[0][0];
389 b[0][7] = -b[0][1];
390
391 b[1][0] = - cjyxi - cjyet - cjyze;
392 b[1][1] = cjyxi - cjyet - cjyze;
393 b[1][2] = cjyxi + cjyet - cjyze;
394 b[1][3] = - cjyxi + cjyet - cjyze;
395 b[1][4] = -b[1][2];
396 b[1][5] = -b[1][3];
397 b[1][6] = -b[1][0];
398 b[1][7] = -b[1][1];
399
400 b[2][0] = - cjzxi - cjzet - cjzze;
401 b[2][1] = cjzxi - cjzet - cjzze;
402 b[2][2] = cjzxi + cjzet - cjzze;
403 b[2][3] = - cjzxi + cjzet - cjzze;
404 b[2][4] = -b[2][2];
405 b[2][5] = -b[2][3];
406 b[2][6] = -b[2][0];
407 b[2][7] = -b[2][1];
408
409 /* calculate jacobian determinant (volume) */
410 *volume = Real_t(8.) * ( fjxet * cjxet + fjyet * cjyet + fjzet * cjzet);
411 }
412
413 /******************************************/
414
415 static inline
SumElemFaceNormal(Real_t * normalX0,Real_t * normalY0,Real_t * normalZ0,Real_t * normalX1,Real_t * normalY1,Real_t * normalZ1,Real_t * normalX2,Real_t * normalY2,Real_t * normalZ2,Real_t * normalX3,Real_t * normalY3,Real_t * normalZ3,const Real_t x0,const Real_t y0,const Real_t z0,const Real_t x1,const Real_t y1,const Real_t z1,const Real_t x2,const Real_t y2,const Real_t z2,const Real_t x3,const Real_t y3,const Real_t z3)416 void SumElemFaceNormal(Real_t *normalX0, Real_t *normalY0, Real_t *normalZ0,
417 Real_t *normalX1, Real_t *normalY1, Real_t *normalZ1,
418 Real_t *normalX2, Real_t *normalY2, Real_t *normalZ2,
419 Real_t *normalX3, Real_t *normalY3, Real_t *normalZ3,
420 const Real_t x0, const Real_t y0, const Real_t z0,
421 const Real_t x1, const Real_t y1, const Real_t z1,
422 const Real_t x2, const Real_t y2, const Real_t z2,
423 const Real_t x3, const Real_t y3, const Real_t z3)
424 {
425 Real_t bisectX0 = Real_t(0.5) * (x3 + x2 - x1 - x0);
426 Real_t bisectY0 = Real_t(0.5) * (y3 + y2 - y1 - y0);
427 Real_t bisectZ0 = Real_t(0.5) * (z3 + z2 - z1 - z0);
428 Real_t bisectX1 = Real_t(0.5) * (x2 + x1 - x3 - x0);
429 Real_t bisectY1 = Real_t(0.5) * (y2 + y1 - y3 - y0);
430 Real_t bisectZ1 = Real_t(0.5) * (z2 + z1 - z3 - z0);
431 Real_t areaX = Real_t(0.25) * (bisectY0 * bisectZ1 - bisectZ0 * bisectY1);
432 Real_t areaY = Real_t(0.25) * (bisectZ0 * bisectX1 - bisectX0 * bisectZ1);
433 Real_t areaZ = Real_t(0.25) * (bisectX0 * bisectY1 - bisectY0 * bisectX1);
434
435 *normalX0 += areaX;
436 *normalX1 += areaX;
437 *normalX2 += areaX;
438 *normalX3 += areaX;
439
440 *normalY0 += areaY;
441 *normalY1 += areaY;
442 *normalY2 += areaY;
443 *normalY3 += areaY;
444
445 *normalZ0 += areaZ;
446 *normalZ1 += areaZ;
447 *normalZ2 += areaZ;
448 *normalZ3 += areaZ;
449 }
450
451 /******************************************/
452
453 static inline
CalcElemNodeNormals(Real_t pfx[8],Real_t pfy[8],Real_t pfz[8],const Real_t x[8],const Real_t y[8],const Real_t z[8])454 void CalcElemNodeNormals(Real_t pfx[8],
455 Real_t pfy[8],
456 Real_t pfz[8],
457 const Real_t x[8],
458 const Real_t y[8],
459 const Real_t z[8])
460 {
461 for (Index_t i = 0 ; i < 8 ; ++i) {
462 pfx[i] = Real_t(0.0);
463 pfy[i] = Real_t(0.0);
464 pfz[i] = Real_t(0.0);
465 }
466 /* evaluate face one: nodes 0, 1, 2, 3 */
467 SumElemFaceNormal(&pfx[0], &pfy[0], &pfz[0],
468 &pfx[1], &pfy[1], &pfz[1],
469 &pfx[2], &pfy[2], &pfz[2],
470 &pfx[3], &pfy[3], &pfz[3],
471 x[0], y[0], z[0], x[1], y[1], z[1],
472 x[2], y[2], z[2], x[3], y[3], z[3]);
473 /* evaluate face two: nodes 0, 4, 5, 1 */
474 SumElemFaceNormal(&pfx[0], &pfy[0], &pfz[0],
475 &pfx[4], &pfy[4], &pfz[4],
476 &pfx[5], &pfy[5], &pfz[5],
477 &pfx[1], &pfy[1], &pfz[1],
478 x[0], y[0], z[0], x[4], y[4], z[4],
479 x[5], y[5], z[5], x[1], y[1], z[1]);
480 /* evaluate face three: nodes 1, 5, 6, 2 */
481 SumElemFaceNormal(&pfx[1], &pfy[1], &pfz[1],
482 &pfx[5], &pfy[5], &pfz[5],
483 &pfx[6], &pfy[6], &pfz[6],
484 &pfx[2], &pfy[2], &pfz[2],
485 x[1], y[1], z[1], x[5], y[5], z[5],
486 x[6], y[6], z[6], x[2], y[2], z[2]);
487 /* evaluate face four: nodes 2, 6, 7, 3 */
488 SumElemFaceNormal(&pfx[2], &pfy[2], &pfz[2],
489 &pfx[6], &pfy[6], &pfz[6],
490 &pfx[7], &pfy[7], &pfz[7],
491 &pfx[3], &pfy[3], &pfz[3],
492 x[2], y[2], z[2], x[6], y[6], z[6],
493 x[7], y[7], z[7], x[3], y[3], z[3]);
494 /* evaluate face five: nodes 3, 7, 4, 0 */
495 SumElemFaceNormal(&pfx[3], &pfy[3], &pfz[3],
496 &pfx[7], &pfy[7], &pfz[7],
497 &pfx[4], &pfy[4], &pfz[4],
498 &pfx[0], &pfy[0], &pfz[0],
499 x[3], y[3], z[3], x[7], y[7], z[7],
500 x[4], y[4], z[4], x[0], y[0], z[0]);
501 /* evaluate face six: nodes 4, 7, 6, 5 */
502 SumElemFaceNormal(&pfx[4], &pfy[4], &pfz[4],
503 &pfx[7], &pfy[7], &pfz[7],
504 &pfx[6], &pfy[6], &pfz[6],
505 &pfx[5], &pfy[5], &pfz[5],
506 x[4], y[4], z[4], x[7], y[7], z[7],
507 x[6], y[6], z[6], x[5], y[5], z[5]);
508 }
509
510 /******************************************/
511
512 static inline
SumElemStressesToNodeForces(const Real_t B[][8],const Real_t stress_xx,const Real_t stress_yy,const Real_t stress_zz,Real_t fx[],Real_t fy[],Real_t fz[])513 void SumElemStressesToNodeForces( const Real_t B[][8],
514 const Real_t stress_xx,
515 const Real_t stress_yy,
516 const Real_t stress_zz,
517 Real_t fx[], Real_t fy[], Real_t fz[] )
518 {
519 for(Index_t i = 0; i < 8; i++) {
520 fx[i] = -( stress_xx * B[0][i] );
521 fy[i] = -( stress_yy * B[1][i] );
522 fz[i] = -( stress_zz * B[2][i] );
523 }
524 }
525
526 /******************************************/
527
528 static inline
IntegrateStressForElems(Domain & domain,Real_t * sigxx,Real_t * sigyy,Real_t * sigzz,Real_t * determ,Index_t numElem,Index_t numNode)529 void IntegrateStressForElems( Domain &domain,
530 Real_t *sigxx, Real_t *sigyy, Real_t *sigzz,
531 Real_t *determ, Index_t numElem, Index_t numNode)
532 {
533 #if LULESH_USE_OPENMP
534 Index_t numthreads = omp_get_max_threads();
535 #else
536 Index_t numthreads = 1;
537 #endif
538
539 Index_t numElem8 = numElem * 8 ;
540 Real_t *fx_elem;
541 Real_t *fy_elem;
542 Real_t *fz_elem;
543 Real_t fx_local[8] ;
544 Real_t fy_local[8] ;
545 Real_t fz_local[8] ;
546
547
548 if (numthreads > 1) {
549 fx_elem = Allocate<Real_t>(numElem8) ;
550 fy_elem = Allocate<Real_t>(numElem8) ;
551 fz_elem = Allocate<Real_t>(numElem8) ;
552 }
553 // loop over all elements
554
555 #ifdef LULESH_USE_OPENMP
556 #pragma omp parallel for firstprivate(numElem)
557 #endif
558 for( Index_t k=0 ; k<numElem ; ++k )
559 {
560 const Index_t* const elemToNode = domain.nodelist(k);
561 Real_t B[3][8] ;// shape function derivatives
562 Real_t x_local[8] ;
563 Real_t y_local[8] ;
564 Real_t z_local[8] ;
565
566 // get nodal coordinates from global arrays and copy into local arrays.
567 CollectDomainNodesToElemNodes(domain, elemToNode, x_local, y_local, z_local);
568
569 // Volume calculation involves extra work for numerical consistency
570 CalcElemShapeFunctionDerivatives(x_local, y_local, z_local,
571 B, &determ[k]);
572
573 CalcElemNodeNormals( B[0] , B[1], B[2],
574 x_local, y_local, z_local );
575
576 if (numthreads > 1) {
577 // Eliminate thread writing conflicts at the nodes by giving
578 // each element its own copy to write to
579 SumElemStressesToNodeForces( B, sigxx[k], sigyy[k], sigzz[k],
580 &fx_elem[k*8],
581 &fy_elem[k*8],
582 &fz_elem[k*8] ) ;
583 }
584 else {
585 SumElemStressesToNodeForces( B, sigxx[k], sigyy[k], sigzz[k],
586 fx_local, fy_local, fz_local ) ;
587
588 // copy nodal force contributions to global force arrray.
589 for( Index_t lnode=0 ; lnode<8 ; ++lnode ) {
590 Index_t gnode = elemToNode[lnode];
591 domain.fx(gnode) += fx_local[lnode];
592 domain.fy(gnode) += fy_local[lnode];
593 domain.fz(gnode) += fz_local[lnode];
594 }
595 }
596 }
597
598 if (numthreads > 1) {
599 // If threaded, then we need to copy the data out of the temporary
600 // arrays used above into the final forces field
601 #ifdef LULESH_USE_OPENMP
602 #pragma omp parallel for firstprivate(numNode)
603 #endif
604 for( Index_t gnode=0 ; gnode<numNode ; ++gnode )
605 {
606 Index_t count = domain.nodeElemCount(gnode) ;
607 Index_t *cornerList = domain.nodeElemCornerList(gnode) ;
608 Real_t fx_tmp = Real_t(0.0) ;
609 Real_t fy_tmp = Real_t(0.0) ;
610 Real_t fz_tmp = Real_t(0.0) ;
611 for (Index_t i=0 ; i < count ; ++i) {
612 Index_t elem = cornerList[i] ;
613 fx_tmp += fx_elem[elem] ;
614 fy_tmp += fy_elem[elem] ;
615 fz_tmp += fz_elem[elem] ;
616 }
617 domain.fx(gnode) = fx_tmp ;
618 domain.fy(gnode) = fy_tmp ;
619 domain.fz(gnode) = fz_tmp ;
620 }
621 Release(&fz_elem) ;
622 Release(&fy_elem) ;
623 Release(&fx_elem) ;
624 }
625 }
626
627 /******************************************/
628
629 static inline
VoluDer(const Real_t x0,const Real_t x1,const Real_t x2,const Real_t x3,const Real_t x4,const Real_t x5,const Real_t y0,const Real_t y1,const Real_t y2,const Real_t y3,const Real_t y4,const Real_t y5,const Real_t z0,const Real_t z1,const Real_t z2,const Real_t z3,const Real_t z4,const Real_t z5,Real_t * dvdx,Real_t * dvdy,Real_t * dvdz)630 void VoluDer(const Real_t x0, const Real_t x1, const Real_t x2,
631 const Real_t x3, const Real_t x4, const Real_t x5,
632 const Real_t y0, const Real_t y1, const Real_t y2,
633 const Real_t y3, const Real_t y4, const Real_t y5,
634 const Real_t z0, const Real_t z1, const Real_t z2,
635 const Real_t z3, const Real_t z4, const Real_t z5,
636 Real_t* dvdx, Real_t* dvdy, Real_t* dvdz)
637 {
638 const Real_t twelfth = Real_t(1.0) / Real_t(12.0) ;
639
640 *dvdx =
641 (y1 + y2) * (z0 + z1) - (y0 + y1) * (z1 + z2) +
642 (y0 + y4) * (z3 + z4) - (y3 + y4) * (z0 + z4) -
643 (y2 + y5) * (z3 + z5) + (y3 + y5) * (z2 + z5);
644 *dvdy =
645 - (x1 + x2) * (z0 + z1) + (x0 + x1) * (z1 + z2) -
646 (x0 + x4) * (z3 + z4) + (x3 + x4) * (z0 + z4) +
647 (x2 + x5) * (z3 + z5) - (x3 + x5) * (z2 + z5);
648
649 *dvdz =
650 - (y1 + y2) * (x0 + x1) + (y0 + y1) * (x1 + x2) -
651 (y0 + y4) * (x3 + x4) + (y3 + y4) * (x0 + x4) +
652 (y2 + y5) * (x3 + x5) - (y3 + y5) * (x2 + x5);
653
654 *dvdx *= twelfth;
655 *dvdy *= twelfth;
656 *dvdz *= twelfth;
657 }
658
659 /******************************************/
660
661 static inline
CalcElemVolumeDerivative(Real_t dvdx[8],Real_t dvdy[8],Real_t dvdz[8],const Real_t x[8],const Real_t y[8],const Real_t z[8])662 void CalcElemVolumeDerivative(Real_t dvdx[8],
663 Real_t dvdy[8],
664 Real_t dvdz[8],
665 const Real_t x[8],
666 const Real_t y[8],
667 const Real_t z[8])
668 {
669 VoluDer(x[1], x[2], x[3], x[4], x[5], x[7],
670 y[1], y[2], y[3], y[4], y[5], y[7],
671 z[1], z[2], z[3], z[4], z[5], z[7],
672 &dvdx[0], &dvdy[0], &dvdz[0]);
673 VoluDer(x[0], x[1], x[2], x[7], x[4], x[6],
674 y[0], y[1], y[2], y[7], y[4], y[6],
675 z[0], z[1], z[2], z[7], z[4], z[6],
676 &dvdx[3], &dvdy[3], &dvdz[3]);
677 VoluDer(x[3], x[0], x[1], x[6], x[7], x[5],
678 y[3], y[0], y[1], y[6], y[7], y[5],
679 z[3], z[0], z[1], z[6], z[7], z[5],
680 &dvdx[2], &dvdy[2], &dvdz[2]);
681 VoluDer(x[2], x[3], x[0], x[5], x[6], x[4],
682 y[2], y[3], y[0], y[5], y[6], y[4],
683 z[2], z[3], z[0], z[5], z[6], z[4],
684 &dvdx[1], &dvdy[1], &dvdz[1]);
685 VoluDer(x[7], x[6], x[5], x[0], x[3], x[1],
686 y[7], y[6], y[5], y[0], y[3], y[1],
687 z[7], z[6], z[5], z[0], z[3], z[1],
688 &dvdx[4], &dvdy[4], &dvdz[4]);
689 VoluDer(x[4], x[7], x[6], x[1], x[0], x[2],
690 y[4], y[7], y[6], y[1], y[0], y[2],
691 z[4], z[7], z[6], z[1], z[0], z[2],
692 &dvdx[5], &dvdy[5], &dvdz[5]);
693 VoluDer(x[5], x[4], x[7], x[2], x[1], x[3],
694 y[5], y[4], y[7], y[2], y[1], y[3],
695 z[5], z[4], z[7], z[2], z[1], z[3],
696 &dvdx[6], &dvdy[6], &dvdz[6]);
697 VoluDer(x[6], x[5], x[4], x[3], x[2], x[0],
698 y[6], y[5], y[4], y[3], y[2], y[0],
699 z[6], z[5], z[4], z[3], z[2], z[0],
700 &dvdx[7], &dvdy[7], &dvdz[7]);
701 }
702
703 /******************************************/
704
705 static inline
CalcElemFBHourglassForce(Real_t * xd,Real_t * yd,Real_t * zd,Real_t hourgam[][4],Real_t coefficient,Real_t * hgfx,Real_t * hgfy,Real_t * hgfz)706 void CalcElemFBHourglassForce(Real_t *xd, Real_t *yd, Real_t *zd, Real_t hourgam[][4],
707 Real_t coefficient,
708 Real_t *hgfx, Real_t *hgfy, Real_t *hgfz )
709 {
710 Real_t hxx[4];
711 for(Index_t i = 0; i < 4; i++) {
712 hxx[i] = hourgam[0][i] * xd[0] + hourgam[1][i] * xd[1] +
713 hourgam[2][i] * xd[2] + hourgam[3][i] * xd[3] +
714 hourgam[4][i] * xd[4] + hourgam[5][i] * xd[5] +
715 hourgam[6][i] * xd[6] + hourgam[7][i] * xd[7];
716 }
717 for(Index_t i = 0; i < 8; i++) {
718 hgfx[i] = coefficient *
719 (hourgam[i][0] * hxx[0] + hourgam[i][1] * hxx[1] +
720 hourgam[i][2] * hxx[2] + hourgam[i][3] * hxx[3]);
721 }
722 for(Index_t i = 0; i < 4; i++) {
723 hxx[i] = hourgam[0][i] * yd[0] + hourgam[1][i] * yd[1] +
724 hourgam[2][i] * yd[2] + hourgam[3][i] * yd[3] +
725 hourgam[4][i] * yd[4] + hourgam[5][i] * yd[5] +
726 hourgam[6][i] * yd[6] + hourgam[7][i] * yd[7];
727 }
728 for(Index_t i = 0; i < 8; i++) {
729 hgfy[i] = coefficient *
730 (hourgam[i][0] * hxx[0] + hourgam[i][1] * hxx[1] +
731 hourgam[i][2] * hxx[2] + hourgam[i][3] * hxx[3]);
732 }
733 for(Index_t i = 0; i < 4; i++) {
734 hxx[i] = hourgam[0][i] * zd[0] + hourgam[1][i] * zd[1] +
735 hourgam[2][i] * zd[2] + hourgam[3][i] * zd[3] +
736 hourgam[4][i] * zd[4] + hourgam[5][i] * zd[5] +
737 hourgam[6][i] * zd[6] + hourgam[7][i] * zd[7];
738 }
739 for(Index_t i = 0; i < 8; i++) {
740 hgfz[i] = coefficient *
741 (hourgam[i][0] * hxx[0] + hourgam[i][1] * hxx[1] +
742 hourgam[i][2] * hxx[2] + hourgam[i][3] * hxx[3]);
743 }
744 }
745
746 /******************************************/
747
748 static inline
CalcFBHourglassForceForElems(Domain & domain,Real_t * determ,Real_t * x8n,Real_t * y8n,Real_t * z8n,Real_t * dvdx,Real_t * dvdy,Real_t * dvdz,Real_t hourg,Index_t numElem,Index_t numNode)749 void CalcFBHourglassForceForElems( Domain &domain,
750 Real_t *determ,
751 Real_t *x8n, Real_t *y8n, Real_t *z8n,
752 Real_t *dvdx, Real_t *dvdy, Real_t *dvdz,
753 Real_t hourg, Index_t numElem,
754 Index_t numNode)
755 {
756
757 #if LULESH_USE_OPENMP
758 Index_t numthreads = omp_get_max_threads();
759 #else
760 Index_t numthreads = 1;
761 #endif
762 /*************************************************
763 *
764 * FUNCTION: Calculates the Flanagan-Belytschko anti-hourglass
765 * force.
766 *
767 *************************************************/
768
769 Index_t numElem8 = numElem * 8 ;
770
771 Real_t *fx_elem;
772 Real_t *fy_elem;
773 Real_t *fz_elem;
774
775 if(numthreads > 1) {
776 fx_elem = Allocate<Real_t>(numElem8) ;
777 fy_elem = Allocate<Real_t>(numElem8) ;
778 fz_elem = Allocate<Real_t>(numElem8) ;
779 }
780
781 Real_t gamma[4][8];
782
783 gamma[0][0] = Real_t( 1.);
784 gamma[0][1] = Real_t( 1.);
785 gamma[0][2] = Real_t(-1.);
786 gamma[0][3] = Real_t(-1.);
787 gamma[0][4] = Real_t(-1.);
788 gamma[0][5] = Real_t(-1.);
789 gamma[0][6] = Real_t( 1.);
790 gamma[0][7] = Real_t( 1.);
791 gamma[1][0] = Real_t( 1.);
792 gamma[1][1] = Real_t(-1.);
793 gamma[1][2] = Real_t(-1.);
794 gamma[1][3] = Real_t( 1.);
795 gamma[1][4] = Real_t(-1.);
796 gamma[1][5] = Real_t( 1.);
797 gamma[1][6] = Real_t( 1.);
798 gamma[1][7] = Real_t(-1.);
799 gamma[2][0] = Real_t( 1.);
800 gamma[2][1] = Real_t(-1.);
801 gamma[2][2] = Real_t( 1.);
802 gamma[2][3] = Real_t(-1.);
803 gamma[2][4] = Real_t( 1.);
804 gamma[2][5] = Real_t(-1.);
805 gamma[2][6] = Real_t( 1.);
806 gamma[2][7] = Real_t(-1.);
807 gamma[3][0] = Real_t(-1.);
808 gamma[3][1] = Real_t( 1.);
809 gamma[3][2] = Real_t(-1.);
810 gamma[3][3] = Real_t( 1.);
811 gamma[3][4] = Real_t( 1.);
812 gamma[3][5] = Real_t(-1.);
813 gamma[3][6] = Real_t( 1.);
814 gamma[3][7] = Real_t(-1.);
815
816 /*************************************************/
817 /* compute the hourglass modes */
818
819
820 #ifdef LULESH_USE_OPENMP
821 #pragma omp parallel for firstprivate(numElem, hourg)
822 #endif
823 for(Index_t i2=0;i2<numElem;++i2){
824 Real_t *fx_local, *fy_local, *fz_local ;
825 Real_t hgfx[8], hgfy[8], hgfz[8] ;
826
827 Real_t coefficient;
828
829 Real_t hourgam[8][4];
830 Real_t xd1[8], yd1[8], zd1[8] ;
831
832 const Index_t *elemToNode = domain.nodelist(i2);
833 Index_t i3=8*i2;
834 Real_t volinv=Real_t(1.0)/determ[i2];
835 Real_t ss1, mass1, volume13 ;
836 for(Index_t i1=0;i1<4;++i1){
837
838 Real_t hourmodx =
839 x8n[i3] * gamma[i1][0] + x8n[i3+1] * gamma[i1][1] +
840 x8n[i3+2] * gamma[i1][2] + x8n[i3+3] * gamma[i1][3] +
841 x8n[i3+4] * gamma[i1][4] + x8n[i3+5] * gamma[i1][5] +
842 x8n[i3+6] * gamma[i1][6] + x8n[i3+7] * gamma[i1][7];
843
844 Real_t hourmody =
845 y8n[i3] * gamma[i1][0] + y8n[i3+1] * gamma[i1][1] +
846 y8n[i3+2] * gamma[i1][2] + y8n[i3+3] * gamma[i1][3] +
847 y8n[i3+4] * gamma[i1][4] + y8n[i3+5] * gamma[i1][5] +
848 y8n[i3+6] * gamma[i1][6] + y8n[i3+7] * gamma[i1][7];
849
850 Real_t hourmodz =
851 z8n[i3] * gamma[i1][0] + z8n[i3+1] * gamma[i1][1] +
852 z8n[i3+2] * gamma[i1][2] + z8n[i3+3] * gamma[i1][3] +
853 z8n[i3+4] * gamma[i1][4] + z8n[i3+5] * gamma[i1][5] +
854 z8n[i3+6] * gamma[i1][6] + z8n[i3+7] * gamma[i1][7];
855
856 hourgam[0][i1] = gamma[i1][0] - volinv*(dvdx[i3 ] * hourmodx +
857 dvdy[i3 ] * hourmody +
858 dvdz[i3 ] * hourmodz );
859
860 hourgam[1][i1] = gamma[i1][1] - volinv*(dvdx[i3+1] * hourmodx +
861 dvdy[i3+1] * hourmody +
862 dvdz[i3+1] * hourmodz );
863
864 hourgam[2][i1] = gamma[i1][2] - volinv*(dvdx[i3+2] * hourmodx +
865 dvdy[i3+2] * hourmody +
866 dvdz[i3+2] * hourmodz );
867
868 hourgam[3][i1] = gamma[i1][3] - volinv*(dvdx[i3+3] * hourmodx +
869 dvdy[i3+3] * hourmody +
870 dvdz[i3+3] * hourmodz );
871
872 hourgam[4][i1] = gamma[i1][4] - volinv*(dvdx[i3+4] * hourmodx +
873 dvdy[i3+4] * hourmody +
874 dvdz[i3+4] * hourmodz );
875
876 hourgam[5][i1] = gamma[i1][5] - volinv*(dvdx[i3+5] * hourmodx +
877 dvdy[i3+5] * hourmody +
878 dvdz[i3+5] * hourmodz );
879
880 hourgam[6][i1] = gamma[i1][6] - volinv*(dvdx[i3+6] * hourmodx +
881 dvdy[i3+6] * hourmody +
882 dvdz[i3+6] * hourmodz );
883
884 hourgam[7][i1] = gamma[i1][7] - volinv*(dvdx[i3+7] * hourmodx +
885 dvdy[i3+7] * hourmody +
886 dvdz[i3+7] * hourmodz );
887
888 }
889
890 /* compute forces */
891 /* store forces into h arrays (force arrays) */
892
893 ss1=domain.ss(i2);
894 mass1=domain.elemMass(i2);
895 volume13=CBRT(determ[i2]);
896
897 Index_t n0si2 = elemToNode[0];
898 Index_t n1si2 = elemToNode[1];
899 Index_t n2si2 = elemToNode[2];
900 Index_t n3si2 = elemToNode[3];
901 Index_t n4si2 = elemToNode[4];
902 Index_t n5si2 = elemToNode[5];
903 Index_t n6si2 = elemToNode[6];
904 Index_t n7si2 = elemToNode[7];
905
906 xd1[0] = domain.xd(n0si2);
907 xd1[1] = domain.xd(n1si2);
908 xd1[2] = domain.xd(n2si2);
909 xd1[3] = domain.xd(n3si2);
910 xd1[4] = domain.xd(n4si2);
911 xd1[5] = domain.xd(n5si2);
912 xd1[6] = domain.xd(n6si2);
913 xd1[7] = domain.xd(n7si2);
914
915 yd1[0] = domain.yd(n0si2);
916 yd1[1] = domain.yd(n1si2);
917 yd1[2] = domain.yd(n2si2);
918 yd1[3] = domain.yd(n3si2);
919 yd1[4] = domain.yd(n4si2);
920 yd1[5] = domain.yd(n5si2);
921 yd1[6] = domain.yd(n6si2);
922 yd1[7] = domain.yd(n7si2);
923
924 zd1[0] = domain.zd(n0si2);
925 zd1[1] = domain.zd(n1si2);
926 zd1[2] = domain.zd(n2si2);
927 zd1[3] = domain.zd(n3si2);
928 zd1[4] = domain.zd(n4si2);
929 zd1[5] = domain.zd(n5si2);
930 zd1[6] = domain.zd(n6si2);
931 zd1[7] = domain.zd(n7si2);
932
933 coefficient = - hourg * Real_t(0.01) * ss1 * mass1 / volume13;
934
935 CalcElemFBHourglassForce(xd1,yd1,zd1,
936 hourgam,
937 coefficient, hgfx, hgfy, hgfz);
938
939 // With the threaded version, we write into local arrays per elem
940 // so we don't have to worry about race conditions
941 if (numthreads > 1) {
942 fx_local = &fx_elem[i3] ;
943 fx_local[0] = hgfx[0];
944 fx_local[1] = hgfx[1];
945 fx_local[2] = hgfx[2];
946 fx_local[3] = hgfx[3];
947 fx_local[4] = hgfx[4];
948 fx_local[5] = hgfx[5];
949 fx_local[6] = hgfx[6];
950 fx_local[7] = hgfx[7];
951
952 fy_local = &fy_elem[i3] ;
953 fy_local[0] = hgfy[0];
954 fy_local[1] = hgfy[1];
955 fy_local[2] = hgfy[2];
956 fy_local[3] = hgfy[3];
957 fy_local[4] = hgfy[4];
958 fy_local[5] = hgfy[5];
959 fy_local[6] = hgfy[6];
960 fy_local[7] = hgfy[7];
961
962 fz_local = &fz_elem[i3] ;
963 fz_local[0] = hgfz[0];
964 fz_local[1] = hgfz[1];
965 fz_local[2] = hgfz[2];
966 fz_local[3] = hgfz[3];
967 fz_local[4] = hgfz[4];
968 fz_local[5] = hgfz[5];
969 fz_local[6] = hgfz[6];
970 fz_local[7] = hgfz[7];
971 }
972 else {
973 domain.fx(n0si2) += hgfx[0];
974 domain.fy(n0si2) += hgfy[0];
975 domain.fz(n0si2) += hgfz[0];
976
977 domain.fx(n1si2) += hgfx[1];
978 domain.fy(n1si2) += hgfy[1];
979 domain.fz(n1si2) += hgfz[1];
980
981 domain.fx(n2si2) += hgfx[2];
982 domain.fy(n2si2) += hgfy[2];
983 domain.fz(n2si2) += hgfz[2];
984
985 domain.fx(n3si2) += hgfx[3];
986 domain.fy(n3si2) += hgfy[3];
987 domain.fz(n3si2) += hgfz[3];
988
989 domain.fx(n4si2) += hgfx[4];
990 domain.fy(n4si2) += hgfy[4];
991 domain.fz(n4si2) += hgfz[4];
992
993 domain.fx(n5si2) += hgfx[5];
994 domain.fy(n5si2) += hgfy[5];
995 domain.fz(n5si2) += hgfz[5];
996
997 domain.fx(n6si2) += hgfx[6];
998 domain.fy(n6si2) += hgfy[6];
999 domain.fz(n6si2) += hgfz[6];
1000
1001 domain.fx(n7si2) += hgfx[7];
1002 domain.fy(n7si2) += hgfy[7];
1003 domain.fz(n7si2) += hgfz[7];
1004 }
1005 }
1006
1007 if (numthreads > 1) {
1008 // Collect the data from the local arrays into the final force arrays
1009 #ifdef LULESH_USE_OPENMP
1010 #pragma omp parallel for firstprivate(numNode)
1011 #endif
1012 for( Index_t gnode=0 ; gnode<numNode ; ++gnode )
1013 {
1014 Index_t count = domain.nodeElemCount(gnode) ;
1015 Index_t *cornerList = domain.nodeElemCornerList(gnode) ;
1016 Real_t fx_tmp = Real_t(0.0) ;
1017 Real_t fy_tmp = Real_t(0.0) ;
1018 Real_t fz_tmp = Real_t(0.0) ;
1019 for (Index_t i=0 ; i < count ; ++i) {
1020 Index_t elem = cornerList[i] ;
1021 fx_tmp += fx_elem[elem] ;
1022 fy_tmp += fy_elem[elem] ;
1023 fz_tmp += fz_elem[elem] ;
1024 }
1025 domain.fx(gnode) += fx_tmp ;
1026 domain.fy(gnode) += fy_tmp ;
1027 domain.fz(gnode) += fz_tmp ;
1028 }
1029 Release(&fz_elem) ;
1030 Release(&fy_elem) ;
1031 Release(&fx_elem) ;
1032 }
1033 }
1034
1035 /******************************************/
1036
1037 static inline
CalcHourglassControlForElems(Domain & domain,Real_t determ[],Real_t hgcoef)1038 void CalcHourglassControlForElems(Domain& domain,
1039 Real_t determ[], Real_t hgcoef)
1040 {
1041 Index_t numElem = domain.numElem() ;
1042 Index_t numElem8 = numElem * 8 ;
1043 Real_t *dvdx = Allocate<Real_t>(numElem8) ;
1044 Real_t *dvdy = Allocate<Real_t>(numElem8) ;
1045 Real_t *dvdz = Allocate<Real_t>(numElem8) ;
1046 Real_t *x8n = Allocate<Real_t>(numElem8) ;
1047 Real_t *y8n = Allocate<Real_t>(numElem8) ;
1048 Real_t *z8n = Allocate<Real_t>(numElem8) ;
1049
1050 /* start loop over elements */
1051 #ifdef LULESH_USE_OPENMP
1052 #pragma omp parallel for firstprivate(numElem)
1053 #endif
1054 for (Index_t i=0 ; i<numElem ; ++i){
1055 Real_t x1[8], y1[8], z1[8] ;
1056 Real_t pfx[8], pfy[8], pfz[8] ;
1057
1058 Index_t* elemToNode = domain.nodelist(i);
1059 CollectDomainNodesToElemNodes(domain, elemToNode, x1, y1, z1);
1060
1061 CalcElemVolumeDerivative(pfx, pfy, pfz, x1, y1, z1);
1062
1063 /* load into temporary storage for FB Hour Glass control */
1064 for(Index_t ii=0;ii<8;++ii){
1065 Index_t jj=8*i+ii;
1066
1067 dvdx[jj] = pfx[ii];
1068 dvdy[jj] = pfy[ii];
1069 dvdz[jj] = pfz[ii];
1070 x8n[jj] = x1[ii];
1071 y8n[jj] = y1[ii];
1072 z8n[jj] = z1[ii];
1073 }
1074
1075 determ[i] = domain.volo(i) * domain.v(i);
1076
1077 /* Do a check for negative volumes */
1078 if ( domain.v(i) <= Real_t(0.0) ) {
1079 #if USE_MPI
1080 MPI_Abort(MPI_COMM_WORLD, VolumeError) ;
1081 #else
1082 exit(VolumeError);
1083 #endif
1084 }
1085 }
1086
1087 if ( hgcoef > Real_t(0.) ) {
1088 CalcFBHourglassForceForElems( domain,
1089 determ, x8n, y8n, z8n, dvdx, dvdy, dvdz,
1090 hgcoef, numElem, domain.numNode()) ;
1091 }
1092
1093 Release(&z8n) ;
1094 Release(&y8n) ;
1095 Release(&x8n) ;
1096 Release(&dvdz) ;
1097 Release(&dvdy) ;
1098 Release(&dvdx) ;
1099
1100 return ;
1101 }
1102
1103 /******************************************/
1104
1105 static inline
CalcVolumeForceForElems(Domain & domain)1106 void CalcVolumeForceForElems(Domain& domain)
1107 {
1108 Index_t numElem = domain.numElem() ;
1109 if (numElem != 0) {
1110 Real_t hgcoef = domain.hgcoef() ;
1111 Real_t *sigxx = Allocate<Real_t>(numElem) ;
1112 Real_t *sigyy = Allocate<Real_t>(numElem) ;
1113 Real_t *sigzz = Allocate<Real_t>(numElem) ;
1114 Real_t *determ = Allocate<Real_t>(numElem) ;
1115
1116 /* Sum contributions to total stress tensor */
1117 InitStressTermsForElems(domain, sigxx, sigyy, sigzz, numElem);
1118
1119 // call elemlib stress integration loop to produce nodal forces from
1120 // material stresses.
1121 IntegrateStressForElems( domain,
1122 sigxx, sigyy, sigzz, determ, numElem,
1123 domain.numNode()) ;
1124
1125 // check for negative element volume
1126 #ifdef LULESH_USE_OPENMP
1127 #pragma omp parallel for firstprivate(numElem)
1128 #endif
1129 for ( Index_t k=0 ; k<numElem ; ++k ) {
1130 if (determ[k] <= Real_t(0.0)) {
1131 #if USE_MPI
1132 MPI_Abort(MPI_COMM_WORLD, VolumeError) ;
1133 #else
1134 exit(VolumeError);
1135 #endif
1136 }
1137 }
1138
1139 CalcHourglassControlForElems(domain, determ, hgcoef) ;
1140
1141 Release(&determ) ;
1142 Release(&sigzz) ;
1143 Release(&sigyy) ;
1144 Release(&sigxx) ;
1145 }
1146 }
1147
1148 /******************************************/
1149
CalcForceForNodes(Domain & domain)1150 static inline void CalcForceForNodes(Domain& domain)
1151 {
1152 Index_t numNode = domain.numNode() ;
1153
1154 #if USE_MPI
1155 CommRecv(domain, MSG_COMM_SBN, 3,
1156 domain.sizeX() + 1, domain.sizeY() + 1, domain.sizeZ() + 1,
1157 true, false) ;
1158 #endif
1159
1160 #ifdef LULESH_USE_OPENMP
1161 #pragma omp parallel for firstprivate(numNode)
1162 #endif
1163 for (Index_t i=0; i<numNode; ++i) {
1164 domain.fx(i) = Real_t(0.0) ;
1165 domain.fy(i) = Real_t(0.0) ;
1166 domain.fz(i) = Real_t(0.0) ;
1167 }
1168
1169 /* Calcforce calls partial, force, hourq */
1170 CalcVolumeForceForElems(domain) ;
1171
1172 #if USE_MPI
1173 Domain_member fieldData[3] ;
1174 fieldData[0] = &Domain::fx ;
1175 fieldData[1] = &Domain::fy ;
1176 fieldData[2] = &Domain::fz ;
1177
1178 CommSend(domain, MSG_COMM_SBN, 3, fieldData,
1179 domain.sizeX() + 1, domain.sizeY() + 1, domain.sizeZ() + 1,
1180 true, false) ;
1181 CommSBN(domain, 3, fieldData) ;
1182 #endif
1183 }
1184
1185 /******************************************/
1186
1187 static inline
CalcAccelerationForNodes(Domain & domain,Index_t numNode)1188 void CalcAccelerationForNodes(Domain &domain, Index_t numNode)
1189 {
1190 #ifdef LULESH_USE_OPENMP
1191 #pragma omp parallel for firstprivate(numNode)
1192 #endif
1193 for (Index_t i = 0; i < numNode; ++i) {
1194 domain.xdd(i) = domain.fx(i) / domain.nodalMass(i);
1195 domain.ydd(i) = domain.fy(i) / domain.nodalMass(i);
1196 domain.zdd(i) = domain.fz(i) / domain.nodalMass(i);
1197 }
1198 }
1199
1200 /******************************************/
1201
1202 static inline
ApplyAccelerationBoundaryConditionsForNodes(Domain & domain)1203 void ApplyAccelerationBoundaryConditionsForNodes(Domain& domain)
1204 {
1205 Index_t size = domain.sizeX();
1206 Index_t numNodeBC = (size+1)*(size+1) ;
1207
1208 #ifdef LULESH_USE_OPENMP
1209 #pragma omp parallel
1210 #endif
1211 {
1212 if (!domain.symmXempty() != 0) {
1213 #ifdef LULESH_USE_OPENMP
1214 #pragma omp for nowait firstprivate(numNodeBC)
1215 #endif
1216 for(Index_t i=0 ; i<numNodeBC ; ++i)
1217 domain.xdd(domain.symmX(i)) = Real_t(0.0) ;
1218 }
1219
1220 if (!domain.symmYempty() != 0) {
1221 #ifdef LULESH_USE_OPENMP
1222 #pragma omp for nowait firstprivate(numNodeBC)
1223 #endif
1224 for(Index_t i=0 ; i<numNodeBC ; ++i)
1225 domain.ydd(domain.symmY(i)) = Real_t(0.0) ;
1226 }
1227
1228 if (!domain.symmZempty() != 0) {
1229 #ifdef LULESH_USE_OPENMP
1230 #pragma omp for nowait firstprivate(numNodeBC)
1231 #endif
1232 for(Index_t i=0 ; i<numNodeBC ; ++i)
1233 domain.zdd(domain.symmZ(i)) = Real_t(0.0) ;
1234 }
1235 }
1236 }
1237
1238 /******************************************/
1239
1240 static inline
CalcVelocityForNodes(Domain & domain,const Real_t dt,const Real_t u_cut,Index_t numNode)1241 void CalcVelocityForNodes(Domain &domain, const Real_t dt, const Real_t u_cut,
1242 Index_t numNode)
1243 {
1244
1245 #ifdef LULESH_USE_OPENMP
1246 #pragma omp parallel for firstprivate(numNode)
1247 #endif
1248 for ( Index_t i = 0 ; i < numNode ; ++i )
1249 {
1250 Real_t xdtmp, ydtmp, zdtmp ;
1251
1252 xdtmp = domain.xd(i) + domain.xdd(i) * dt ;
1253 if( FABS(xdtmp) < u_cut ) xdtmp = Real_t(0.0);
1254 domain.xd(i) = xdtmp ;
1255
1256 ydtmp = domain.yd(i) + domain.ydd(i) * dt ;
1257 if( FABS(ydtmp) < u_cut ) ydtmp = Real_t(0.0);
1258 domain.yd(i) = ydtmp ;
1259
1260 zdtmp = domain.zd(i) + domain.zdd(i) * dt ;
1261 if( FABS(zdtmp) < u_cut ) zdtmp = Real_t(0.0);
1262 domain.zd(i) = zdtmp ;
1263 }
1264 }
1265
1266 /******************************************/
1267
1268 static inline
CalcPositionForNodes(Domain & domain,const Real_t dt,Index_t numNode)1269 void CalcPositionForNodes(Domain &domain, const Real_t dt, Index_t numNode)
1270 {
1271 #ifdef LULESH_USE_OPENMP
1272 #pragma omp parallel for firstprivate(numNode)
1273 #endif
1274 for ( Index_t i = 0 ; i < numNode ; ++i )
1275 {
1276 domain.x(i) += domain.xd(i) * dt ;
1277 domain.y(i) += domain.yd(i) * dt ;
1278 domain.z(i) += domain.zd(i) * dt ;
1279 }
1280 }
1281
1282 /******************************************/
1283
1284 static inline
LagrangeNodal(Domain & domain)1285 void LagrangeNodal(Domain& domain)
1286 {
1287 #ifdef SEDOV_SYNC_POS_VEL_EARLY
1288 Domain_member fieldData[6] ;
1289 #endif
1290
1291 const Real_t delt = domain.deltatime() ;
1292 Real_t u_cut = domain.u_cut() ;
1293
1294 /* time of boundary condition evaluation is beginning of step for force and
1295 * acceleration boundary conditions. */
1296 CalcForceForNodes(domain);
1297
1298 #if USE_MPI
1299 #ifdef SEDOV_SYNC_POS_VEL_EARLY
1300 CommRecv(domain, MSG_SYNC_POS_VEL, 6,
1301 domain.sizeX() + 1, domain.sizeY() + 1, domain.sizeZ() + 1,
1302 false, false) ;
1303 #endif
1304 #endif
1305
1306 CalcAccelerationForNodes(domain, domain.numNode());
1307
1308 ApplyAccelerationBoundaryConditionsForNodes(domain);
1309
1310 CalcVelocityForNodes( domain, delt, u_cut, domain.numNode()) ;
1311
1312 CalcPositionForNodes( domain, delt, domain.numNode() );
1313 #if USE_MPI
1314 #ifdef SEDOV_SYNC_POS_VEL_EARLY
1315 fieldData[0] = &Domain::x ;
1316 fieldData[1] = &Domain::y ;
1317 fieldData[2] = &Domain::z ;
1318 fieldData[3] = &Domain::xd ;
1319 fieldData[4] = &Domain::yd ;
1320 fieldData[5] = &Domain::zd ;
1321
1322 CommSend(domain, MSG_SYNC_POS_VEL, 6, fieldData,
1323 domain.sizeX() + 1, domain.sizeY() + 1, domain.sizeZ() + 1,
1324 false, false) ;
1325 CommSyncPosVel(domain) ;
1326 #endif
1327 #endif
1328
1329 return;
1330 }
1331
1332 /******************************************/
1333
1334 static inline
CalcElemVolume(const Real_t x0,const Real_t x1,const Real_t x2,const Real_t x3,const Real_t x4,const Real_t x5,const Real_t x6,const Real_t x7,const Real_t y0,const Real_t y1,const Real_t y2,const Real_t y3,const Real_t y4,const Real_t y5,const Real_t y6,const Real_t y7,const Real_t z0,const Real_t z1,const Real_t z2,const Real_t z3,const Real_t z4,const Real_t z5,const Real_t z6,const Real_t z7)1335 Real_t CalcElemVolume( const Real_t x0, const Real_t x1,
1336 const Real_t x2, const Real_t x3,
1337 const Real_t x4, const Real_t x5,
1338 const Real_t x6, const Real_t x7,
1339 const Real_t y0, const Real_t y1,
1340 const Real_t y2, const Real_t y3,
1341 const Real_t y4, const Real_t y5,
1342 const Real_t y6, const Real_t y7,
1343 const Real_t z0, const Real_t z1,
1344 const Real_t z2, const Real_t z3,
1345 const Real_t z4, const Real_t z5,
1346 const Real_t z6, const Real_t z7 )
1347 {
1348 Real_t twelveth = Real_t(1.0)/Real_t(12.0);
1349
1350 Real_t dx61 = x6 - x1;
1351 Real_t dy61 = y6 - y1;
1352 Real_t dz61 = z6 - z1;
1353
1354 Real_t dx70 = x7 - x0;
1355 Real_t dy70 = y7 - y0;
1356 Real_t dz70 = z7 - z0;
1357
1358 Real_t dx63 = x6 - x3;
1359 Real_t dy63 = y6 - y3;
1360 Real_t dz63 = z6 - z3;
1361
1362 Real_t dx20 = x2 - x0;
1363 Real_t dy20 = y2 - y0;
1364 Real_t dz20 = z2 - z0;
1365
1366 Real_t dx50 = x5 - x0;
1367 Real_t dy50 = y5 - y0;
1368 Real_t dz50 = z5 - z0;
1369
1370 Real_t dx64 = x6 - x4;
1371 Real_t dy64 = y6 - y4;
1372 Real_t dz64 = z6 - z4;
1373
1374 Real_t dx31 = x3 - x1;
1375 Real_t dy31 = y3 - y1;
1376 Real_t dz31 = z3 - z1;
1377
1378 Real_t dx72 = x7 - x2;
1379 Real_t dy72 = y7 - y2;
1380 Real_t dz72 = z7 - z2;
1381
1382 Real_t dx43 = x4 - x3;
1383 Real_t dy43 = y4 - y3;
1384 Real_t dz43 = z4 - z3;
1385
1386 Real_t dx57 = x5 - x7;
1387 Real_t dy57 = y5 - y7;
1388 Real_t dz57 = z5 - z7;
1389
1390 Real_t dx14 = x1 - x4;
1391 Real_t dy14 = y1 - y4;
1392 Real_t dz14 = z1 - z4;
1393
1394 Real_t dx25 = x2 - x5;
1395 Real_t dy25 = y2 - y5;
1396 Real_t dz25 = z2 - z5;
1397
1398 #define TRIPLE_PRODUCT(x1, y1, z1, x2, y2, z2, x3, y3, z3) \
1399 ((x1)*((y2)*(z3) - (z2)*(y3)) + (x2)*((z1)*(y3) - (y1)*(z3)) + (x3)*((y1)*(z2) - (z1)*(y2)))
1400
1401 Real_t volume =
1402 TRIPLE_PRODUCT(dx31 + dx72, dx63, dx20,
1403 dy31 + dy72, dy63, dy20,
1404 dz31 + dz72, dz63, dz20) +
1405 TRIPLE_PRODUCT(dx43 + dx57, dx64, dx70,
1406 dy43 + dy57, dy64, dy70,
1407 dz43 + dz57, dz64, dz70) +
1408 TRIPLE_PRODUCT(dx14 + dx25, dx61, dx50,
1409 dy14 + dy25, dy61, dy50,
1410 dz14 + dz25, dz61, dz50);
1411
1412 #undef TRIPLE_PRODUCT
1413
1414 volume *= twelveth;
1415
1416 return volume ;
1417 }
1418
1419 /******************************************/
1420
1421 //inline
CalcElemVolume(const Real_t x[8],const Real_t y[8],const Real_t z[8])1422 Real_t CalcElemVolume( const Real_t x[8], const Real_t y[8], const Real_t z[8] )
1423 {
1424 return CalcElemVolume( x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
1425 y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7],
1426 z[0], z[1], z[2], z[3], z[4], z[5], z[6], z[7]);
1427 }
1428
1429 /******************************************/
1430
1431 static inline
AreaFace(const Real_t x0,const Real_t x1,const Real_t x2,const Real_t x3,const Real_t y0,const Real_t y1,const Real_t y2,const Real_t y3,const Real_t z0,const Real_t z1,const Real_t z2,const Real_t z3)1432 Real_t AreaFace( const Real_t x0, const Real_t x1,
1433 const Real_t x2, const Real_t x3,
1434 const Real_t y0, const Real_t y1,
1435 const Real_t y2, const Real_t y3,
1436 const Real_t z0, const Real_t z1,
1437 const Real_t z2, const Real_t z3)
1438 {
1439 Real_t fx = (x2 - x0) - (x3 - x1);
1440 Real_t fy = (y2 - y0) - (y3 - y1);
1441 Real_t fz = (z2 - z0) - (z3 - z1);
1442 Real_t gx = (x2 - x0) + (x3 - x1);
1443 Real_t gy = (y2 - y0) + (y3 - y1);
1444 Real_t gz = (z2 - z0) + (z3 - z1);
1445 Real_t area =
1446 (fx * fx + fy * fy + fz * fz) *
1447 (gx * gx + gy * gy + gz * gz) -
1448 (fx * gx + fy * gy + fz * gz) *
1449 (fx * gx + fy * gy + fz * gz);
1450 return area ;
1451 }
1452
1453 /******************************************/
1454
1455 static inline
CalcElemCharacteristicLength(const Real_t x[8],const Real_t y[8],const Real_t z[8],const Real_t volume)1456 Real_t CalcElemCharacteristicLength( const Real_t x[8],
1457 const Real_t y[8],
1458 const Real_t z[8],
1459 const Real_t volume)
1460 {
1461 Real_t a, charLength = Real_t(0.0);
1462
1463 a = AreaFace(x[0],x[1],x[2],x[3],
1464 y[0],y[1],y[2],y[3],
1465 z[0],z[1],z[2],z[3]) ;
1466 charLength = std::max(a,charLength) ;
1467
1468 a = AreaFace(x[4],x[5],x[6],x[7],
1469 y[4],y[5],y[6],y[7],
1470 z[4],z[5],z[6],z[7]) ;
1471 charLength = std::max(a,charLength) ;
1472
1473 a = AreaFace(x[0],x[1],x[5],x[4],
1474 y[0],y[1],y[5],y[4],
1475 z[0],z[1],z[5],z[4]) ;
1476 charLength = std::max(a,charLength) ;
1477
1478 a = AreaFace(x[1],x[2],x[6],x[5],
1479 y[1],y[2],y[6],y[5],
1480 z[1],z[2],z[6],z[5]) ;
1481 charLength = std::max(a,charLength) ;
1482
1483 a = AreaFace(x[2],x[3],x[7],x[6],
1484 y[2],y[3],y[7],y[6],
1485 z[2],z[3],z[7],z[6]) ;
1486 charLength = std::max(a,charLength) ;
1487
1488 a = AreaFace(x[3],x[0],x[4],x[7],
1489 y[3],y[0],y[4],y[7],
1490 z[3],z[0],z[4],z[7]) ;
1491 charLength = std::max(a,charLength) ;
1492
1493 charLength = Real_t(4.0) * volume / SQRT(charLength);
1494
1495 return charLength;
1496 }
1497
1498 /******************************************/
1499
1500 static inline
CalcElemVelocityGradient(const Real_t * const xvel,const Real_t * const yvel,const Real_t * const zvel,const Real_t b[][8],const Real_t detJ,Real_t * const d)1501 void CalcElemVelocityGradient( const Real_t* const xvel,
1502 const Real_t* const yvel,
1503 const Real_t* const zvel,
1504 const Real_t b[][8],
1505 const Real_t detJ,
1506 Real_t* const d )
1507 {
1508 const Real_t inv_detJ = Real_t(1.0) / detJ ;
1509 Real_t dyddx, dxddy, dzddx, dxddz, dzddy, dyddz;
1510 const Real_t* const pfx = b[0];
1511 const Real_t* const pfy = b[1];
1512 const Real_t* const pfz = b[2];
1513
1514 d[0] = inv_detJ * ( pfx[0] * (xvel[0]-xvel[6])
1515 + pfx[1] * (xvel[1]-xvel[7])
1516 + pfx[2] * (xvel[2]-xvel[4])
1517 + pfx[3] * (xvel[3]-xvel[5]) );
1518
1519 d[1] = inv_detJ * ( pfy[0] * (yvel[0]-yvel[6])
1520 + pfy[1] * (yvel[1]-yvel[7])
1521 + pfy[2] * (yvel[2]-yvel[4])
1522 + pfy[3] * (yvel[3]-yvel[5]) );
1523
1524 d[2] = inv_detJ * ( pfz[0] * (zvel[0]-zvel[6])
1525 + pfz[1] * (zvel[1]-zvel[7])
1526 + pfz[2] * (zvel[2]-zvel[4])
1527 + pfz[3] * (zvel[3]-zvel[5]) );
1528
1529 dyddx = inv_detJ * ( pfx[0] * (yvel[0]-yvel[6])
1530 + pfx[1] * (yvel[1]-yvel[7])
1531 + pfx[2] * (yvel[2]-yvel[4])
1532 + pfx[3] * (yvel[3]-yvel[5]) );
1533
1534 dxddy = inv_detJ * ( pfy[0] * (xvel[0]-xvel[6])
1535 + pfy[1] * (xvel[1]-xvel[7])
1536 + pfy[2] * (xvel[2]-xvel[4])
1537 + pfy[3] * (xvel[3]-xvel[5]) );
1538
1539 dzddx = inv_detJ * ( pfx[0] * (zvel[0]-zvel[6])
1540 + pfx[1] * (zvel[1]-zvel[7])
1541 + pfx[2] * (zvel[2]-zvel[4])
1542 + pfx[3] * (zvel[3]-zvel[5]) );
1543
1544 dxddz = inv_detJ * ( pfz[0] * (xvel[0]-xvel[6])
1545 + pfz[1] * (xvel[1]-xvel[7])
1546 + pfz[2] * (xvel[2]-xvel[4])
1547 + pfz[3] * (xvel[3]-xvel[5]) );
1548
1549 dzddy = inv_detJ * ( pfy[0] * (zvel[0]-zvel[6])
1550 + pfy[1] * (zvel[1]-zvel[7])
1551 + pfy[2] * (zvel[2]-zvel[4])
1552 + pfy[3] * (zvel[3]-zvel[5]) );
1553
1554 dyddz = inv_detJ * ( pfz[0] * (yvel[0]-yvel[6])
1555 + pfz[1] * (yvel[1]-yvel[7])
1556 + pfz[2] * (yvel[2]-yvel[4])
1557 + pfz[3] * (yvel[3]-yvel[5]) );
1558 d[5] = Real_t( .5) * ( dxddy + dyddx );
1559 d[4] = Real_t( .5) * ( dxddz + dzddx );
1560 d[3] = Real_t( .5) * ( dzddy + dyddz );
1561 }
1562
1563 /******************************************/
1564
1565 //static inline
CalcKinematicsForElems(Domain & domain,Real_t * vnew,Real_t deltaTime,Index_t numElem)1566 void CalcKinematicsForElems( Domain &domain, Real_t *vnew,
1567 Real_t deltaTime, Index_t numElem )
1568 {
1569
1570 // loop over all elements
1571 #ifdef LULESH_USE_OPENMP
1572 #pragma omp parallel for firstprivate(numElem, deltaTime)
1573 #endif
1574 for( Index_t k=0 ; k<numElem ; ++k )
1575 {
1576 Real_t B[3][8] ; /** shape function derivatives */
1577 Real_t D[6] ;
1578 Real_t x_local[8] ;
1579 Real_t y_local[8] ;
1580 Real_t z_local[8] ;
1581 Real_t xd_local[8] ;
1582 Real_t yd_local[8] ;
1583 Real_t zd_local[8] ;
1584 Real_t detJ = Real_t(0.0) ;
1585
1586 Real_t volume ;
1587 Real_t relativeVolume ;
1588 const Index_t* const elemToNode = domain.nodelist(k) ;
1589
1590 // get nodal coordinates from global arrays and copy into local arrays.
1591 CollectDomainNodesToElemNodes(domain, elemToNode, x_local, y_local, z_local);
1592
1593 // volume calculations
1594 volume = CalcElemVolume(x_local, y_local, z_local );
1595 relativeVolume = volume / domain.volo(k) ;
1596 vnew[k] = relativeVolume ;
1597 domain.delv(k) = relativeVolume - domain.v(k) ;
1598
1599 // set characteristic length
1600 domain.arealg(k) = CalcElemCharacteristicLength(x_local, y_local, z_local,
1601 volume);
1602
1603 // get nodal velocities from global array and copy into local arrays.
1604 for( Index_t lnode=0 ; lnode<8 ; ++lnode )
1605 {
1606 Index_t gnode = elemToNode[lnode];
1607 xd_local[lnode] = domain.xd(gnode);
1608 yd_local[lnode] = domain.yd(gnode);
1609 zd_local[lnode] = domain.zd(gnode);
1610 }
1611
1612 Real_t dt2 = Real_t(0.5) * deltaTime;
1613 for ( Index_t j=0 ; j<8 ; ++j )
1614 {
1615 x_local[j] -= dt2 * xd_local[j];
1616 y_local[j] -= dt2 * yd_local[j];
1617 z_local[j] -= dt2 * zd_local[j];
1618 }
1619
1620 CalcElemShapeFunctionDerivatives( x_local, y_local, z_local,
1621 B, &detJ );
1622
1623 CalcElemVelocityGradient( xd_local, yd_local, zd_local,
1624 B, detJ, D );
1625
1626 // put velocity gradient quantities into their global arrays.
1627 domain.dxx(k) = D[0];
1628 domain.dyy(k) = D[1];
1629 domain.dzz(k) = D[2];
1630 }
1631 }
1632
1633 /******************************************/
1634
1635 static inline
CalcLagrangeElements(Domain & domain,Real_t * vnew)1636 void CalcLagrangeElements(Domain& domain, Real_t* vnew)
1637 {
1638 Index_t numElem = domain.numElem() ;
1639 if (numElem > 0) {
1640 const Real_t deltatime = domain.deltatime() ;
1641
1642 domain.AllocateStrains(numElem);
1643
1644 CalcKinematicsForElems(domain, vnew, deltatime, numElem) ;
1645
1646 // element loop to do some stuff not included in the elemlib function.
1647 #ifdef LULESH_USE_OPENMP
1648 #pragma omp parallel for firstprivate(numElem)
1649 #endif
1650 for ( Index_t k=0 ; k<numElem ; ++k )
1651 {
1652 // calc strain rate and apply as constraint (only done in FB element)
1653 Real_t vdov = domain.dxx(k) + domain.dyy(k) + domain.dzz(k) ;
1654 Real_t vdovthird = vdov/Real_t(3.0) ;
1655
1656 // make the rate of deformation tensor deviatoric
1657 domain.vdov(k) = vdov ;
1658 domain.dxx(k) -= vdovthird ;
1659 domain.dyy(k) -= vdovthird ;
1660 domain.dzz(k) -= vdovthird ;
1661
1662 // See if any volumes are negative, and take appropriate action.
1663 if (vnew[k] <= Real_t(0.0))
1664 {
1665 #if USE_MPI
1666 MPI_Abort(MPI_COMM_WORLD, VolumeError) ;
1667 #else
1668 exit(VolumeError);
1669 #endif
1670 }
1671 }
1672 domain.DeallocateStrains();
1673 }
1674 }
1675
1676 /******************************************/
1677
1678 static inline
CalcMonotonicQGradientsForElems(Domain & domain,Real_t vnew[])1679 void CalcMonotonicQGradientsForElems(Domain& domain, Real_t vnew[])
1680 {
1681 Index_t numElem = domain.numElem();
1682
1683 #ifdef LULESH_USE_OPENMP
1684 #pragma omp parallel for firstprivate(numElem)
1685 #endif
1686 for (Index_t i = 0 ; i < numElem ; ++i ) {
1687 const Real_t ptiny = Real_t(1.e-36) ;
1688 Real_t ax,ay,az ;
1689 Real_t dxv,dyv,dzv ;
1690
1691 const Index_t *elemToNode = domain.nodelist(i);
1692 Index_t n0 = elemToNode[0] ;
1693 Index_t n1 = elemToNode[1] ;
1694 Index_t n2 = elemToNode[2] ;
1695 Index_t n3 = elemToNode[3] ;
1696 Index_t n4 = elemToNode[4] ;
1697 Index_t n5 = elemToNode[5] ;
1698 Index_t n6 = elemToNode[6] ;
1699 Index_t n7 = elemToNode[7] ;
1700
1701 Real_t x0 = domain.x(n0) ;
1702 Real_t x1 = domain.x(n1) ;
1703 Real_t x2 = domain.x(n2) ;
1704 Real_t x3 = domain.x(n3) ;
1705 Real_t x4 = domain.x(n4) ;
1706 Real_t x5 = domain.x(n5) ;
1707 Real_t x6 = domain.x(n6) ;
1708 Real_t x7 = domain.x(n7) ;
1709
1710 Real_t y0 = domain.y(n0) ;
1711 Real_t y1 = domain.y(n1) ;
1712 Real_t y2 = domain.y(n2) ;
1713 Real_t y3 = domain.y(n3) ;
1714 Real_t y4 = domain.y(n4) ;
1715 Real_t y5 = domain.y(n5) ;
1716 Real_t y6 = domain.y(n6) ;
1717 Real_t y7 = domain.y(n7) ;
1718
1719 Real_t z0 = domain.z(n0) ;
1720 Real_t z1 = domain.z(n1) ;
1721 Real_t z2 = domain.z(n2) ;
1722 Real_t z3 = domain.z(n3) ;
1723 Real_t z4 = domain.z(n4) ;
1724 Real_t z5 = domain.z(n5) ;
1725 Real_t z6 = domain.z(n6) ;
1726 Real_t z7 = domain.z(n7) ;
1727
1728 Real_t xv0 = domain.xd(n0) ;
1729 Real_t xv1 = domain.xd(n1) ;
1730 Real_t xv2 = domain.xd(n2) ;
1731 Real_t xv3 = domain.xd(n3) ;
1732 Real_t xv4 = domain.xd(n4) ;
1733 Real_t xv5 = domain.xd(n5) ;
1734 Real_t xv6 = domain.xd(n6) ;
1735 Real_t xv7 = domain.xd(n7) ;
1736
1737 Real_t yv0 = domain.yd(n0) ;
1738 Real_t yv1 = domain.yd(n1) ;
1739 Real_t yv2 = domain.yd(n2) ;
1740 Real_t yv3 = domain.yd(n3) ;
1741 Real_t yv4 = domain.yd(n4) ;
1742 Real_t yv5 = domain.yd(n5) ;
1743 Real_t yv6 = domain.yd(n6) ;
1744 Real_t yv7 = domain.yd(n7) ;
1745
1746 Real_t zv0 = domain.zd(n0) ;
1747 Real_t zv1 = domain.zd(n1) ;
1748 Real_t zv2 = domain.zd(n2) ;
1749 Real_t zv3 = domain.zd(n3) ;
1750 Real_t zv4 = domain.zd(n4) ;
1751 Real_t zv5 = domain.zd(n5) ;
1752 Real_t zv6 = domain.zd(n6) ;
1753 Real_t zv7 = domain.zd(n7) ;
1754
1755 Real_t vol = domain.volo(i)*vnew[i] ;
1756 Real_t norm = Real_t(1.0) / ( vol + ptiny ) ;
1757
1758 Real_t dxj = Real_t(-0.25)*((x0+x1+x5+x4) - (x3+x2+x6+x7)) ;
1759 Real_t dyj = Real_t(-0.25)*((y0+y1+y5+y4) - (y3+y2+y6+y7)) ;
1760 Real_t dzj = Real_t(-0.25)*((z0+z1+z5+z4) - (z3+z2+z6+z7)) ;
1761
1762 Real_t dxi = Real_t( 0.25)*((x1+x2+x6+x5) - (x0+x3+x7+x4)) ;
1763 Real_t dyi = Real_t( 0.25)*((y1+y2+y6+y5) - (y0+y3+y7+y4)) ;
1764 Real_t dzi = Real_t( 0.25)*((z1+z2+z6+z5) - (z0+z3+z7+z4)) ;
1765
1766 Real_t dxk = Real_t( 0.25)*((x4+x5+x6+x7) - (x0+x1+x2+x3)) ;
1767 Real_t dyk = Real_t( 0.25)*((y4+y5+y6+y7) - (y0+y1+y2+y3)) ;
1768 Real_t dzk = Real_t( 0.25)*((z4+z5+z6+z7) - (z0+z1+z2+z3)) ;
1769
1770 /* find delvk and delxk ( i cross j ) */
1771
1772 ax = dyi*dzj - dzi*dyj ;
1773 ay = dzi*dxj - dxi*dzj ;
1774 az = dxi*dyj - dyi*dxj ;
1775
1776 domain.delx_zeta(i) = vol / SQRT(ax*ax + ay*ay + az*az + ptiny) ;
1777
1778 ax *= norm ;
1779 ay *= norm ;
1780 az *= norm ;
1781
1782 dxv = Real_t(0.25)*((xv4+xv5+xv6+xv7) - (xv0+xv1+xv2+xv3)) ;
1783 dyv = Real_t(0.25)*((yv4+yv5+yv6+yv7) - (yv0+yv1+yv2+yv3)) ;
1784 dzv = Real_t(0.25)*((zv4+zv5+zv6+zv7) - (zv0+zv1+zv2+zv3)) ;
1785
1786 domain.delv_zeta(i) = ax*dxv + ay*dyv + az*dzv ;
1787
1788 /* find delxi and delvi ( j cross k ) */
1789
1790 ax = dyj*dzk - dzj*dyk ;
1791 ay = dzj*dxk - dxj*dzk ;
1792 az = dxj*dyk - dyj*dxk ;
1793
1794 domain.delx_xi(i) = vol / SQRT(ax*ax + ay*ay + az*az + ptiny) ;
1795
1796 ax *= norm ;
1797 ay *= norm ;
1798 az *= norm ;
1799
1800 dxv = Real_t(0.25)*((xv1+xv2+xv6+xv5) - (xv0+xv3+xv7+xv4)) ;
1801 dyv = Real_t(0.25)*((yv1+yv2+yv6+yv5) - (yv0+yv3+yv7+yv4)) ;
1802 dzv = Real_t(0.25)*((zv1+zv2+zv6+zv5) - (zv0+zv3+zv7+zv4)) ;
1803
1804 domain.delv_xi(i) = ax*dxv + ay*dyv + az*dzv ;
1805
1806 /* find delxj and delvj ( k cross i ) */
1807
1808 ax = dyk*dzi - dzk*dyi ;
1809 ay = dzk*dxi - dxk*dzi ;
1810 az = dxk*dyi - dyk*dxi ;
1811
1812 domain.delx_eta(i) = vol / SQRT(ax*ax + ay*ay + az*az + ptiny) ;
1813
1814 ax *= norm ;
1815 ay *= norm ;
1816 az *= norm ;
1817
1818 dxv = Real_t(-0.25)*((xv0+xv1+xv5+xv4) - (xv3+xv2+xv6+xv7)) ;
1819 dyv = Real_t(-0.25)*((yv0+yv1+yv5+yv4) - (yv3+yv2+yv6+yv7)) ;
1820 dzv = Real_t(-0.25)*((zv0+zv1+zv5+zv4) - (zv3+zv2+zv6+zv7)) ;
1821
1822 domain.delv_eta(i) = ax*dxv + ay*dyv + az*dzv ;
1823 }
1824 }
1825
1826 /******************************************/
1827
1828 static inline
CalcMonotonicQRegionForElems(Domain & domain,Int_t r,Real_t vnew[],Real_t ptiny)1829 void CalcMonotonicQRegionForElems(Domain &domain, Int_t r,
1830 Real_t vnew[], Real_t ptiny)
1831 {
1832 Real_t monoq_limiter_mult = domain.monoq_limiter_mult();
1833 Real_t monoq_max_slope = domain.monoq_max_slope();
1834 Real_t qlc_monoq = domain.qlc_monoq();
1835 Real_t qqc_monoq = domain.qqc_monoq();
1836
1837 #ifdef LULESH_USE_OPENMP
1838 #pragma omp parallel for firstprivate(qlc_monoq, qqc_monoq, monoq_limiter_mult, monoq_max_slope, ptiny)
1839 #endif
1840 for ( Index_t ielem = 0 ; ielem < domain.regElemSize(r); ++ielem ) {
1841 Index_t i = domain.regElemlist(r,ielem);
1842 Real_t qlin, qquad ;
1843 Real_t phixi, phieta, phizeta ;
1844 Int_t bcMask = domain.elemBC(i) ;
1845 Real_t delvm = 0.0, delvp =0.0;
1846
1847 /* phixi */
1848 Real_t norm = Real_t(1.) / (domain.delv_xi(i)+ ptiny ) ;
1849
1850 switch (bcMask & XI_M) {
1851 case XI_M_COMM: /* needs comm data */
1852 case 0: delvm = domain.delv_xi(domain.lxim(i)); break ;
1853 case XI_M_SYMM: delvm = domain.delv_xi(i) ; break ;
1854 case XI_M_FREE: delvm = Real_t(0.0) ; break ;
1855 default: fprintf(stderr, "Error in switch at %s line %d\n",
1856 __FILE__, __LINE__);
1857 delvm = 0; /* ERROR - but quiets the compiler */
1858 break;
1859 }
1860 switch (bcMask & XI_P) {
1861 case XI_P_COMM: /* needs comm data */
1862 case 0: delvp = domain.delv_xi(domain.lxip(i)) ; break ;
1863 case XI_P_SYMM: delvp = domain.delv_xi(i) ; break ;
1864 case XI_P_FREE: delvp = Real_t(0.0) ; break ;
1865 default: fprintf(stderr, "Error in switch at %s line %d\n",
1866 __FILE__, __LINE__);
1867 delvp = 0; /* ERROR - but quiets the compiler */
1868 break;
1869 }
1870
1871 delvm = delvm * norm ;
1872 delvp = delvp * norm ;
1873
1874 phixi = Real_t(.5) * ( delvm + delvp ) ;
1875
1876 delvm *= monoq_limiter_mult ;
1877 delvp *= monoq_limiter_mult ;
1878
1879 if ( delvm < phixi ) phixi = delvm ;
1880 if ( delvp < phixi ) phixi = delvp ;
1881 if ( phixi < Real_t(0.)) phixi = Real_t(0.) ;
1882 if ( phixi > monoq_max_slope) phixi = monoq_max_slope;
1883
1884
1885 /* phieta */
1886 norm = Real_t(1.) / ( domain.delv_eta(i) + ptiny ) ;
1887
1888 switch (bcMask & ETA_M) {
1889 case ETA_M_COMM: /* needs comm data */
1890 case 0: delvm = domain.delv_eta(domain.letam(i)) ; break ;
1891 case ETA_M_SYMM: delvm = domain.delv_eta(i) ; break ;
1892 case ETA_M_FREE: delvm = Real_t(0.0) ; break ;
1893 default: fprintf(stderr, "Error in switch at %s line %d\n",
1894 __FILE__, __LINE__);
1895 delvm = 0; /* ERROR - but quiets the compiler */
1896 break;
1897 }
1898 switch (bcMask & ETA_P) {
1899 case ETA_P_COMM: /* needs comm data */
1900 case 0: delvp = domain.delv_eta(domain.letap(i)) ; break ;
1901 case ETA_P_SYMM: delvp = domain.delv_eta(i) ; break ;
1902 case ETA_P_FREE: delvp = Real_t(0.0) ; break ;
1903 default: fprintf(stderr, "Error in switch at %s line %d\n",
1904 __FILE__, __LINE__);
1905 delvp = 0; /* ERROR - but quiets the compiler */
1906 break;
1907 }
1908
1909 delvm = delvm * norm ;
1910 delvp = delvp * norm ;
1911
1912 phieta = Real_t(.5) * ( delvm + delvp ) ;
1913
1914 delvm *= monoq_limiter_mult ;
1915 delvp *= monoq_limiter_mult ;
1916
1917 if ( delvm < phieta ) phieta = delvm ;
1918 if ( delvp < phieta ) phieta = delvp ;
1919 if ( phieta < Real_t(0.)) phieta = Real_t(0.) ;
1920 if ( phieta > monoq_max_slope) phieta = monoq_max_slope;
1921
1922 /* phizeta */
1923 norm = Real_t(1.) / ( domain.delv_zeta(i) + ptiny ) ;
1924
1925 switch (bcMask & ZETA_M) {
1926 case ZETA_M_COMM: /* needs comm data */
1927 case 0: delvm = domain.delv_zeta(domain.lzetam(i)) ; break ;
1928 case ZETA_M_SYMM: delvm = domain.delv_zeta(i) ; break ;
1929 case ZETA_M_FREE: delvm = Real_t(0.0) ; break ;
1930 default: fprintf(stderr, "Error in switch at %s line %d\n",
1931 __FILE__, __LINE__);
1932 delvm = 0; /* ERROR - but quiets the compiler */
1933 break;
1934 }
1935 switch (bcMask & ZETA_P) {
1936 case ZETA_P_COMM: /* needs comm data */
1937 case 0: delvp = domain.delv_zeta(domain.lzetap(i)) ; break ;
1938 case ZETA_P_SYMM: delvp = domain.delv_zeta(i) ; break ;
1939 case ZETA_P_FREE: delvp = Real_t(0.0) ; break ;
1940 default: fprintf(stderr, "Error in switch at %s line %d\n",
1941 __FILE__, __LINE__);
1942 delvp = 0; /* ERROR - but quiets the compiler */
1943 break;
1944 }
1945
1946 delvm = delvm * norm ;
1947 delvp = delvp * norm ;
1948
1949 phizeta = Real_t(.5) * ( delvm + delvp ) ;
1950
1951 delvm *= monoq_limiter_mult ;
1952 delvp *= monoq_limiter_mult ;
1953
1954 if ( delvm < phizeta ) phizeta = delvm ;
1955 if ( delvp < phizeta ) phizeta = delvp ;
1956 if ( phizeta < Real_t(0.)) phizeta = Real_t(0.);
1957 if ( phizeta > monoq_max_slope ) phizeta = monoq_max_slope;
1958
1959 /* Remove length scale */
1960
1961 if ( domain.vdov(i) > Real_t(0.) ) {
1962 qlin = Real_t(0.) ;
1963 qquad = Real_t(0.) ;
1964 }
1965 else {
1966 Real_t delvxxi = domain.delv_xi(i) * domain.delx_xi(i) ;
1967 Real_t delvxeta = domain.delv_eta(i) * domain.delx_eta(i) ;
1968 Real_t delvxzeta = domain.delv_zeta(i) * domain.delx_zeta(i) ;
1969
1970 if ( delvxxi > Real_t(0.) ) delvxxi = Real_t(0.) ;
1971 if ( delvxeta > Real_t(0.) ) delvxeta = Real_t(0.) ;
1972 if ( delvxzeta > Real_t(0.) ) delvxzeta = Real_t(0.) ;
1973
1974 Real_t rho = domain.elemMass(i) / (domain.volo(i) * vnew[i]) ;
1975
1976 qlin = -qlc_monoq * rho *
1977 ( delvxxi * (Real_t(1.) - phixi) +
1978 delvxeta * (Real_t(1.) - phieta) +
1979 delvxzeta * (Real_t(1.) - phizeta) ) ;
1980
1981 qquad = qqc_monoq * rho *
1982 ( delvxxi*delvxxi * (Real_t(1.) - phixi*phixi) +
1983 delvxeta*delvxeta * (Real_t(1.) - phieta*phieta) +
1984 delvxzeta*delvxzeta * (Real_t(1.) - phizeta*phizeta) ) ;
1985 }
1986
1987 domain.qq(i) = qquad ;
1988 domain.ql(i) = qlin ;
1989 }
1990 }
1991
1992 /******************************************/
1993
1994 static inline
CalcMonotonicQForElems(Domain & domain,Real_t vnew[])1995 void CalcMonotonicQForElems(Domain& domain, Real_t vnew[])
1996 {
1997 //
1998 // initialize parameters
1999 //
2000 const Real_t ptiny = Real_t(1.e-36) ;
2001
2002 //
2003 // calculate the monotonic q for all regions
2004 //
2005 for (Index_t r=0 ; r<domain.numReg() ; ++r) {
2006
2007 if (domain.regElemSize(r) > 0) {
2008 CalcMonotonicQRegionForElems(domain, r, vnew, ptiny) ;
2009 }
2010 }
2011 }
2012
2013 /******************************************/
2014
2015 static inline
CalcQForElems(Domain & domain,Real_t vnew[])2016 void CalcQForElems(Domain& domain, Real_t vnew[])
2017 {
2018 //
2019 // MONOTONIC Q option
2020 //
2021
2022 Index_t numElem = domain.numElem() ;
2023
2024 if (numElem != 0) {
2025 Int_t allElem = numElem + /* local elem */
2026 2*domain.sizeX()*domain.sizeY() + /* plane ghosts */
2027 2*domain.sizeX()*domain.sizeZ() + /* row ghosts */
2028 2*domain.sizeY()*domain.sizeZ() ; /* col ghosts */
2029
2030 domain.AllocateGradients(numElem, allElem);
2031
2032 #if USE_MPI
2033 CommRecv(domain, MSG_MONOQ, 3,
2034 domain.sizeX(), domain.sizeY(), domain.sizeZ(),
2035 true, true) ;
2036 #endif
2037
2038 /* Calculate velocity gradients */
2039 CalcMonotonicQGradientsForElems(domain, vnew);
2040
2041 #if USE_MPI
2042 Domain_member fieldData[3] ;
2043
2044 /* Transfer veloctiy gradients in the first order elements */
2045 /* problem->commElements->Transfer(CommElements::monoQ) ; */
2046
2047 fieldData[0] = &Domain::delv_xi ;
2048 fieldData[1] = &Domain::delv_eta ;
2049 fieldData[2] = &Domain::delv_zeta ;
2050
2051 CommSend(domain, MSG_MONOQ, 3, fieldData,
2052 domain.sizeX(), domain.sizeY(), domain.sizeZ(),
2053 true, true) ;
2054
2055 CommMonoQ(domain) ;
2056 #endif
2057
2058 CalcMonotonicQForElems(domain, vnew) ;
2059
2060 // Free up memory
2061 domain.DeallocateGradients();
2062
2063 /* Don't allow excessive artificial viscosity */
2064 Index_t idx = -1;
2065 for (Index_t i=0; i<numElem; ++i) {
2066 if ( domain.q(i) > domain.qstop() ) {
2067 idx = i ;
2068 break ;
2069 }
2070 }
2071
2072 if(idx >= 0) {
2073 #if USE_MPI
2074 MPI_Abort(MPI_COMM_WORLD, QStopError) ;
2075 #else
2076 exit(QStopError);
2077 #endif
2078 }
2079 }
2080 }
2081
2082 /******************************************/
2083
2084 static inline
CalcPressureForElems(Real_t * p_new,Real_t * bvc,Real_t * pbvc,Real_t * e_old,Real_t * compression,Real_t * vnewc,Real_t pmin,Real_t p_cut,Real_t eosvmax,Index_t length,Index_t * regElemList)2085 void CalcPressureForElems(Real_t* p_new, Real_t* bvc,
2086 Real_t* pbvc, Real_t* e_old,
2087 Real_t* compression, Real_t *vnewc,
2088 Real_t pmin,
2089 Real_t p_cut, Real_t eosvmax,
2090 Index_t length, Index_t *regElemList)
2091 {
2092 #ifdef LULESH_USE_OPENMP
2093 #pragma omp parallel for firstprivate(length)
2094 #endif
2095 for (Index_t i = 0; i < length ; ++i) {
2096 Real_t c1s = Real_t(2.0)/Real_t(3.0) ;
2097 bvc[i] = c1s * (compression[i] + Real_t(1.));
2098 pbvc[i] = c1s;
2099 }
2100 #ifdef LULESH_USE_OPENMP
2101 #pragma omp parallel for firstprivate(length, pmin, p_cut, eosvmax)
2102 #endif
2103 for (Index_t i = 0 ; i < length ; ++i){
2104 Index_t elem = regElemList[i];
2105
2106 p_new[i] = bvc[i] * e_old[i] ;
2107
2108 if (FABS(p_new[i]) < p_cut )
2109 p_new[i] = Real_t(0.0) ;
2110
2111 if ( vnewc[elem] >= eosvmax ) /* impossible condition here? */
2112 p_new[i] = Real_t(0.0) ;
2113
2114 if (p_new[i] < pmin)
2115 p_new[i] = pmin ;
2116 }
2117 }
2118
2119 /******************************************/
2120
2121 static inline
CalcEnergyForElems(Real_t * p_new,Real_t * e_new,Real_t * q_new,Real_t * bvc,Real_t * pbvc,Real_t * p_old,Real_t * e_old,Real_t * q_old,Real_t * compression,Real_t * compHalfStep,Real_t * vnewc,Real_t * work,Real_t * delvc,Real_t pmin,Real_t p_cut,Real_t e_cut,Real_t q_cut,Real_t emin,Real_t * qq_old,Real_t * ql_old,Real_t rho0,Real_t eosvmax,Index_t length,Index_t * regElemList)2122 void CalcEnergyForElems(Real_t* p_new, Real_t* e_new, Real_t* q_new,
2123 Real_t* bvc, Real_t* pbvc,
2124 Real_t* p_old, Real_t* e_old, Real_t* q_old,
2125 Real_t* compression, Real_t* compHalfStep,
2126 Real_t* vnewc, Real_t* work, Real_t* delvc, Real_t pmin,
2127 Real_t p_cut, Real_t e_cut, Real_t q_cut, Real_t emin,
2128 Real_t* qq_old, Real_t* ql_old,
2129 Real_t rho0,
2130 Real_t eosvmax,
2131 Index_t length, Index_t *regElemList)
2132 {
2133 Real_t *pHalfStep = Allocate<Real_t>(length) ;
2134 #ifdef LULESH_USE_OPENMP
2135 #pragma omp parallel for firstprivate(length, emin)
2136 #endif
2137 for (Index_t i = 0 ; i < length ; ++i) {
2138 e_new[i] = e_old[i] - Real_t(0.5) * delvc[i] * (p_old[i] + q_old[i])
2139 + Real_t(0.5) * work[i];
2140
2141 if (e_new[i] < emin ) {
2142 e_new[i] = emin ;
2143 }
2144 }
2145
2146 CalcPressureForElems(pHalfStep, bvc, pbvc, e_new, compHalfStep, vnewc,
2147 pmin, p_cut, eosvmax, length, regElemList);
2148
2149 #ifdef LULESH_USE_OPENMP
2150 #pragma omp parallel for firstprivate(length, rho0)
2151 #endif
2152 for (Index_t i = 0 ; i < length ; ++i) {
2153 Real_t vhalf = Real_t(1.) / (Real_t(1.) + compHalfStep[i]) ;
2154
2155 if ( delvc[i] > Real_t(0.) ) {
2156 q_new[i] /* = qq_old[i] = ql_old[i] */ = Real_t(0.) ;
2157 }
2158 else {
2159 Real_t ssc = ( pbvc[i] * e_new[i]
2160 + vhalf * vhalf * bvc[i] * pHalfStep[i] ) / rho0 ;
2161
2162 if ( ssc <= Real_t(.1111111e-36) ) {
2163 ssc = Real_t(.3333333e-18) ;
2164 } else {
2165 ssc = SQRT(ssc) ;
2166 }
2167
2168 q_new[i] = (ssc*ql_old[i] + qq_old[i]) ;
2169 }
2170
2171 e_new[i] = e_new[i] + Real_t(0.5) * delvc[i]
2172 * ( Real_t(3.0)*(p_old[i] + q_old[i])
2173 - Real_t(4.0)*(pHalfStep[i] + q_new[i])) ;
2174 }
2175
2176 #ifdef LULESH_USE_OPENMP
2177 #pragma omp parallel for firstprivate(length, emin, e_cut)
2178 #endif
2179 for (Index_t i = 0 ; i < length ; ++i) {
2180
2181 e_new[i] += Real_t(0.5) * work[i];
2182
2183 if (FABS(e_new[i]) < e_cut) {
2184 e_new[i] = Real_t(0.) ;
2185 }
2186 if ( e_new[i] < emin ) {
2187 e_new[i] = emin ;
2188 }
2189 }
2190
2191 CalcPressureForElems(p_new, bvc, pbvc, e_new, compression, vnewc,
2192 pmin, p_cut, eosvmax, length, regElemList);
2193
2194 #ifdef LULESH_USE_OPENMP
2195 #pragma omp parallel for firstprivate(length, rho0, emin, e_cut)
2196 #endif
2197 for (Index_t i = 0 ; i < length ; ++i){
2198 const Real_t sixth = Real_t(1.0) / Real_t(6.0) ;
2199 Index_t elem = regElemList[i];
2200 Real_t q_tilde ;
2201
2202 if (delvc[i] > Real_t(0.)) {
2203 q_tilde = Real_t(0.) ;
2204 }
2205 else {
2206 Real_t ssc = ( pbvc[i] * e_new[i]
2207 + vnewc[elem] * vnewc[elem] * bvc[i] * p_new[i] ) / rho0 ;
2208
2209 if ( ssc <= Real_t(.1111111e-36) ) {
2210 ssc = Real_t(.3333333e-18) ;
2211 } else {
2212 ssc = SQRT(ssc) ;
2213 }
2214
2215 q_tilde = (ssc*ql_old[i] + qq_old[i]) ;
2216 }
2217
2218 e_new[i] = e_new[i] - ( Real_t(7.0)*(p_old[i] + q_old[i])
2219 - Real_t(8.0)*(pHalfStep[i] + q_new[i])
2220 + (p_new[i] + q_tilde)) * delvc[i]*sixth ;
2221
2222 if (FABS(e_new[i]) < e_cut) {
2223 e_new[i] = Real_t(0.) ;
2224 }
2225 if ( e_new[i] < emin ) {
2226 e_new[i] = emin ;
2227 }
2228 }
2229
2230 CalcPressureForElems(p_new, bvc, pbvc, e_new, compression, vnewc,
2231 pmin, p_cut, eosvmax, length, regElemList);
2232
2233 #ifdef LULESH_USE_OPENMP
2234 #pragma omp parallel for firstprivate(length, rho0, q_cut)
2235 #endif
2236 for (Index_t i = 0 ; i < length ; ++i){
2237 Index_t elem = regElemList[i];
2238
2239 if ( delvc[i] <= Real_t(0.) ) {
2240 Real_t ssc = ( pbvc[i] * e_new[i]
2241 + vnewc[elem] * vnewc[elem] * bvc[i] * p_new[i] ) / rho0 ;
2242
2243 if ( ssc <= Real_t(.1111111e-36) ) {
2244 ssc = Real_t(.3333333e-18) ;
2245 } else {
2246 ssc = SQRT(ssc) ;
2247 }
2248
2249 q_new[i] = (ssc*ql_old[i] + qq_old[i]) ;
2250
2251 if (FABS(q_new[i]) < q_cut) q_new[i] = Real_t(0.) ;
2252 }
2253 }
2254
2255 Release(&pHalfStep) ;
2256
2257 return ;
2258 }
2259
2260 /******************************************/
2261
2262 static inline
CalcSoundSpeedForElems(Domain & domain,Real_t * vnewc,Real_t rho0,Real_t * enewc,Real_t * pnewc,Real_t * pbvc,Real_t * bvc,Real_t ss4o3,Index_t len,Index_t * regElemList)2263 void CalcSoundSpeedForElems(Domain &domain,
2264 Real_t *vnewc, Real_t rho0, Real_t *enewc,
2265 Real_t *pnewc, Real_t *pbvc,
2266 Real_t *bvc, Real_t ss4o3,
2267 Index_t len, Index_t *regElemList)
2268 {
2269 #ifdef LULESH_USE_OPENMP
2270 #pragma omp parallel for firstprivate(rho0, ss4o3)
2271 #endif
2272 for (Index_t i = 0; i < len ; ++i) {
2273 Index_t elem = regElemList[i];
2274 Real_t ssTmp = (pbvc[i] * enewc[i] + vnewc[elem] * vnewc[elem] *
2275 bvc[i] * pnewc[i]) / rho0;
2276 if (ssTmp <= Real_t(.1111111e-36)) {
2277 ssTmp = Real_t(.3333333e-18);
2278 }
2279 else {
2280 ssTmp = SQRT(ssTmp);
2281 }
2282 domain.ss(elem) = ssTmp ;
2283 }
2284 }
2285
2286 /******************************************/
2287
2288 static inline
EvalEOSForElems(Domain & domain,Real_t * vnewc,Int_t numElemReg,Index_t * regElemList,Int_t rep)2289 void EvalEOSForElems(Domain& domain, Real_t *vnewc,
2290 Int_t numElemReg, Index_t *regElemList, Int_t rep)
2291 {
2292 Real_t e_cut = domain.e_cut() ;
2293 Real_t p_cut = domain.p_cut() ;
2294 Real_t ss4o3 = domain.ss4o3() ;
2295 Real_t q_cut = domain.q_cut() ;
2296
2297 Real_t eosvmax = domain.eosvmax() ;
2298 Real_t eosvmin = domain.eosvmin() ;
2299 Real_t pmin = domain.pmin() ;
2300 Real_t emin = domain.emin() ;
2301 Real_t rho0 = domain.refdens() ;
2302
2303 // These temporaries will be of different size for
2304 // each call (due to different sized region element
2305 // lists)
2306 Real_t *e_old = Allocate<Real_t>(numElemReg) ;
2307 Real_t *delvc = Allocate<Real_t>(numElemReg) ;
2308 Real_t *p_old = Allocate<Real_t>(numElemReg) ;
2309 Real_t *q_old = Allocate<Real_t>(numElemReg) ;
2310 Real_t *compression = Allocate<Real_t>(numElemReg) ;
2311 Real_t *compHalfStep = Allocate<Real_t>(numElemReg) ;
2312 Real_t *qq_old = Allocate<Real_t>(numElemReg) ;
2313 Real_t *ql_old = Allocate<Real_t>(numElemReg) ;
2314 Real_t *work = Allocate<Real_t>(numElemReg) ;
2315 Real_t *p_new = Allocate<Real_t>(numElemReg) ;
2316 Real_t *e_new = Allocate<Real_t>(numElemReg) ;
2317 Real_t *q_new = Allocate<Real_t>(numElemReg) ;
2318 Real_t *bvc = Allocate<Real_t>(numElemReg) ;
2319 Real_t *pbvc = Allocate<Real_t>(numElemReg) ;
2320
2321 //loop to add load imbalance based on region number
2322 for(Int_t j = 0; j < rep; j++) {
2323 /* compress data, minimal set */
2324 #ifdef LULESH_USE_OPENMP
2325 #pragma omp parallel
2326 #endif
2327 {
2328 #ifdef LULESH_USE_OPENMP
2329 #pragma omp for nowait firstprivate(numElemReg)
2330 #endif
2331 for (Index_t i=0; i<numElemReg; ++i) {
2332 Index_t elem = regElemList[i];
2333 e_old[i] = domain.e(elem) ;
2334 delvc[i] = domain.delv(elem) ;
2335 p_old[i] = domain.p(elem) ;
2336 q_old[i] = domain.q(elem) ;
2337 qq_old[i] = domain.qq(elem) ;
2338 ql_old[i] = domain.ql(elem) ;
2339 }
2340
2341 #ifdef LULESH_USE_OPENMP
2342 #pragma omp for firstprivate(numElemReg)
2343 #endif
2344 for (Index_t i = 0; i < numElemReg ; ++i) {
2345 Index_t elem = regElemList[i];
2346 Real_t vchalf ;
2347 compression[i] = Real_t(1.) / vnewc[elem] - Real_t(1.);
2348 vchalf = vnewc[elem] - delvc[i] * Real_t(.5);
2349 compHalfStep[i] = Real_t(1.) / vchalf - Real_t(1.);
2350 }
2351
2352 /* Check for v > eosvmax or v < eosvmin */
2353 if ( eosvmin != Real_t(0.) ) {
2354 #ifdef LULESH_USE_OPENMP
2355 #pragma omp for nowait firstprivate(numElemReg, eosvmin)
2356 #endif
2357 for(Index_t i=0 ; i<numElemReg ; ++i) {
2358 Index_t elem = regElemList[i];
2359 if (vnewc[elem] <= eosvmin) { /* impossible due to calling func? */
2360 compHalfStep[i] = compression[i] ;
2361 }
2362 }
2363 }
2364 if ( eosvmax != Real_t(0.) ) {
2365 #ifdef LULESH_USE_OPENMP
2366 #pragma omp for nowait firstprivate(numElemReg, eosvmax)
2367 #endif
2368 for(Index_t i=0 ; i<numElemReg ; ++i) {
2369 Index_t elem = regElemList[i];
2370 if (vnewc[elem] >= eosvmax) { /* impossible due to calling func? */
2371 p_old[i] = Real_t(0.) ;
2372 compression[i] = Real_t(0.) ;
2373 compHalfStep[i] = Real_t(0.) ;
2374 }
2375 }
2376 }
2377
2378 #ifdef LULESH_USE_OPENMP
2379 #pragma omp for nowait firstprivate(numElemReg)
2380 #endif
2381 for (Index_t i = 0 ; i < numElemReg ; ++i) {
2382 work[i] = Real_t(0.) ;
2383 }
2384 }
2385 CalcEnergyForElems(p_new, e_new, q_new, bvc, pbvc,
2386 p_old, e_old, q_old, compression, compHalfStep,
2387 vnewc, work, delvc, pmin,
2388 p_cut, e_cut, q_cut, emin,
2389 qq_old, ql_old, rho0, eosvmax,
2390 numElemReg, regElemList);
2391 }
2392
2393 #ifdef LULESH_USE_OPENMP
2394 #pragma omp parallel for firstprivate(numElemReg)
2395 #endif
2396 for (Index_t i=0; i<numElemReg; ++i) {
2397 Index_t elem = regElemList[i];
2398 domain.p(elem) = p_new[i] ;
2399 domain.e(elem) = e_new[i] ;
2400 domain.q(elem) = q_new[i] ;
2401 }
2402
2403 CalcSoundSpeedForElems(domain,
2404 vnewc, rho0, e_new, p_new,
2405 pbvc, bvc, ss4o3,
2406 numElemReg, regElemList) ;
2407
2408 Release(&pbvc) ;
2409 Release(&bvc) ;
2410 Release(&q_new) ;
2411 Release(&e_new) ;
2412 Release(&p_new) ;
2413 Release(&work) ;
2414 Release(&ql_old) ;
2415 Release(&qq_old) ;
2416 Release(&compHalfStep) ;
2417 Release(&compression) ;
2418 Release(&q_old) ;
2419 Release(&p_old) ;
2420 Release(&delvc) ;
2421 Release(&e_old) ;
2422 }
2423
2424 /******************************************/
2425
2426 static inline
ApplyMaterialPropertiesForElems(Domain & domain,Real_t vnew[])2427 void ApplyMaterialPropertiesForElems(Domain& domain, Real_t vnew[])
2428 {
2429 Index_t numElem = domain.numElem() ;
2430
2431 if (numElem != 0) {
2432 /* Expose all of the variables needed for material evaluation */
2433 Real_t eosvmin = domain.eosvmin() ;
2434 Real_t eosvmax = domain.eosvmax() ;
2435
2436 #ifdef LULESH_USE_OPENMP
2437 #pragma omp parallel
2438 #endif
2439 {
2440 // Bound the updated relative volumes with eosvmin/max
2441 if (eosvmin != Real_t(0.)) {
2442 #ifdef LULESH_USE_OPENMP
2443 #pragma omp for firstprivate(numElem)
2444 #endif
2445 for(Index_t i=0 ; i<numElem ; ++i) {
2446 if (vnew[i] < eosvmin)
2447 vnew[i] = eosvmin ;
2448 }
2449 }
2450
2451 if (eosvmax != Real_t(0.)) {
2452 #ifdef LULESH_USE_OPENMP
2453 #pragma omp for nowait firstprivate(numElem)
2454 #endif
2455 for(Index_t i=0 ; i<numElem ; ++i) {
2456 if (vnew[i] > eosvmax)
2457 vnew[i] = eosvmax ;
2458 }
2459 }
2460
2461 // This check may not make perfect sense in LULESH, but
2462 // it's representative of something in the full code -
2463 // just leave it in, please
2464 #ifdef LULESH_USE_OPENMP
2465 #pragma omp for nowait firstprivate(numElem)
2466 #endif
2467 for (Index_t i=0; i<numElem; ++i) {
2468 Real_t vc = domain.v(i) ;
2469 if (eosvmin != Real_t(0.)) {
2470 if (vc < eosvmin)
2471 vc = eosvmin ;
2472 }
2473 if (eosvmax != Real_t(0.)) {
2474 if (vc > eosvmax)
2475 vc = eosvmax ;
2476 }
2477 if (vc <= 0.) {
2478 #if USE_MPI
2479 MPI_Abort(MPI_COMM_WORLD, VolumeError) ;
2480 #else
2481 exit(VolumeError);
2482 #endif
2483 }
2484 }
2485 }
2486
2487 for (Int_t r=0 ; r<domain.numReg() ; r++) {
2488 Index_t numElemReg = domain.regElemSize(r);
2489 Index_t *regElemList = domain.regElemlist(r);
2490 Int_t rep;
2491 //Determine load imbalance for this region
2492 //round down the number with lowest cost
2493 if(r < domain.numReg()/2)
2494 rep = 1;
2495 //you don't get an expensive region unless you at least have 5 regions
2496 else if(r < (domain.numReg() - (domain.numReg()+15)/20))
2497 rep = 1 + domain.cost();
2498 //very expensive regions
2499 else
2500 rep = 10 * (1+ domain.cost());
2501 EvalEOSForElems(domain, vnew, numElemReg, regElemList, rep);
2502 }
2503
2504 }
2505 }
2506
2507 /******************************************/
2508
2509 static inline
UpdateVolumesForElems(Domain & domain,Real_t * vnew,Real_t v_cut,Index_t length)2510 void UpdateVolumesForElems(Domain &domain, Real_t *vnew,
2511 Real_t v_cut, Index_t length)
2512 {
2513 if (length != 0) {
2514 #ifdef LULESH_USE_OPENMP
2515 #pragma omp parallel for firstprivate(length, v_cut)
2516 #endif
2517 for(Index_t i=0 ; i<length ; ++i) {
2518 Real_t tmpV = vnew[i] ;
2519
2520 if ( FABS(tmpV - Real_t(1.0)) < v_cut )
2521 tmpV = Real_t(1.0) ;
2522
2523 domain.v(i) = tmpV ;
2524 }
2525 }
2526
2527 return ;
2528 }
2529
2530 /******************************************/
2531
2532 static inline
LagrangeElements(Domain & domain,Index_t numElem)2533 void LagrangeElements(Domain& domain, Index_t numElem)
2534 {
2535 Real_t *vnew = Allocate<Real_t>(numElem) ; /* new relative vol -- temp */
2536
2537 CalcLagrangeElements(domain, vnew) ;
2538
2539 /* Calculate Q. (Monotonic q option requires communication) */
2540 CalcQForElems(domain, vnew) ;
2541
2542 ApplyMaterialPropertiesForElems(domain, vnew) ;
2543
2544 UpdateVolumesForElems(domain, vnew,
2545 domain.v_cut(), numElem) ;
2546
2547 Release(&vnew);
2548 }
2549
2550 /******************************************/
2551
2552 static inline
CalcCourantConstraintForElems(Domain & domain,Index_t length,Index_t * regElemlist,Real_t qqc,Real_t & dtcourant)2553 void CalcCourantConstraintForElems(Domain &domain, Index_t length,
2554 Index_t *regElemlist,
2555 Real_t qqc, Real_t& dtcourant)
2556 {
2557 #if LULESH_USE_OPENMP
2558 Index_t threads = omp_get_max_threads();
2559 static Index_t *courant_elem_per_thread;
2560 static Real_t *dtcourant_per_thread;
2561 static bool first = true;
2562 if (first) {
2563 courant_elem_per_thread = new Index_t[threads];
2564 dtcourant_per_thread = new Real_t[threads];
2565 first = false;
2566 }
2567 #else
2568 Index_t threads = 1;
2569 Index_t courant_elem_per_thread[1];
2570 Real_t dtcourant_per_thread[1];
2571 #endif
2572
2573
2574 #ifdef LULESH_USE_OPENMP
2575 #pragma omp parallel firstprivate(length, qqc)
2576 #endif
2577 {
2578 Real_t qqc2 = Real_t(64.0) * qqc * qqc ;
2579 Real_t dtcourant_tmp = dtcourant;
2580 Index_t courant_elem = -1 ;
2581
2582 #if LULESH_USE_OPENMP
2583 Index_t thread_num = omp_get_thread_num();
2584 #else
2585 Index_t thread_num = 0;
2586 #endif
2587
2588 #ifdef LULESH_USE_OPENMP
2589 #pragma omp for
2590 #endif
2591 for (Index_t i = 0 ; i < length ; ++i) {
2592 Index_t indx = regElemlist[i] ;
2593 Real_t dtf = domain.ss(indx) * domain.ss(indx) ;
2594
2595 if ( domain.vdov(indx) < Real_t(0.) ) {
2596 dtf = dtf
2597 + qqc2 * domain.arealg(indx) * domain.arealg(indx)
2598 * domain.vdov(indx) * domain.vdov(indx) ;
2599 }
2600
2601 dtf = SQRT(dtf) ;
2602 dtf = domain.arealg(indx) / dtf ;
2603
2604 if (domain.vdov(indx) != Real_t(0.)) {
2605 if ( dtf < dtcourant_tmp ) {
2606 dtcourant_tmp = dtf ;
2607 courant_elem = indx ;
2608 }
2609 }
2610 }
2611
2612 dtcourant_per_thread[thread_num] = dtcourant_tmp ;
2613 courant_elem_per_thread[thread_num] = courant_elem ;
2614 }
2615
2616 for (Index_t i = 1; i < threads; ++i) {
2617 if (dtcourant_per_thread[i] < dtcourant_per_thread[0] ) {
2618 dtcourant_per_thread[0] = dtcourant_per_thread[i];
2619 courant_elem_per_thread[0] = courant_elem_per_thread[i];
2620 }
2621 }
2622
2623 if (courant_elem_per_thread[0] != -1) {
2624 dtcourant = dtcourant_per_thread[0] ;
2625 }
2626
2627 return ;
2628
2629 }
2630
2631 /******************************************/
2632
2633 static inline
CalcHydroConstraintForElems(Domain & domain,Index_t length,Index_t * regElemlist,Real_t dvovmax,Real_t & dthydro)2634 void CalcHydroConstraintForElems(Domain &domain, Index_t length,
2635 Index_t *regElemlist, Real_t dvovmax, Real_t& dthydro)
2636 {
2637 #if LULESH_USE_OPENMP
2638 Index_t threads = omp_get_max_threads();
2639 static Index_t *hydro_elem_per_thread;
2640 static Real_t *dthydro_per_thread;
2641 static bool first = true;
2642 if (first) {
2643 hydro_elem_per_thread = new Index_t[threads];
2644 dthydro_per_thread = new Real_t[threads];
2645 first = false;
2646 }
2647 #else
2648 Index_t threads = 1;
2649 Index_t hydro_elem_per_thread[1];
2650 Real_t dthydro_per_thread[1];
2651 #endif
2652
2653 #ifdef LULESH_USE_OPENMP
2654 #pragma omp parallel firstprivate(length, dvovmax)
2655 #endif
2656 {
2657 Real_t dthydro_tmp = dthydro ;
2658 Index_t hydro_elem = -1 ;
2659
2660 #if LULESH_USE_OPENMP
2661 Index_t thread_num = omp_get_thread_num();
2662 #else
2663 Index_t thread_num = 0;
2664 #endif
2665
2666
2667 #ifdef LULESH_USE_OPENMP
2668 #pragma omp for
2669 #endif
2670 for (Index_t i = 0 ; i < length ; ++i) {
2671 Index_t indx = regElemlist[i] ;
2672
2673 if (domain.vdov(indx) != Real_t(0.)) {
2674 Real_t dtdvov = dvovmax / (FABS(domain.vdov(indx))+Real_t(1.e-20)) ;
2675
2676 if ( dthydro_tmp > dtdvov ) {
2677 dthydro_tmp = dtdvov ;
2678 hydro_elem = indx ;
2679 }
2680 }
2681 }
2682
2683 dthydro_per_thread[thread_num] = dthydro_tmp ;
2684 hydro_elem_per_thread[thread_num] = hydro_elem ;
2685 }
2686
2687 for (Index_t i = 1; i < threads; ++i) {
2688 if(dthydro_per_thread[i] < dthydro_per_thread[0]) {
2689 dthydro_per_thread[0] = dthydro_per_thread[i];
2690 hydro_elem_per_thread[0] = hydro_elem_per_thread[i];
2691 }
2692 }
2693
2694 if (hydro_elem_per_thread[0] != -1) {
2695 dthydro = dthydro_per_thread[0] ;
2696 }
2697
2698 return ;
2699 }
2700
2701 /******************************************/
2702
2703 static inline
CalcTimeConstraintsForElems(Domain & domain)2704 void CalcTimeConstraintsForElems(Domain& domain) {
2705
2706 // Initialize conditions to a very large value
2707 domain.dtcourant() = 1.0e+20;
2708 domain.dthydro() = 1.0e+20;
2709
2710 for (Index_t r=0 ; r < domain.numReg() ; ++r) {
2711 /* evaluate time constraint */
2712 CalcCourantConstraintForElems(domain, domain.regElemSize(r),
2713 domain.regElemlist(r),
2714 domain.qqc(),
2715 domain.dtcourant()) ;
2716
2717 /* check hydro constraint */
2718 CalcHydroConstraintForElems(domain, domain.regElemSize(r),
2719 domain.regElemlist(r),
2720 domain.dvovmax(),
2721 domain.dthydro()) ;
2722 }
2723 }
2724
2725 /******************************************/
2726
2727 static inline
LagrangeLeapFrog(Domain & domain)2728 void LagrangeLeapFrog(Domain& domain)
2729 {
2730 #ifdef SEDOV_SYNC_POS_VEL_LATE
2731 Domain_member fieldData[6] ;
2732 #endif
2733
2734 /* calculate nodal forces, accelerations, velocities, positions, with
2735 * applied boundary conditions and slide surface considerations */
2736 LagrangeNodal(domain);
2737
2738
2739 #ifdef SEDOV_SYNC_POS_VEL_LATE
2740 #endif
2741
2742 /* calculate element quantities (i.e. velocity gradient & q), and update
2743 * material states */
2744 LagrangeElements(domain, domain.numElem());
2745
2746 #if USE_MPI
2747 #ifdef SEDOV_SYNC_POS_VEL_LATE
2748 CommRecv(domain, MSG_SYNC_POS_VEL, 6,
2749 domain.sizeX() + 1, domain.sizeY() + 1, domain.sizeZ() + 1,
2750 false, false) ;
2751
2752 fieldData[0] = &Domain::x ;
2753 fieldData[1] = &Domain::y ;
2754 fieldData[2] = &Domain::z ;
2755 fieldData[3] = &Domain::xd ;
2756 fieldData[4] = &Domain::yd ;
2757 fieldData[5] = &Domain::zd ;
2758
2759 CommSend(domain, MSG_SYNC_POS_VEL, 6, fieldData,
2760 domain.sizeX() + 1, domain.sizeY() + 1, domain.sizeZ() + 1,
2761 false, false) ;
2762 #endif
2763 #endif
2764
2765 CalcTimeConstraintsForElems(domain);
2766
2767 #if USE_MPI
2768 #ifdef SEDOV_SYNC_POS_VEL_LATE
2769 CommSyncPosVel(domain) ;
2770 #endif
2771 #endif
2772 }
2773
2774
2775 /******************************************/
2776
main(int argc,char * argv[])2777 int main(int argc, char *argv[])
2778 {
2779 Domain *locDom ;
2780 Int_t numRanks ;
2781 Int_t myRank ;
2782 struct cmdLineOpts opts;
2783
2784 #if USE_MPI
2785 Domain_member fieldData ;
2786
2787 MPI_Init(&argc, &argv) ;
2788 MPI_Comm_size(MPI_COMM_WORLD, &numRanks) ;
2789 MPI_Comm_rank(MPI_COMM_WORLD, &myRank) ;
2790 #else
2791 numRanks = 1;
2792 myRank = 0;
2793 #endif
2794
2795 /* Set defaults that can be overridden by command line opts */
2796 opts.its = 9999999;
2797 opts.nx = 30;
2798 opts.numReg = 11;
2799 opts.numFiles = (int)(numRanks+10)/9;
2800 opts.showProg = 0;
2801 opts.quiet = 0;
2802 opts.viz = 0;
2803 opts.balance = 1;
2804 opts.cost = 1;
2805
2806 ParseCommandLineOptions(argc, argv, myRank, &opts);
2807
2808 if ((myRank == 0) && (opts.quiet == 0)) {
2809 printf("Running problem size %d^3 per domain until completion\n", opts.nx);
2810 printf("Num processors: %d\n", numRanks);
2811 #if LULESH_USE_OPENMP
2812 printf("Num threads: %d\n", omp_get_max_threads());
2813 #endif
2814 printf("Total number of elements: %lld\n\n", (long long int)(numRanks*opts.nx*opts.nx*opts.nx));
2815 printf("To run other sizes, use -s <integer>.\n");
2816 printf("To run a fixed number of iterations, use -i <integer>.\n");
2817 printf("To run a more or less balanced region set, use -b <integer>.\n");
2818 printf("To change the relative costs of regions, use -c <integer>.\n");
2819 printf("To print out progress, use -p\n");
2820 printf("To write an output file for VisIt, use -v\n");
2821 printf("See help (-h) for more options\n\n");
2822 }
2823
2824 // Set up the mesh and decompose. Assumes regular cubes for now
2825 Int_t col, row, plane, side;
2826 InitMeshDecomp(numRanks, myRank, &col, &row, &plane, &side);
2827
2828 // Build the main data structure and initialize it
2829 locDom = new Domain(numRanks, myRank, col, row, plane, opts.nx,
2830 side, opts.numReg, opts.balance, opts.cost) ;
2831
2832
2833 #if USE_MPI
2834 fieldData = &Domain::nodalMass ;
2835
2836 // Initial domain boundary communication
2837 CommRecv(*locDom, MSG_COMM_SBN, 1,
2838 locDom->sizeX() + 1, locDom->sizeY() + 1, locDom->sizeZ() + 1,
2839 true, false) ;
2840 CommSend(*locDom, MSG_COMM_SBN, 1, &fieldData,
2841 locDom->sizeX() + 1, locDom->sizeY() + 1, locDom->sizeZ() + 1,
2842 true, false) ;
2843 CommSBN(*locDom, 1, &fieldData) ;
2844
2845 // End initialization
2846 MPI_Barrier(MPI_COMM_WORLD);
2847 #endif
2848 /*--------------------------------------------------------------------------
2849 *--------------------------------------------------------------------------
2850 * Begin Ascent Integration
2851 *--------------------------------------------------------------------------
2852 *--------------------------------------------------------------------------*/
2853 //
2854 // setup Ascent In-situ rendering.
2855 //
2856 Ascent ascent;
2857 Node ascent_opts;
2858
2859 #if USE_MPI
2860 ascent_opts["mpi_comm"] = MPI_Comm_c2f(MPI_COMM_WORLD);
2861 #endif
2862
2863 ascent.open(ascent_opts);
2864 // BEGIN timestep to solution */
2865 #if USE_MPI
2866 double start = MPI_Wtime();
2867 #else
2868 timeval start;
2869 gettimeofday(&start, NULL) ;
2870 #endif
2871
2872 //debug to see region sizes
2873 // for(Int_t i = 0; i < locDom->numReg(); i++)
2874 // std::cout << "region" << i + 1<< "size" << locDom->regElemSize(i) <<std::endl;
2875
2876 conduit::Node scenes;
2877 scenes["s1/plots/p1/type"] = "pseudocolor";
2878 scenes["s1/plots/p1/field"] = "e";
2879 double vec3[3];
2880 vec3[0] = -0.6; vec3[1] = -0.6; vec3[2] = -0.8;
2881 scenes["s1/renders/r1/camera/position"].set_float64_ptr(vec3,3);
2882 scenes["s1/renders/r1/image_prefix"] = "lulesh_image";
2883
2884 conduit::Node actions;
2885 conduit::Node &add_plots = actions.append();
2886 add_plots["action"] = "add_scenes";
2887 add_plots["scenes"] = scenes;
2888 conduit::Node &execute = actions.append();
2889 execute["action"] = "execute";
2890
2891 conduit::Node &reset_action = actions.append();
2892 reset_action["action"] = "reset";
2893
2894 while((locDom->time() < locDom->stoptime()) && (locDom->cycle() < opts.its)) {
2895 {
2896 ASCENT_BLOCK_TIMER(LULESH_MAIN_LOOP)
2897 TimeIncrement(*locDom) ;
2898 LagrangeLeapFrog(*locDom) ;
2899 }
2900 if ((opts.showProg != 0) && (opts.quiet == 0) && (myRank == 0)) {
2901 printf("cycle = %d, time = %e, dt=%e\n",
2902 locDom->cycle(), double(locDom->time()), double(locDom->deltatime()) ) ;
2903 }
2904 //-- begin calls to ascent -- //
2905 if ( (locDom->cycle() % 1 == 0) || (locDom->cycle() == 0)) {
2906 char outFileName[30];
2907 sprintf(outFileName,"lulesh_image%03d",locDom->cycle());
2908 //
2909 // Create the actions.
2910 //
2911 ascent.publish(locDom->visitNode());
2912 ascent.execute(actions);
2913 }
2914 }
2915 ascent.close();
2916
2917 /*--------------------------------------------------------------------------
2918 *--------------------------------------------------------------------------
2919 * End Ascent Integration
2920 *--------------------------------------------------------------------------
2921 *--------------------------------------------------------------------------*/
2922
2923 // Use reduced max elapsed time
2924 double elapsed_time;
2925 #if USE_MPI
2926 elapsed_time = MPI_Wtime() - start;
2927 #else
2928 timeval end;
2929 gettimeofday(&end, NULL) ;
2930 elapsed_time = (double)(end.tv_sec - start.tv_sec) + ((double)(end.tv_usec - start.tv_usec))/1000000 ;
2931 #endif
2932 double elapsed_timeG;
2933 #if USE_MPI
2934 MPI_Reduce(&elapsed_time, &elapsed_timeG, 1, MPI_DOUBLE,
2935 MPI_MAX, 0, MPI_COMM_WORLD);
2936 #else
2937 elapsed_timeG = elapsed_time;
2938 #endif
2939
2940
2941
2942 // Write out final viz file */
2943 if (opts.viz) {
2944 DumpToVisit(*locDom, opts.numFiles, myRank, numRanks) ;
2945 }
2946
2947 if ((myRank == 0) && (opts.quiet == 0)) {
2948 VerifyAndWriteFinalOutput(elapsed_timeG, *locDom, opts.nx, numRanks);
2949 }
2950
2951
2952 #if USE_MPI
2953 MPI_Barrier(MPI_COMM_WORLD);
2954 MPI_Finalize() ;
2955 #endif
2956
2957 return 0 ;
2958 }
2959