1 // Copyright (c) 2017-2021, Lawrence Livermore National Security, LLC and 2 // other Axom Project Developers. See the top-level LICENSE file for details. 3 // 4 // SPDX-License-Identifier: (BSD-3-Clause) 5 6 #include "lulesh.hpp" 7 8 #include <math.h> 9 10 #ifdef AXOM_USE_MPI 11 #include <mpi.h> 12 #endif 13 #ifdef AXOM_USE_OPENMP 14 #include <omp.h> 15 #endif 16 17 #include <stdio.h> 18 #include <stdlib.h> 19 #include <string.h> 20 #include <limits.h> 21 #include <cstdlib> 22 #include <algorithm> 23 24 25 namespace slamLulesh { 26 27 ///////////////////////////////////////////////////////////////////// Domain(Int_t numRanks,Index_t colLoc,Index_t rowLoc,Index_t planeLoc,Index_t nx,Int_t tp,Int_t nr,Int_t balance,Int_t cost)28 Domain::Domain(Int_t numRanks, Index_t colLoc, 29 Index_t rowLoc, Index_t planeLoc, 30 Index_t nx, Int_t tp, Int_t nr, Int_t balance, Int_t cost) 31 : m_e_cut(Real_t(1.0e-7)), 32 m_p_cut(Real_t(1.0e-7)), 33 m_q_cut(Real_t(1.0e-7)), 34 m_v_cut(Real_t(1.0e-10)), 35 m_u_cut(Real_t(1.0e-7)), 36 m_hgcoef(Real_t(3.0)), 37 m_ss4o3(Real_t(4.0) / Real_t(3.0)), 38 m_qstop(Real_t(1.0e+12)), 39 m_monoq_max_slope(Real_t(1.0)), 40 m_monoq_limiter_mult(Real_t(2.0)), 41 m_qlc_monoq(Real_t(0.5)), 42 m_qqc_monoq(Real_t(2.0) / Real_t(3.0)), 43 m_qqc(Real_t(2.0)), 44 m_eosvmax(Real_t(1.0e+9)), 45 m_eosvmin(Real_t(1.0e-9)), 46 m_pmin(Real_t(0.)), 47 m_emin(Real_t(-1.0e+15)), 48 m_dvovmax(Real_t(0.1)), 49 m_refdens(Real_t(1.0)) 50 { 51 52 Index_t edgeElems = nx; 53 Index_t edgeNodes = edgeElems + 1; 54 55 this->cost() = cost; 56 57 m_tp = tp; 58 m_numRanks = numRanks; 59 60 /////////////////////////////// 61 // Initialize Sedov Mesh 62 /////////////////////////////// 63 64 // construct a uniform box for this processor 65 66 m_colLoc = colLoc; 67 m_rowLoc = rowLoc; 68 m_planeLoc = planeLoc; 69 70 m_sizeX = edgeElems; 71 m_sizeY = edgeElems; 72 m_sizeZ = edgeElems; 73 m_elemSet = ElemSet(edgeElems * edgeElems * edgeElems); 74 m_nodeSet = NodeSet(edgeNodes * edgeNodes * edgeNodes); 75 m_cornerSet = CornerSet( m_elemSet.size() * 8); 76 77 Int_t facesPerPlane = edgeElems * edgeElems; 78 Int_t numExtendedElems = m_elemSet.size() // local elem 79 + 2 * facesPerPlane // plane ghosts 80 + 2 * facesPerPlane // row ghosts 81 + 2 * facesPerPlane // col ghosts 82 ; 83 84 m_extendedElemSet = ExtendedElemSet( numExtendedElems ); 85 86 87 // Elem-centered 88 AllocateElemPersistent(numElem()); 89 90 // Node-centered 91 AllocateNodePersistent(numNode()); 92 93 SetupCommBuffers(); 94 95 // Basic Field Initialization 96 for (Index_t i = 0; i<numElem(); ++i) 97 { 98 e(i) = Real_t(0.0); 99 p(i) = Real_t(0.0); 100 q(i) = Real_t(0.0); 101 ss(i) = Real_t(0.0); 102 } 103 104 // Note - v initializes to 1.0, not 0.0! 105 for (Index_t i = 0; i<numElem(); ++i) 106 { 107 v(i) = Real_t(1.0); 108 } 109 110 for (Index_t i = 0; i<numNode(); ++i) 111 { 112 xd(i) = Real_t(0.0); 113 yd(i) = Real_t(0.0); 114 zd(i) = Real_t(0.0); 115 } 116 117 for (Index_t i = 0; i<numNode(); ++i) 118 { 119 xdd(i) = Real_t(0.0); 120 ydd(i) = Real_t(0.0); 121 zdd(i) = Real_t(0.0); 122 } 123 124 for (Index_t i = 0; i<numNode(); ++i) 125 { 126 nodalMass(i) = Real_t(0.0); 127 } 128 129 BuildMesh(nx, edgeNodes, edgeElems); 130 131 #ifdef AXOM_USE_OPENMP 132 SetupThreadSupportStructures(); 133 #endif 134 135 // Setup region index sets. For now, these are constant sized 136 // throughout the run, but could be changed every cycle to 137 // simulate effects of ALE on the lagrange solver 138 CreateRegionIndexSets(nr, balance); 139 140 // Setup symmetry nodesets 141 SetupSymmetryPlanes(edgeNodes); 142 143 // Setup element connectivities 144 SetupElementConnectivities(edgeElems); 145 146 // Setup symmetry planes and free surface boundary arrays 147 SetupBoundaryConditions(edgeElems); 148 149 150 // Setup defaults 151 152 // These can be changed (requires recompile) if you want to run 153 // with a fixed timestep, or to a different end time, but it's 154 // probably easier/better to just run a fixed number of timesteps 155 // using the -i flag in 2.x 156 157 dtfixed() = Real_t(-1.0e-6); // Negative means use courant condition 158 stoptime() = Real_t(1.0e-2);// *Real_t(edgeElems*tp/45.0) ; 159 160 // Initial conditions 161 deltatimemultlb() = Real_t(1.1); 162 deltatimemultub() = Real_t(1.2); 163 dtcourant() = Real_t(1.0e+20); 164 dthydro() = Real_t(1.0e+20); 165 dtmax() = Real_t(1.0e-2); 166 time() = Real_t(0.); 167 cycle() = Int_t(0); 168 169 // initialize field data 170 for (Index_t i = 0; i<numElem(); ++i) 171 { 172 Real_t x_local[8], y_local[8], z_local[8]; 173 ElemNodeSet elemToNode = nodelist(i); 174 for( Index_t lnode = 0; lnode< elemToNode.size(); ++lnode ) 175 { 176 Index_t gnode = elemToNode[lnode]; 177 x_local[lnode] = x(gnode); 178 y_local[lnode] = y(gnode); 179 z_local[lnode] = z(gnode); 180 } 181 182 // volume calculations 183 Real_t volume = CalcElemVolume(x_local, y_local, z_local ); 184 volo(i) = volume; 185 elemMass(i) = volume; 186 for (Index_t j = 0; j<8; ++j) 187 { 188 Index_t idx = elemToNode[j]; 189 nodalMass(idx) += volume / Real_t(8.0); 190 } 191 } 192 193 // deposit initial energy 194 // An energy of 3.948746e+7 is correct for a problem with 195 // 45 zones along a side - we need to scale it 196 const Real_t ebase = Real_t(3.948746e+7); 197 Real_t scale = (nx * m_tp) / Real_t(45.0); 198 Real_t einit = ebase * scale * scale * scale; 199 if (m_rowLoc + m_colLoc + m_planeLoc == 0) 200 { 201 // Dump into the first zone (which we know is in the corner) 202 // of the domain that sits at the origin 203 e(0) = einit; 204 } 205 //set initial deltatime base on analytic CFL calculation 206 deltatime() = (Real_t(.5) * cbrt(volo(0))) / sqrt(Real_t(2.0) * einit); 207 208 } // End constructor 209 210 211 //////////////////////////////////////////////////////////////////////////////// 212 void BuildMesh(Int_t nx,Int_t edgeNodes,Int_t edgeElems)213 Domain::BuildMesh(Int_t nx, Int_t edgeNodes, Int_t edgeElems) 214 { 215 Index_t meshEdgeElems = m_tp * nx; 216 217 // initialize nodal coordinates 218 Index_t nidx = 0; 219 Real_t tz = Real_t(1.125) * Real_t(m_planeLoc * nx) / Real_t(meshEdgeElems); 220 221 for (Index_t plane = 0; plane<edgeNodes; ++plane) 222 { 223 Real_t ty = Real_t(1.125) * Real_t(m_rowLoc * nx) / Real_t(meshEdgeElems); 224 for (Index_t row = 0; row<edgeNodes; ++row) 225 { 226 Real_t tx = Real_t(1.125) * Real_t(m_colLoc * nx) / Real_t(meshEdgeElems); 227 for (Index_t col = 0; col<edgeNodes; ++col) 228 { 229 x(nidx) = tx; 230 y(nidx) = ty; 231 z(nidx) = tz; 232 ++nidx; 233 // tx += ds ; // may accumulate roundoff... 234 tx = Real_t(1.125) * Real_t(m_colLoc * nx + col + 1) / Real_t(meshEdgeElems); 235 } 236 // ty += ds ; // may accumulate roundoff... 237 ty = Real_t(1.125) * Real_t(m_rowLoc * nx + row + 1) / Real_t(meshEdgeElems); 238 } 239 // tz += ds ; // may accumulate roundoff... 240 tz = Real_t(1.125) * Real_t(m_planeLoc * nx + plane + 1) / Real_t(meshEdgeElems); 241 } 242 243 244 // embed hexahedral elements in nodal point lattice 245 // SLAM NOTE: This could be represented using an implicit relation, when available 246 // SLAM NOTE: Alternatively, the underlying connectivity should be derivable from an implicit Cartesian grid. 247 IntsRegistry::BufferType& local_nodelist = m_intsRegistry.addBuffer("hex_node_boundary", 8 * numElem() ); 248 Index_t zidx = 0; 249 nidx = 0; 250 const int planeNodes = edgeNodes * edgeNodes; 251 for (Index_t plane = 0; plane<edgeElems; ++plane) 252 { 253 for (Index_t row = 0; row<edgeElems; ++row) 254 { 255 for (Index_t col = 0; col<edgeElems; ++col) 256 { 257 Index_t *localNode = &local_nodelist[Index_t(8) * zidx]; 258 259 localNode[0] = nidx; 260 localNode[1] = nidx + 1; 261 localNode[2] = nidx + edgeNodes + 1; 262 localNode[3] = nidx + edgeNodes; 263 localNode[4] = nidx + planeNodes; 264 localNode[5] = nidx + planeNodes + 1; 265 localNode[6] = nidx + planeNodes + edgeNodes + 1; 266 localNode[7] = nidx + planeNodes + edgeNodes; 267 ++zidx; 268 ++nidx; 269 } 270 ++nidx; 271 } 272 nidx += edgeNodes; 273 } 274 275 m_nodelist.bindIndices(local_nodelist.size(), &local_nodelist); 276 SLIC_ASSERT( m_nodelist.isValid()); 277 } 278 279 280 //////////////////////////////////////////////////////////////////////////////// 281 void SetupThreadSupportStructures()282 Domain::SetupThreadSupportStructures() 283 { 284 #ifdef AXOM_USE_OPENMP 285 Index_t numthreads = omp_get_max_threads(); 286 #else 287 Index_t numthreads = 1; 288 #endif 289 290 if (numthreads > 1) 291 { 292 NodeIndexMap nodeCornerCount(&m_nodeSet); // Keeps track of the number of corners per node 293 294 for (Index_t i = 0; i<numElem(); ++i) // foreach elem 295 { 296 ElemNodeSet nl = nodelist(i); // grab the Elem2Node relation for the element 297 for (Index_t j = 0; j < nl.size(); ++j) // for each node of the element 298 { 299 ++(nodeCornerCount[ nl[j]] ); // increment the count for that node 300 } 301 } 302 303 // Invert the zones to corner relation 304 IntsRegistry::BufferType& local_begins = m_intsRegistry.addBuffer("node_corner_begins", numNode() + 1 ); 305 IntsRegistry::BufferType& local_relIndices = m_intsRegistry.addBuffer("node_corner_rel_indices", m_cornerSet.size() ); 306 307 local_begins[0] = 0; // use the counts array to set the begins array 308 for (Index_t i = 1; i <= numNode(); ++i) 309 { 310 local_begins[i] = local_begins[i - 1] + nodeCornerCount[i - 1]; 311 nodeCornerCount[i - 1] = 0; 312 } 313 314 for (Index_t i = 0; i < numElem(); ++i) // foreach elem 315 { 316 ElemNodeSet nl = nodelist(i); // grab the Elem2Node relation for the elem 317 for (Index_t j = 0; j < nl.size(); ++j) // foreach node of the elem 318 { 319 Index_t m = nl[j]; // m is the node index pointed to by this corner 320 Index_t k = i * 8 + j; // k is the corner index (elem*8+offset) 321 Index_t offset = local_begins[m] + nodeCornerCount[m]; // offset is where this element belongs in the offsets array 322 local_relIndices[offset] = k; // this is the offsets array of the node to corner relation 323 ++(nodeCornerCount[m]); // increment the count for this node 324 } 325 } 326 327 // Finally create the relation over these arrays and check validity 328 m_nodeCornerRelation = NodeToCornerRelation(&m_nodeSet, &m_cornerSet); 329 m_nodeCornerRelation.bindBeginOffsets(numNode(), &local_begins); 330 m_nodeCornerRelation.bindIndices(local_relIndices.size(), &local_relIndices); 331 332 SLIC_ASSERT_MSG(m_nodeCornerRelation.isValid(), "Generating Node to Corner relation: Corner index out of range." ); 333 } 334 } 335 336 337 //////////////////////////////////////////////////////////////////////////////// 338 void SetupCommBuffers()339 Domain::SetupCommBuffers() 340 { 341 // allocate a buffer large enough for nodal ghost data 342 Index_t maxEdgeSize = MAX(this->sizeX(), MAX(this->sizeY(), this->sizeZ())) + 1; 343 344 m_maxPlaneSize = CACHE_ALIGN_REAL(maxEdgeSize * maxEdgeSize); 345 m_maxEdgeSize = CACHE_ALIGN_REAL(maxEdgeSize); 346 347 // assume communication to 6 neighbors by default 348 m_rowMin = (m_rowLoc == 0) ? 0 : 1; 349 m_rowMax = (m_rowLoc == m_tp - 1) ? 0 : 1; 350 m_colMin = (m_colLoc == 0) ? 0 : 1; 351 m_colMax = (m_colLoc == m_tp - 1) ? 0 : 1; 352 m_planeMin = (m_planeLoc == 0) ? 0 : 1; 353 m_planeMax = (m_planeLoc == m_tp - 1) ? 0 : 1; 354 355 #ifdef AXOM_USE_MPI 356 // account for face communication 357 Index_t comBufSize = 358 (m_rowMin + m_rowMax + m_colMin + m_colMax + m_planeMin + m_planeMax) * 359 m_maxPlaneSize * MAX_FIELDS_PER_MPI_COMM; 360 361 // account for edge communication 362 comBufSize += 363 ((m_rowMin & m_colMin) + (m_rowMin & m_planeMin) + (m_colMin & m_planeMin) + 364 (m_rowMax & m_colMax) + (m_rowMax & m_planeMax) + (m_colMax & m_planeMax) + 365 (m_rowMax & m_colMin) + (m_rowMin & m_planeMax) + (m_colMin & m_planeMax) + 366 (m_rowMin & m_colMax) + (m_rowMax & m_planeMin) + (m_colMax & m_planeMin)) * 367 m_maxEdgeSize * MAX_FIELDS_PER_MPI_COMM; 368 369 // account for corner communication 370 // factor of 16 is so each buffer has its own cache line 371 comBufSize += ((m_rowMin & m_colMin & m_planeMin) + 372 (m_rowMin & m_colMin & m_planeMax) + 373 (m_rowMin & m_colMax & m_planeMin) + 374 (m_rowMin & m_colMax & m_planeMax) + 375 (m_rowMax & m_colMin & m_planeMin) + 376 (m_rowMax & m_colMin & m_planeMax) + 377 (m_rowMax & m_colMax & m_planeMin) + 378 (m_rowMax & m_colMax & m_planeMax)) * CACHE_COHERENCE_PAD_REAL; 379 380 this->commDataSend = new Real_t[comBufSize]; 381 this->commDataRecv = new Real_t[comBufSize]; 382 // prevent floating point exceptions 383 memset( this->commDataSend, 0, comBufSize * sizeof(Real_t)); 384 memset( this->commDataRecv, 0, comBufSize * sizeof(Real_t)); 385 #endif 386 387 } 388 389 390 //////////////////////////////////////////////////////////////////////////////// 391 void CreateRegionIndexSets(Int_t nr,Int_t balance)392 Domain::CreateRegionIndexSets(Int_t nr, Int_t balance) 393 { 394 using RegionToElemDynamicRelation = axom::slam::DynamicVariableRelation<PositionType, ElementType>; 395 396 #ifdef AXOM_USE_MPI 397 int myRank; 398 MPI_Comm_rank(MPI_COMM_WORLD, &myRank); 399 srand(myRank); 400 #else 401 srand(0); 402 Index_t myRank = 0; 403 #endif 404 405 // Create a region set over the number of regions in the mesh -- regions are offset by one 406 m_regionSet = RegionSet::SetBuilder().size(nr).offset(1); 407 408 // Generate the region to element map as a dynamic relation. 409 // We will linearize this into a static relation below... 410 RegionToElemDynamicRelation reg2Elems(&m_regionSet, &m_elemSet); 411 412 //if we only have one region just fill it 413 if(numReg() == 1) 414 { 415 // Create the region material number map over the elements 416 const Index_t regMatID = m_regionSet[0]; 417 m_elemRegNum = ElemIntMap(&m_elemSet, regMatID); 418 } 419 //If we have more than one region distribute the elements. 420 else { 421 Int_t regionNum, regionIdx; 422 Int_t lastReg = -1; 423 Index_t elements; 424 Index_t runto = 0; 425 426 // Create the region material map over the elements 427 m_elemRegNum = ElemIntMap(&m_elemSet); 428 429 // Setup an unnormalized CDF to randomly choose the region 430 RegionIntMap regChooserProbs(&m_regionSet); 431 432 // Determine the relative weights of all the regions as a CDF. 433 // This is based off the -b flag. Balance is the value passed into b. 434 Int_t costDenominator = 0; 435 for (Index_t i = 0; i< numReg(); ++i) 436 { 437 costDenominator += pow((i + 1), balance); //Total sum of all regions weights 438 regChooserProbs[i] = costDenominator; //Chance of hitting a given region is (regChooserProbs[i] - regChooserProbs[i-1])/costDenominator 439 } 440 441 // Setup an unnormalized CDF to randomly choose the number of elements in each run 442 const int sizeChooserProbs[7] = {773, 937, 970, 974, 978, 981, 1000}; 443 using Builder = axom::slam::RangeSet<>::SetBuilder; 444 axom::slam::RangeSet<> sizeChooser[7]; 445 sizeChooser[0] = Builder().range(1,16); 446 sizeChooser[1] = Builder().range(16,32); 447 sizeChooser[2] = Builder().range(32,64); 448 sizeChooser[3] = Builder().range(64,128); 449 sizeChooser[4] = Builder().range(128,256); 450 sizeChooser[5] = Builder().range(256,512); 451 sizeChooser[6] = Builder().range(512,2048); 452 453 454 // Assign an element to each region 455 Index_t nextIndex = 0; 456 while (nextIndex < numElem()) 457 { 458 //pick the region; make sure we don't pick the same region twice in a row 459 do { 460 Int_t regionVar = rand() % costDenominator; 461 Index_t i = 0; 462 while(regionVar >= regChooserProbs[i]) 463 i++; 464 465 // Rotate regions based on MPI rank to give each domain a different region with the highest representation 466 regionIdx = (i + myRank) % numReg(); 467 regionNum = m_regionSet[ regionIdx ]; 468 } while(regionNum == lastReg); 469 470 // Determine the number of elements in this run using the sizeChooserProbs CDF and sizeChooser sets. 471 Int_t binSize = rand() % 1000; 472 Index_t binIndex = 0; 473 while(binSize >= sizeChooserProbs[binIndex]) 474 ++binIndex; 475 476 const axom::slam::RangeSet<>& rset = sizeChooser[ binIndex ]; 477 elements = rset[ rand() % rset.size() ]; // gets a random number from this bin's sizeChooser set 478 479 runto = elements + nextIndex; 480 481 //Store the elements. If we hit the end before we run out of elements then just stop. 482 for(; nextIndex < runto && nextIndex < numElem(); nextIndex++) 483 { 484 reg2Elems.insert(regionIdx, nextIndex); 485 m_elemRegNum[nextIndex] = regionNum; 486 } 487 lastReg = regionNum; 488 } 489 } 490 491 SLIC_ASSERT(reg2Elems.isValid()); // Ensure that the dynamic relation is valid 492 493 // Convert from a Dynamic to a Static relation 494 IntsRegistry::BufferType& local_begins = m_intsRegistry.addBuffer("reg_elem_begins", numReg() + 1 ); 495 IntsRegistry::BufferType& local_relIndices = m_intsRegistry.addBuffer("reg_elem_rel_indices", numElem() ); 496 Index_t curOffIdx = 0; 497 for(Index_t regionPos = 0; regionPos < numReg(); ++regionPos) 498 { 499 local_begins[ regionPos] = curOffIdx; 500 for(Index_t elemRelPos = 0; elemRelPos < reg2Elems.size( regionPos); ++elemRelPos) 501 { 502 local_relIndices[curOffIdx++] = reg2Elems[ regionPos][elemRelPos]; 503 } 504 } 505 local_begins[ numReg()] = local_relIndices.size(); 506 507 /// We can finally create the region to elem relation 508 m_regionElementsRel = RegionToElemRelation(&m_regionSet, &m_elemSet); 509 m_regionElementsRel.bindBeginOffsets(numReg(), &local_begins); 510 m_regionElementsRel.bindIndices(local_relIndices.size(), &local_relIndices); 511 512 SLIC_ASSERT(m_regionElementsRel.isValid(true)); // Ensure that the relation is valid 513 } 514 515 ///////////////////////////////////////////////////////////// 516 void SetupSymmetryPlanes(Int_t edgeNodes)517 Domain::SetupSymmetryPlanes(Int_t edgeNodes) 518 { 519 Int_t numSymmNodesX = m_colLoc == 0 ? edgeNodes * edgeNodes : 0; 520 Int_t numSymmNodesY = m_rowLoc == 0 ? edgeNodes * edgeNodes : 0; 521 Int_t numSymmNodesZ = m_planeLoc == 0 ? edgeNodes * edgeNodes : 0; 522 523 // Setup symmetry NodeSets // TODO: Need to add the mesh's nodes as parent 524 m_symmX = SymmNodeSet(numSymmNodesX); // eparentSet = &m_nodeSet 525 m_symmY = SymmNodeSet(numSymmNodesY); 526 m_symmZ = SymmNodeSet(numSymmNodesZ); 527 528 auto& loc_symmX = m_intsRegistry.addField("symmX", &m_symmX).data(); 529 auto& loc_symmY = m_intsRegistry.addField("symmY", &m_symmY).data(); 530 auto& loc_symmZ = m_intsRegistry.addField("symmZ", &m_symmZ).data(); 531 532 // SLAM Note: We should be able to compute these directory from a Cartesian product set defining a regular grid. 533 Index_t nidx = 0; 534 for (Index_t i = 0; i<edgeNodes; ++i) 535 { 536 Index_t planeInc = i * edgeNodes * edgeNodes; 537 Index_t rowInc = i * edgeNodes; 538 for (Index_t j = 0; j<edgeNodes; ++j) 539 { 540 if (m_planeLoc == 0) 541 { 542 loc_symmZ[nidx] = rowInc + j; 543 } 544 if (m_rowLoc == 0) 545 { 546 loc_symmY[nidx] = planeInc + j; 547 } 548 if (m_colLoc == 0) 549 { 550 loc_symmX[nidx] = planeInc + j * edgeNodes; 551 } 552 ++nidx; 553 } 554 } 555 556 m_symmX.data() = &loc_symmX; 557 m_symmY.data() = &loc_symmY; 558 m_symmZ.data() = &loc_symmZ; 559 560 // Verify validity of the sets. 561 SLIC_ASSERT( m_symmX.isValid() && m_symmX.size() == numSymmNodesX && m_symmX.data() == &loc_symmX); 562 SLIC_ASSERT( m_symmY.isValid() && m_symmY.size() == numSymmNodesY && m_symmY.data() == &loc_symmY); 563 SLIC_ASSERT( m_symmZ.isValid() && m_symmZ.size() == numSymmNodesZ && m_symmZ.data() == &loc_symmZ); 564 } 565 566 567 568 ///////////////////////////////////////////////////////////// 569 void SetupElementConnectivities(Int_t edgeElems)570 Domain::SetupElementConnectivities(Int_t edgeElems) 571 { 572 // Create temporary arrays to hold the data 573 IntsRegistry::BufferType& indices_xim = m_intsRegistry.addBuffer("zone_face_xi_m", numElem()); 574 IntsRegistry::BufferType& indices_xip = m_intsRegistry.addBuffer("zone_face_xi_p", numElem()); 575 576 // Setup xi face adjacencies 577 indices_xim[0] = 0; 578 for (Index_t i = 1; i<numElem(); ++i) 579 { 580 indices_xim[i] = i - 1; 581 indices_xip[i - 1] = i; 582 } 583 indices_xip[numElem() - 1] = numElem() - 1; 584 m_lxim.bindIndices(numElem(), &indices_xim); 585 m_lxip.bindIndices(numElem(), &indices_xip); 586 587 // Setup eta face adjacencies 588 IntsRegistry::BufferType& indices_etam = m_intsRegistry.addBuffer("zone_face_eta_m", numElem()); 589 IntsRegistry::BufferType& indices_etap = m_intsRegistry.addBuffer("zone_face_eta_p", numElem()); 590 for (Index_t i = 0; i<edgeElems; ++i) 591 { 592 indices_etam[i] = i; 593 indices_etap[numElem() - edgeElems + i] = numElem() - edgeElems + i; 594 } 595 for (Index_t i = edgeElems; i<numElem(); ++i) 596 { 597 indices_etam[i] = i - edgeElems; 598 indices_etap[i - edgeElems] = i; 599 } 600 m_letam.bindIndices( numElem(), &indices_etam); 601 m_letap.bindIndices( numElem(), &indices_etap); 602 603 // Setup zeta face adjacencies 604 IntsRegistry::BufferType& indices_zetam = m_intsRegistry.addBuffer("zone_face_zeta_m", numElem()); 605 IntsRegistry::BufferType& indices_zetap = m_intsRegistry.addBuffer("zone_face_zeta_p", numElem()); 606 for (Index_t i = 0; i<edgeElems * edgeElems; ++i) 607 { 608 indices_zetam[i] = i; 609 indices_zetap[numElem() - edgeElems * edgeElems + i] = numElem() - edgeElems * edgeElems + i; 610 } 611 for (Index_t i = edgeElems * edgeElems; i<numElem(); ++i) 612 { 613 indices_zetam[i] = i - edgeElems * edgeElems; 614 indices_zetap[i - edgeElems * edgeElems] = i; 615 } 616 m_lzetam.bindIndices(numElem(), &indices_zetam); 617 m_lzetap.bindIndices(numElem(), &indices_zetap); 618 619 // Ensure that all the indices in the relations are valid 620 SLIC_ASSERT( m_lxim.isValid() ); 621 SLIC_ASSERT( m_lxip.isValid() ); 622 SLIC_ASSERT( m_letam.isValid() ); 623 SLIC_ASSERT( m_letap.isValid() ); 624 SLIC_ASSERT( m_lzetam.isValid() ); 625 SLIC_ASSERT( m_lzetap.isValid() ); 626 } 627 628 ///////////////////////////////////////////////////////////// 629 void SetupBoundaryConditions(Int_t edgeElems)630 Domain::SetupBoundaryConditions(Int_t edgeElems) 631 { 632 Index_t ghostIdx[6]; // offsets to ghost locations 633 634 // set up boundary condition information 635 for (Index_t i = 0; i<numElem(); ++i) 636 { 637 elemBC(i) = Int_t(0); 638 } 639 640 for (Index_t i = 0; i<6; ++i) 641 { 642 ghostIdx[i] = INT_MIN; 643 } 644 645 Int_t pidx = numElem(); 646 if (m_planeMin != 0) 647 { 648 ghostIdx[0] = pidx; 649 pidx += sizeX() * sizeY(); 650 } 651 652 if (m_planeMax != 0) 653 { 654 ghostIdx[1] = pidx; 655 pidx += sizeX() * sizeY(); 656 } 657 658 if (m_rowMin != 0) 659 { 660 ghostIdx[2] = pidx; 661 pidx += sizeX() * sizeZ(); 662 } 663 664 if (m_rowMax != 0) 665 { 666 ghostIdx[3] = pidx; 667 pidx += sizeX() * sizeZ(); 668 } 669 670 if (m_colMin != 0) 671 { 672 ghostIdx[4] = pidx; 673 pidx += sizeY() * sizeZ(); 674 } 675 676 if (m_colMax != 0) 677 { 678 ghostIdx[5] = pidx; 679 } 680 681 682 // SLAM HACK: We want to modify the data of a StaticRelation (ElemFaceAdjacencyRelation). 683 // This will be addressed in ATK-927. 684 // For now, grab the underlying array from the registry 685 686 IntsRegistry::BufferType& local_xi_m = m_intsRegistry.getBuffer("zone_face_xi_m"); 687 IntsRegistry::BufferType& local_xi_p = m_intsRegistry.getBuffer("zone_face_xi_p"); 688 IntsRegistry::BufferType& local_eta_m = m_intsRegistry.getBuffer("zone_face_eta_m"); 689 IntsRegistry::BufferType& local_eta_p = m_intsRegistry.getBuffer("zone_face_eta_p"); 690 IntsRegistry::BufferType& local_zeta_m = m_intsRegistry.getBuffer("zone_face_zeta_m"); 691 IntsRegistry::BufferType& local_zeta_p = m_intsRegistry.getBuffer("zone_face_zeta_p"); 692 693 // symmetry plane or free surface BCs 694 for (Index_t i = 0; i<edgeElems; ++i) 695 { 696 Index_t planeInc = i * edgeElems * edgeElems; 697 Index_t rowInc = i * edgeElems; 698 for (Index_t j = 0; j<edgeElems; ++j) 699 { 700 if (m_planeLoc == 0) 701 { 702 elemBC(rowInc + j) |= ZETA_M_SYMM; 703 } 704 else { 705 elemBC(rowInc + j) |= ZETA_M_COMM; 706 local_zeta_m[rowInc + j] = ghostIdx[0] + rowInc + j; 707 } 708 709 if (m_planeLoc == m_tp - 1) 710 { 711 elemBC(rowInc + j + numElem() - edgeElems * edgeElems) |= ZETA_P_FREE; 712 } 713 else { 714 elemBC(rowInc + j + numElem() - edgeElems * edgeElems) |= ZETA_P_COMM; 715 local_zeta_p[rowInc + j + numElem() - edgeElems * edgeElems] = ghostIdx[1] + rowInc + j; 716 } 717 718 if (m_rowLoc == 0) 719 { 720 elemBC(planeInc + j) |= ETA_M_SYMM; 721 } 722 else { 723 elemBC(planeInc + j) |= ETA_M_COMM; 724 local_eta_m[planeInc + j] = ghostIdx[2] + rowInc + j; 725 } 726 727 if (m_rowLoc == m_tp - 1) 728 { 729 elemBC(planeInc + j + edgeElems * edgeElems - edgeElems) |= ETA_P_FREE; 730 } 731 else { 732 elemBC(planeInc + j + edgeElems * edgeElems - edgeElems) |= ETA_P_COMM; 733 local_eta_p[planeInc + j + edgeElems * edgeElems - edgeElems] = ghostIdx[3] + rowInc + j; 734 } 735 736 if (m_colLoc == 0) 737 { 738 elemBC(planeInc + j * edgeElems) |= XI_M_SYMM; 739 } 740 else { 741 elemBC(planeInc + j * edgeElems) |= XI_M_COMM; 742 local_xi_m[planeInc + j * edgeElems] = ghostIdx[4] + rowInc + j; 743 } 744 745 if (m_colLoc == m_tp - 1) 746 { 747 elemBC(planeInc + j * edgeElems + edgeElems - 1) |= XI_P_FREE; 748 } 749 else { 750 elemBC(planeInc + j * edgeElems + edgeElems - 1) |= XI_P_COMM; 751 local_xi_p[planeInc + j * edgeElems + edgeElems - 1] = ghostIdx[5] + rowInc + j; 752 } 753 } 754 } 755 756 // Ensure that all the indices in the element adjacency relations are still valid 757 SLIC_ASSERT( m_lxim.isValid() ); 758 SLIC_ASSERT( m_lxip.isValid() ); 759 SLIC_ASSERT( m_letam.isValid() ); 760 SLIC_ASSERT( m_letap.isValid() ); 761 SLIC_ASSERT( m_lzetam.isValid() ); 762 SLIC_ASSERT( m_lzetap.isValid() ); 763 } 764 765 /////////////////////////////////////////////////////////////////////////// InitMeshDecomp(Int_t numRanks,Int_t myRank,Int_t * col,Int_t * row,Int_t * plane,Int_t * side)766 void InitMeshDecomp(Int_t numRanks, Int_t myRank, 767 Int_t *col, Int_t *row, Int_t *plane, Int_t *side) 768 { 769 Int_t testProcs; 770 Int_t dx, dy, dz; 771 Int_t myDom; 772 773 // Assume cube processor layout for now 774 testProcs = Int_t(cbrt(Real_t(numRanks)) + 0.5); 775 if (testProcs * testProcs * testProcs != numRanks) 776 { 777 SLIC_WARNING("Num processors must be a cube of an integer (1, 8, 27, ...)"); 778 #ifdef AXOM_USE_MPI 779 MPI_Abort(MPI_COMM_WORLD, -1); 780 #else 781 exit(-1); 782 #endif 783 } 784 if (sizeof(Real_t) != 4 && sizeof(Real_t) != 8) 785 { 786 SLIC_WARNING("MPI operations only support float and double right now..."); 787 #ifdef AXOM_USE_MPI 788 MPI_Abort(MPI_COMM_WORLD, -1); 789 #else 790 exit(-1); 791 #endif 792 } 793 if (MAX_FIELDS_PER_MPI_COMM > CACHE_COHERENCE_PAD_REAL) 794 { 795 SLIC_WARNING("Corner element comm buffers too small. Fix code."); 796 #ifdef AXOM_USE_MPI 797 MPI_Abort(MPI_COMM_WORLD, -1); 798 #else 799 exit(-1); 800 #endif 801 } 802 803 dx = testProcs; 804 dy = testProcs; 805 dz = testProcs; 806 807 // temporary test 808 if (dx * dy * dz != numRanks) 809 { 810 SLIC_WARNING("Error -- must have as many domains as procs."); 811 #ifdef AXOM_USE_MPI 812 MPI_Abort(MPI_COMM_WORLD, -1); 813 #else 814 exit(-1); 815 #endif 816 } 817 Int_t remainder = dx * dy * dz % numRanks; 818 if (myRank < remainder) 819 { 820 myDom = myRank * ( 1 + (dx * dy * dz / numRanks)); 821 } 822 else { 823 myDom = remainder * ( 1 + (dx * dy * dz / numRanks)) + 824 (myRank - remainder) * (dx * dy * dz / numRanks); 825 } 826 827 *col = myDom % dx; 828 *row = (myDom / dx) % dy; 829 *plane = myDom / (dx * dy); 830 *side = testProcs; 831 832 return; 833 } 834 835 } // end namespace slamLulesh 836