1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved. *
3 * This file is part of the LIBXSMM library. *
4 * *
5 * For information on the license, see the LICENSE file. *
6 * Further information: https://github.com/hfp/libxsmm/ *
7 * SPDX-License-Identifier: BSD-3-Clause *
8 ******************************************************************************/
9 /*
10 * Copyright (c) 2013-2014, SeisSol Group
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions are met:
15 *
16 * 1. Redistributions of source code must retain the above copyright notice,
17 * this list of conditions and the following disclaimer.
18 *
19 * 2. Redistributions in binary form must reproduce the above copyright notice,
20 * this list of conditions and the following disclaimer in the documentation
21 * and/or other materials provided with the distribution.
22 *
23 * 3. Neither the name of the copyright holder nor the names of its
24 * contributors may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 **/
39 #ifndef PROXY_SEISSOL_ALLOCATOR_HPP
40 #define PROXY_SEISSOL_ALLOCATOR_HPP
41
42 struct CellLocalInformation {
43 enum faceType faceTypes[4];
44 int faceRelations[4][2];
45 unsigned int faceNeighborIds[4];
46 unsigned int ltsSetup;
47 double currentTime[5];
48 };
49
50 struct GlobalData {
51 real *stiffnessMatricesTransposed[3];
52 real *stiffnessMatrices[3];
53 real *fluxMatrices[52];
54 };
55
56 struct LocalIntegrationData {
57 real starMatrices[3][STAR_NNZ];
58 real nApNm1[4][NUMBER_OF_QUANTITIES*NUMBER_OF_QUANTITIES];
59 };
60
61 struct NeighboringIntegrationData {
62 real nAmNm1[4][NUMBER_OF_QUANTITIES*NUMBER_OF_QUANTITIES];
63 };
64
65 struct CellData {
66 struct LocalIntegrationData *localIntegration;
67 struct NeighboringIntegrationData *neighboringIntegration;
68 };
69
70 struct Cells {
71 unsigned int numberOfCells;
72 real (*dofs)[NUMBER_OF_ALIGNED_DOFS];
73 real **buffers;
74 real **derivatives;
75 real *(*faceNeighbors)[4];
76 };
77
78 struct GlobalData **m_globalDataArray;
79 struct GlobalData *m_globalData;
80 struct CellLocalInformation *m_cellInformation;
81 struct CellData *m_cellData;
82 struct Cells *m_cells;
83 struct LocalIntegrationData *m_localIntegration;
84 struct NeighboringIntegrationData * m_neighboringIntegration;
85
86 seissol::kernels::Time m_timeKernel;
87 seissol::kernels::Volume m_volumeKernel;
88 seissol::kernels::Boundary m_boundaryKernel;
89
90 /* This option is needed to avoid pollution of low-level caches */
91 #define NUMBER_OF_THREADS_PER_GLOBALDATA_COPY 4
92 #ifndef NUMBER_OF_THREADS_PER_GLOBALDATA_COPY
93 #define NUMBER_OF_THREADS_PER_GLOBALDATA_COPY 16383
94 #endif
95
96 real m_timeStepWidthSimulation = (real)1.0;
97 real* m_dofs;
98 real* m_tdofs;
99 #ifdef __USE_DERS
100 real* m_ders;
101 #endif
102 real** m_ptdofs;
103 real** m_pder;
104 real* m_faceNeighbors;
105 real** m_globalPointerArray;
106 real* m_globalPointer;
107
init_data_structures(unsigned int i_cells)108 unsigned int init_data_structures(unsigned int i_cells) {
109 // check if we have to read on scenario
110 char* pScenario;
111 std::string s_scenario;
112 bool bUseScenario = false;
113 unsigned int (*scenario_faceType)[4];
114 unsigned int (*scenario_neighbor)[4];
115 unsigned int (*scenario_side)[4];
116 unsigned int (*scenario_orientation)[4];
117
118 pScenario = getenv ("SEISSOL_PROXY_SCENARIO");
119 if (pScenario !=NULL ) {
120 bUseScenario = true;
121 s_scenario.assign(pScenario);
122 std::string file;
123 std::ifstream data;
124 size_t reads;
125 unsigned int value;
126
127 // read scenario size
128 file = s_scenario + ".size";
129 data.open(file.c_str());
130 reads = 0;
131 if (!data) { printf("size of scenario couldn't be read!\n"); exit(-1); }
132 while (data >> i_cells) {
133 printf("scenario name is: %s\n", s_scenario.c_str());
134 printf("scenario has %i cells\n", i_cells);
135 reads++;
136 }
137 data.close();
138 if (reads != 1) { printf("wrong number of sizes (%i) in scenario were read!\n", reads); exit(-1); }
139
140 scenario_neighbor = (unsigned int(*)[4]) malloc(i_cells*sizeof(unsigned int[4]));
141 scenario_faceType = (unsigned int(*)[4]) malloc(i_cells*sizeof(unsigned int[4]));
142 scenario_side = (unsigned int(*)[4]) malloc(i_cells*sizeof(unsigned int[4]));
143 scenario_orientation = (unsigned int(*)[4]) malloc(i_cells*sizeof(unsigned int[4]));
144
145 // read neighbors
146 file = s_scenario + ".neigh";
147 data.open(file.c_str());
148 if (!data) { printf("neigh of scenario couldn't be read!\n"); exit(-1); }
149 reads = 0;
150 while (data >> value) {
151 scenario_neighbor[reads/4][reads%4] = value;
152 reads++;
153 }
154 data.close();
155 if (reads != i_cells*4) { printf("wrong neigh (%i) in scenario were read!\n", reads); exit(-1); }
156
157 // read faceTypes
158 file = s_scenario + ".bound";
159 data.open(file.c_str());
160 if (!data) { printf("bound of scenario couldn't be read!\n"); exit(-1); }
161 reads = 0;
162 while (data >> value) {
163 scenario_faceType[reads/4][reads%4] = value;
164 reads++;
165 }
166 data.close();
167 if (reads != i_cells*4) { printf("wrong faceType (%i) in scenario were read!\n", reads); exit(-1); }
168
169 // read sides
170 file = s_scenario + ".sides";
171 data.open(file.c_str());
172 if (!data) { printf("sides of scenario couldn't be read!\n"); exit(-1); }
173 reads = 0;
174 while (data >> value) {
175 scenario_side[reads/4][reads%4] = value;
176 reads++;
177 }
178 data.close();
179 if (reads != i_cells*4) { printf("wrong sides (%i) in scenario were read!\n", reads); exit(-1); }
180
181 // read orientation
182 file = s_scenario + ".orient";
183 data.open(file.c_str());
184 if (!data) { printf("orientations of scenario couldn't be read!\n"); exit(-1); }
185 reads = 0;
186 while (data >> value) {
187 scenario_orientation[reads/4][reads%4] = value;
188 reads++;
189 }
190 data.close();
191 if (reads != i_cells*4) { printf("wrong orientations (%i) in scenario were read!\n", reads); exit(-1); }
192 }
193
194 // init RNG
195 libxsmm_rng_set_seed(i_cells);
196
197 // cell information
198 m_cellInformation = (CellLocalInformation*)malloc(i_cells*sizeof(CellLocalInformation));
199 for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
200 for (unsigned int f = 0; f < 4; f++) {
201 if (bUseScenario == true ) {
202 switch (scenario_faceType[l_cell][f]) {
203 case 0:
204 m_cellInformation[l_cell].faceTypes[f] = regular;
205 break;
206 case 1:
207 m_cellInformation[l_cell].faceTypes[f] = freeSurface;
208 break;
209 case 3:
210 m_cellInformation[l_cell].faceTypes[f] = dynamicRupture;
211 break;
212 case 5:
213 m_cellInformation[l_cell].faceTypes[f] = outflow;
214 break;
215 case 6:
216 m_cellInformation[l_cell].faceTypes[f] = periodic;
217 break;
218 default:
219 printf("unsupported faceType (%i)!\n", scenario_faceType[l_cell][f]);
220 exit(-1);
221 break;
222 }
223 m_cellInformation[l_cell].faceRelations[f][0] = scenario_side[l_cell][f];
224 m_cellInformation[l_cell].faceRelations[f][1] = scenario_orientation[l_cell][f];
225 m_cellInformation[l_cell].faceNeighborIds[f] = scenario_neighbor[l_cell][f];
226 } else {
227 m_cellInformation[l_cell].faceTypes[f] = regular;
228 m_cellInformation[l_cell].faceRelations[f][0] = (libxsmm_rng_u32(4));
229 m_cellInformation[l_cell].faceRelations[f][1] = (libxsmm_rng_u32(3));
230 m_cellInformation[l_cell].faceNeighborIds[f] = (libxsmm_rng_u32(i_cells));
231 }
232 }
233 #ifdef __USE_DERS
234 m_cellInformation[l_cell].ltsSetup = 4095;
235 #else
236 m_cellInformation[l_cell].ltsSetup = 0;
237 #endif
238 for (unsigned int f = 0; f < 5; f++) {
239 m_cellInformation[l_cell].currentTime[f] = 0.0;
240 }
241 }
242
243 // DOFs, tIntegrated buffer
244 #ifdef USE_HBM_DOFS
245 hbw_posix_memalign( (void**) &m_dofs, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DOFS])*i_cells );
246 #else
247 posix_memalign( (void**) &m_dofs, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DOFS])*i_cells );
248 #endif
249
250 #ifdef USE_HBM_TDOFS
251 hbw_posix_memalign( (void**) &m_tdofs, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DOFS])*i_cells );
252 #else
253 posix_memalign( (void**) &m_tdofs, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DOFS])*i_cells );
254 #endif
255
256 #ifdef __USE_DERS
257 #ifdef USE_HBM_DERS
258 hbw_posix_memalign( (void**) &m_ders, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DERS])*i_cells );
259 #else
260 posix_memalign( (void**) &m_ders, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DERS])*i_cells );
261 #endif
262 #endif
263
264 m_ptdofs = (real**)malloc(sizeof(real*)*i_cells);
265 m_pder = (real**)malloc(sizeof(real*)*i_cells);
266 m_faceNeighbors = (real*)malloc(sizeof(real*[4])*i_cells);
267
268 #ifdef _OPENMP
269 #pragma omp parallel for schedule(static)
270 #endif
271 for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
272 for (unsigned int i = 0; i < NUMBER_OF_ALIGNED_DOFS; i++) {
273 m_dofs[(l_cell*NUMBER_OF_ALIGNED_DOFS)+i] = (real)libxsmm_rng_f64();
274 }
275 for (unsigned int i = 0; i < NUMBER_OF_ALIGNED_DOFS; i++) {
276 m_tdofs[(l_cell*NUMBER_OF_ALIGNED_DOFS)+i] = (real)libxsmm_rng_f64();
277 }
278 }
279 #ifdef __USE_DERS
280 #ifdef _OPENMP
281 #pragma omp parallel for schedule(static)
282 #endif
283 for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
284 for (unsigned int i = 0; i < NUMBER_OF_ALIGNED_DERS; i++) {
285 m_ders[(l_cell*NUMBER_OF_ALIGNED_DERS)+i] = (real)libxsmm_rng_f64();
286 }
287 }
288 #endif
289
290 for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
291 m_ptdofs[l_cell] = &(m_tdofs[(l_cell*NUMBER_OF_ALIGNED_DOFS)]);
292 #ifdef __USE_DERS
293 m_pder[l_cell] = &(m_ders[(l_cell*NUMBER_OF_ALIGNED_DERS)]);
294 #else
295 m_pder[l_cell] = NULL;
296 #endif
297 }
298
299 m_cells = (Cells*)malloc(sizeof(Cells));
300 m_cells->numberOfCells = i_cells;
301 m_cells->dofs = (real(*)[NUMBER_OF_ALIGNED_DOFS])m_dofs;
302 m_cells->buffers = m_ptdofs;
303 m_cells->derivatives = m_pder;
304 m_cells->faceNeighbors = (real*(*)[4])m_faceNeighbors;
305
306 for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
307 for (unsigned int f = 0; f < 4; f++) {
308 if (m_cellInformation[l_cell].faceTypes[f] == outflow) {
309 m_cells->faceNeighbors[l_cell][f] = NULL;
310 } else if (m_cellInformation[l_cell].faceTypes[f] == freeSurface) {
311 #ifdef __USE_DERS
312 m_cells->faceNeighbors[l_cell][f] = m_cells->derivatives[l_cell];
313 #else
314 m_cells->faceNeighbors[l_cell][f] = m_cells->buffers[l_cell];
315 #endif
316 } else if (m_cellInformation[l_cell].faceTypes[f] == periodic || m_cellInformation[l_cell].faceTypes[f] == regular) {
317 #ifdef __USE_DERS
318 m_cells->faceNeighbors[l_cell][f] = m_cells->derivatives[m_cellInformation[l_cell].faceNeighborIds[f]];
319 #else
320 m_cells->faceNeighbors[l_cell][f] = m_cells->buffers[m_cellInformation[l_cell].faceNeighborIds[f]];
321 #endif
322 } else {
323 printf("unsupported boundary type -> exit\n");
324 exit(-1);
325 }
326 }
327 }
328
329 // local integration
330 #ifdef USE_HBM_CELLLOCAL_LOCAL
331 hbw_posix_memalign( (void**) &m_localIntegration, 2097152, i_cells*sizeof(LocalIntegrationData) );
332 #else
333 posix_memalign( (void**) &m_localIntegration, 2097152, i_cells*sizeof(LocalIntegrationData) );
334 #endif
335
336 #ifdef _OPENMP
337 #pragma omp parallel for schedule(static)
338 #endif
339 for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
340 // init star matrices
341 for (size_t m = 0; m < 3; m++) {
342 for (size_t j = 0; j < STAR_NNZ; j++) {
343 m_localIntegration[l_cell].starMatrices[m][j] = (real)libxsmm_rng_f64();
344 }
345 }
346 // init flux solver
347 for (size_t m = 0; m < 4; m++) {
348 for (size_t j = 0; j < NUMBER_OF_QUANTITIES*NUMBER_OF_QUANTITIES; j++) {
349 m_localIntegration[l_cell].nApNm1[m][j] = (real)libxsmm_rng_f64();
350 }
351 }
352 }
353
354 // neighbor integration
355 #ifdef USE_HBM_CELLLOCAL_NEIGH
356 hbw_posix_memalign( (void**) &m_neighboringIntegration, 2097152, i_cells*sizeof(NeighboringIntegrationData) );
357 #else
358 posix_memalign( (void**) &m_neighboringIntegration, 2097152, i_cells*sizeof(NeighboringIntegrationData) );
359 #endif
360
361 #ifdef _OPENMP
362 #pragma omp parallel for schedule(static)
363 #endif
364 for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
365 // init flux solver
366 for (size_t m = 0; m < 4; m++) {
367 for (size_t j = 0; j < NUMBER_OF_QUANTITIES*NUMBER_OF_QUANTITIES; j++) {
368 m_neighboringIntegration[l_cell].nAmNm1[m][j] = (real)libxsmm_rng_f64();
369 }
370 }
371 }
372
373 // CellData
374 m_cellData = (CellData*)malloc(sizeof(CellData));
375 m_cellData->localIntegration = m_localIntegration;
376 m_cellData->neighboringIntegration = m_neighboringIntegration;
377
378 // Global matrices
379 unsigned int l_globalMatrices = NUMBER_OF_ALIGNED_BASIS_FUNCTIONS * seissol::kernels::getNumberOfBasisFunctions( CONVERGENCE_ORDER-1 ) * 3;
380 l_globalMatrices += seissol::kernels::getNumberOfAlignedBasisFunctions( CONVERGENCE_ORDER-1 ) * NUMBER_OF_BASIS_FUNCTIONS * 3;
381 l_globalMatrices += NUMBER_OF_ALIGNED_BASIS_FUNCTIONS * NUMBER_OF_BASIS_FUNCTIONS * 52;
382 l_globalMatrices *= sizeof(real);
383
384 // determine number of global data copies
385 unsigned int l_numberOfThreads = 1;
386 #ifdef _OPENMP
387 #pragma omp parallel
388 {
389 #pragma omp master
390 {
391 l_numberOfThreads = omp_get_num_threads();
392 }
393 }
394 #endif
395 unsigned int l_numberOfCopiesCeil = (l_numberOfThreads%NUMBER_OF_THREADS_PER_GLOBALDATA_COPY == 0) ? 0 : 1;
396 unsigned int l_numberOfCopies = (l_numberOfThreads/NUMBER_OF_THREADS_PER_GLOBALDATA_COPY) + l_numberOfCopiesCeil;
397
398 m_globalPointerArray = (real**) malloc(l_numberOfCopies*sizeof(real*));
399 m_globalDataArray = (GlobalData**) malloc(l_numberOfCopies*sizeof(GlobalData*));
400
401 // @TODO: for NUMA we need to bind this
402 for (unsigned int l_globalDataCount = 0; l_globalDataCount < l_numberOfCopies; l_globalDataCount++) {
403 #ifdef USE_HBM_GLOBALDATA
404 hbw_posix_memalign( (void**) &(m_globalPointerArray[l_globalDataCount]), 2097152, l_globalMatrices );
405 #else
406 posix_memalign( (void**) &(m_globalPointerArray[l_globalDataCount]), 2097152, l_globalMatrices );
407 #endif
408 m_globalPointer = m_globalPointerArray[l_globalDataCount];
409 m_globalDataArray[l_globalDataCount] = (GlobalData*) malloc(sizeof(GlobalData));
410 m_globalData = m_globalDataArray[l_globalDataCount];
411
412 for (unsigned int i = 0; i < (l_globalMatrices/sizeof(real)); i++) {
413 m_globalPointer[i] = (real)libxsmm_rng_f64();
414 }
415
416 real* tmp_pointer = m_globalPointer;
417 // stiffness for time integration
418 for( unsigned int l_transposedStiffnessMatrix = 0; l_transposedStiffnessMatrix < 3; l_transposedStiffnessMatrix++ ) {
419 m_globalData->stiffnessMatricesTransposed[l_transposedStiffnessMatrix] = tmp_pointer;
420 tmp_pointer += seissol::kernels::getNumberOfAlignedBasisFunctions( CONVERGENCE_ORDER-1 ) * NUMBER_OF_BASIS_FUNCTIONS;
421 }
422
423 // stiffness for volume integration
424 for( unsigned int l_stiffnessMatrix = 0; l_stiffnessMatrix < 3; l_stiffnessMatrix++ ) {
425 m_globalData->stiffnessMatrices[l_stiffnessMatrix] = tmp_pointer;
426 tmp_pointer += NUMBER_OF_ALIGNED_BASIS_FUNCTIONS * seissol::kernels::getNumberOfBasisFunctions( CONVERGENCE_ORDER-1 );
427 }
428
429 // flux matrices for boundary integration
430 for( unsigned int l_fluxMatrix = 0; l_fluxMatrix < 52; l_fluxMatrix++ ) {
431 m_globalData->fluxMatrices[l_fluxMatrix] = tmp_pointer;
432 tmp_pointer += NUMBER_OF_ALIGNED_BASIS_FUNCTIONS * NUMBER_OF_BASIS_FUNCTIONS;
433 }
434 }
435
436 // set default to first chunk
437 m_globalPointer = m_globalPointerArray[0];
438 m_globalData = m_globalDataArray[0];
439
440 if (bUseScenario == true ) {
441 free(scenario_faceType);
442 free(scenario_neighbor);
443 free(scenario_side);
444 free(scenario_orientation);
445 }
446
447 return i_cells;
448 }
449
free_data_structures()450 void free_data_structures() {
451 unsigned int l_numberOfThreads = 1;
452 #ifdef _OPENMP
453 #pragma omp parallel
454 {
455 #pragma omp master
456 {
457 l_numberOfThreads = omp_get_num_threads();
458 }
459 }
460 #endif
461 unsigned int l_numberOfCopiesCeil = (l_numberOfThreads%NUMBER_OF_THREADS_PER_GLOBALDATA_COPY == 0) ? 0 : 1;
462 unsigned int l_numberOfCopies = (l_numberOfThreads/NUMBER_OF_THREADS_PER_GLOBALDATA_COPY) + l_numberOfCopiesCeil;
463
464 for (unsigned int l_globalDataCount = 0; l_globalDataCount < l_numberOfCopies; l_globalDataCount++) {
465 m_globalData = m_globalDataArray[l_globalDataCount];
466 free(m_globalData);
467 }
468 free(m_globalDataArray);
469 free(m_cellInformation);
470 free(m_cellData);
471 free(m_cells);
472
473 #ifdef USE_HBM_CELLLOCAL_LOCAL
474 hbw_free(m_localIntegration);
475 #else
476 free(m_localIntegration);
477 #endif
478
479 #ifdef USE_HBM_CELLLOCAL_NEIGH
480 hbw_free(m_neighboringIntegration);
481 #else
482 free(m_neighboringIntegration);
483 #endif
484
485 #ifdef USE_HBM_DOFS
486 hbw_free(m_dofs);
487 #else
488 free(m_dofs);
489 #endif
490
491 #ifdef USE_HBM_TDOFS
492 hbw_free(m_tdofs);
493 #else
494 free(m_tdofs);
495 #endif
496
497 #ifdef __USE_DERS
498 #ifdef USE_HBM_DERS
499 hbw_free(m_ders);
500 #else
501 free(m_ders);
502 #endif
503 #endif
504
505 free(m_ptdofs);
506 free(m_pder);
507 free(m_faceNeighbors);
508
509 for (unsigned int l_globalDataCount = 0; l_globalDataCount < l_numberOfCopies; l_globalDataCount++) {
510 m_globalPointer = m_globalPointerArray[l_globalDataCount];
511 #ifdef USE_HBM_GLOBALDATA
512 hbw_free(m_globalPointer);
513 #else
514 free(m_globalPointer);
515 #endif
516 }
517
518 free(m_globalPointerArray);
519 }
520
521 #endif /*PROXY_SEISSOL_ALLOCATOR_HPP*/
522
523