1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved.                      *
3 * This file is part of the LIBXSMM library.                                   *
4 *                                                                             *
5 * For information on the license, see the LICENSE file.                       *
6 * Further information: https://github.com/hfp/libxsmm/                        *
7 * SPDX-License-Identifier: BSD-3-Clause                                       *
8 ******************************************************************************/
9 /*
10  * Copyright (c) 2013-2014, SeisSol Group
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions are met:
15  *
16  * 1. Redistributions of source code must retain the above copyright notice,
17  *    this list of conditions and the following disclaimer.
18  *
19  * 2. Redistributions in binary form must reproduce the above copyright notice,
20  *    this list of conditions and the following disclaimer in the documentation
21  *    and/or other materials provided with the distribution.
22  *
23  * 3. Neither the name of the copyright holder nor the names of its
24  *    contributors may be used to endorse or promote products derived from this
25  *    software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
31  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  **/
39 #ifndef PROXY_SEISSOL_ALLOCATOR_HPP
40 #define PROXY_SEISSOL_ALLOCATOR_HPP
41 
42 struct CellLocalInformation {
43   enum faceType faceTypes[4];
44   int faceRelations[4][2];
45   unsigned int faceNeighborIds[4];
46   unsigned int ltsSetup;
47   double currentTime[5];
48 };
49 
50 struct GlobalData {
51   real *stiffnessMatricesTransposed[3];
52   real *stiffnessMatrices[3];
53   real *fluxMatrices[52];
54 };
55 
56 struct LocalIntegrationData {
57   real starMatrices[3][STAR_NNZ];
58   real nApNm1[4][NUMBER_OF_QUANTITIES*NUMBER_OF_QUANTITIES];
59 };
60 
61 struct NeighboringIntegrationData {
62   real nAmNm1[4][NUMBER_OF_QUANTITIES*NUMBER_OF_QUANTITIES];
63 };
64 
65 struct CellData {
66   struct LocalIntegrationData       *localIntegration;
67   struct NeighboringIntegrationData *neighboringIntegration;
68 };
69 
70 struct Cells {
71   unsigned int numberOfCells;
72   real (*dofs)[NUMBER_OF_ALIGNED_DOFS];
73   real **buffers;
74   real **derivatives;
75   real *(*faceNeighbors)[4];
76 };
77 
78 struct GlobalData **m_globalDataArray;
79 struct GlobalData *m_globalData;
80 struct CellLocalInformation *m_cellInformation;
81 struct CellData *m_cellData;
82 struct Cells *m_cells;
83 struct LocalIntegrationData *m_localIntegration;
84 struct NeighboringIntegrationData * m_neighboringIntegration;
85 
86 seissol::kernels::Time     m_timeKernel;
87 seissol::kernels::Volume   m_volumeKernel;
88 seissol::kernels::Boundary m_boundaryKernel;
89 
90 /* This option is needed to avoid pollution of low-level caches */
91 #define NUMBER_OF_THREADS_PER_GLOBALDATA_COPY 4
92 #ifndef NUMBER_OF_THREADS_PER_GLOBALDATA_COPY
93 #define NUMBER_OF_THREADS_PER_GLOBALDATA_COPY 16383
94 #endif
95 
96 real m_timeStepWidthSimulation = (real)1.0;
97 real* m_dofs;
98 real* m_tdofs;
99 #ifdef __USE_DERS
100 real* m_ders;
101 #endif
102 real** m_ptdofs;
103 real** m_pder;
104 real* m_faceNeighbors;
105 real** m_globalPointerArray;
106 real* m_globalPointer;
107 
init_data_structures(unsigned int i_cells)108 unsigned int init_data_structures(unsigned int i_cells) {
109   // check if we have to read on scenario
110   char* pScenario;
111   std::string s_scenario;
112   bool bUseScenario = false;
113   unsigned int (*scenario_faceType)[4];
114   unsigned int (*scenario_neighbor)[4];
115   unsigned int (*scenario_side)[4];
116   unsigned int (*scenario_orientation)[4];
117 
118   pScenario = getenv ("SEISSOL_PROXY_SCENARIO");
119   if (pScenario !=NULL ) {
120     bUseScenario = true;
121     s_scenario.assign(pScenario);
122     std::string file;
123     std::ifstream data;
124     size_t reads;
125     unsigned int value;
126 
127     // read scenario size
128     file = s_scenario + ".size";
129     data.open(file.c_str());
130     reads = 0;
131     if (!data) { printf("size of scenario couldn't be read!\n"); exit(-1); }
132     while (data >> i_cells) {
133       printf("scenario name is: %s\n", s_scenario.c_str());
134       printf("scenario has %i cells\n", i_cells);
135       reads++;
136     }
137     data.close();
138     if (reads != 1) { printf("wrong number of sizes (%i) in scenario were read!\n", reads); exit(-1); }
139 
140     scenario_neighbor = (unsigned int(*)[4]) malloc(i_cells*sizeof(unsigned int[4]));
141     scenario_faceType = (unsigned int(*)[4]) malloc(i_cells*sizeof(unsigned int[4]));
142     scenario_side = (unsigned int(*)[4]) malloc(i_cells*sizeof(unsigned int[4]));
143     scenario_orientation = (unsigned int(*)[4]) malloc(i_cells*sizeof(unsigned int[4]));
144 
145     // read neighbors
146     file = s_scenario + ".neigh";
147     data.open(file.c_str());
148     if (!data) { printf("neigh of scenario couldn't be read!\n"); exit(-1); }
149     reads = 0;
150     while (data >> value) {
151       scenario_neighbor[reads/4][reads%4] = value;
152       reads++;
153     }
154     data.close();
155     if (reads != i_cells*4) { printf("wrong neigh (%i) in scenario were read!\n", reads); exit(-1); }
156 
157     // read faceTypes
158     file = s_scenario + ".bound";
159     data.open(file.c_str());
160     if (!data) { printf("bound of scenario couldn't be read!\n"); exit(-1); }
161     reads = 0;
162     while (data >> value) {
163       scenario_faceType[reads/4][reads%4] = value;
164       reads++;
165     }
166     data.close();
167     if (reads != i_cells*4) { printf("wrong faceType (%i) in scenario were read!\n", reads); exit(-1); }
168 
169     // read sides
170     file = s_scenario + ".sides";
171     data.open(file.c_str());
172     if (!data) { printf("sides of scenario couldn't be read!\n"); exit(-1); }
173     reads = 0;
174     while (data >> value) {
175       scenario_side[reads/4][reads%4] = value;
176       reads++;
177     }
178     data.close();
179     if (reads != i_cells*4) { printf("wrong sides (%i) in scenario were read!\n", reads); exit(-1); }
180 
181     // read orientation
182     file = s_scenario + ".orient";
183     data.open(file.c_str());
184     if (!data) { printf("orientations of scenario couldn't be read!\n"); exit(-1); }
185     reads = 0;
186     while (data >> value) {
187       scenario_orientation[reads/4][reads%4] = value;
188       reads++;
189     }
190     data.close();
191     if (reads != i_cells*4) { printf("wrong orientations (%i) in scenario were read!\n", reads); exit(-1); }
192   }
193 
194   // init RNG
195   libxsmm_rng_set_seed(i_cells);
196 
197   // cell information
198   m_cellInformation = (CellLocalInformation*)malloc(i_cells*sizeof(CellLocalInformation));
199   for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
200     for (unsigned int f = 0; f < 4; f++) {
201       if (bUseScenario == true ) {
202         switch (scenario_faceType[l_cell][f]) {
203           case 0:
204             m_cellInformation[l_cell].faceTypes[f] = regular;
205             break;
206           case 1:
207             m_cellInformation[l_cell].faceTypes[f] = freeSurface;
208             break;
209           case 3:
210             m_cellInformation[l_cell].faceTypes[f] = dynamicRupture;
211             break;
212           case 5:
213             m_cellInformation[l_cell].faceTypes[f] = outflow;
214             break;
215           case 6:
216             m_cellInformation[l_cell].faceTypes[f] = periodic;
217             break;
218           default:
219             printf("unsupported faceType (%i)!\n", scenario_faceType[l_cell][f]);
220             exit(-1);
221             break;
222         }
223         m_cellInformation[l_cell].faceRelations[f][0] = scenario_side[l_cell][f];
224         m_cellInformation[l_cell].faceRelations[f][1] = scenario_orientation[l_cell][f];
225         m_cellInformation[l_cell].faceNeighborIds[f] =  scenario_neighbor[l_cell][f];
226       } else {
227         m_cellInformation[l_cell].faceTypes[f] = regular;
228         m_cellInformation[l_cell].faceRelations[f][0] = (libxsmm_rng_u32(4));
229         m_cellInformation[l_cell].faceRelations[f][1] = (libxsmm_rng_u32(3));
230         m_cellInformation[l_cell].faceNeighborIds[f] = (libxsmm_rng_u32(i_cells));
231       }
232     }
233 #ifdef __USE_DERS
234     m_cellInformation[l_cell].ltsSetup = 4095;
235 #else
236     m_cellInformation[l_cell].ltsSetup = 0;
237 #endif
238     for (unsigned int f = 0; f < 5; f++) {
239       m_cellInformation[l_cell].currentTime[f] = 0.0;
240     }
241   }
242 
243   // DOFs, tIntegrated buffer
244 #ifdef USE_HBM_DOFS
245   hbw_posix_memalign( (void**) &m_dofs, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DOFS])*i_cells );
246 #else
247   posix_memalign( (void**) &m_dofs, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DOFS])*i_cells );
248 #endif
249 
250 #ifdef USE_HBM_TDOFS
251   hbw_posix_memalign( (void**) &m_tdofs, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DOFS])*i_cells );
252 #else
253   posix_memalign( (void**) &m_tdofs, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DOFS])*i_cells );
254 #endif
255 
256 #ifdef __USE_DERS
257 #ifdef USE_HBM_DERS
258   hbw_posix_memalign( (void**) &m_ders, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DERS])*i_cells );
259 #else
260   posix_memalign( (void**) &m_ders, 2097152, sizeof(real[NUMBER_OF_ALIGNED_DERS])*i_cells );
261 #endif
262 #endif
263 
264   m_ptdofs = (real**)malloc(sizeof(real*)*i_cells);
265   m_pder = (real**)malloc(sizeof(real*)*i_cells);
266   m_faceNeighbors = (real*)malloc(sizeof(real*[4])*i_cells);
267 
268 #ifdef _OPENMP
269   #pragma omp parallel for schedule(static)
270 #endif
271   for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
272     for (unsigned int i = 0; i < NUMBER_OF_ALIGNED_DOFS; i++) {
273       m_dofs[(l_cell*NUMBER_OF_ALIGNED_DOFS)+i] = (real)libxsmm_rng_f64();
274     }
275     for (unsigned int i = 0; i < NUMBER_OF_ALIGNED_DOFS; i++) {
276       m_tdofs[(l_cell*NUMBER_OF_ALIGNED_DOFS)+i] = (real)libxsmm_rng_f64();
277     }
278   }
279 #ifdef __USE_DERS
280 #ifdef _OPENMP
281   #pragma omp parallel for schedule(static)
282 #endif
283   for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
284     for (unsigned int i = 0; i < NUMBER_OF_ALIGNED_DERS; i++) {
285       m_ders[(l_cell*NUMBER_OF_ALIGNED_DERS)+i] = (real)libxsmm_rng_f64();
286     }
287   }
288 #endif
289 
290   for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
291     m_ptdofs[l_cell] = &(m_tdofs[(l_cell*NUMBER_OF_ALIGNED_DOFS)]);
292 #ifdef __USE_DERS
293     m_pder[l_cell] = &(m_ders[(l_cell*NUMBER_OF_ALIGNED_DERS)]);
294 #else
295     m_pder[l_cell] = NULL;
296 #endif
297   }
298 
299   m_cells = (Cells*)malloc(sizeof(Cells));
300   m_cells->numberOfCells = i_cells;
301   m_cells->dofs = (real(*)[NUMBER_OF_ALIGNED_DOFS])m_dofs;
302   m_cells->buffers = m_ptdofs;
303   m_cells->derivatives = m_pder;
304   m_cells->faceNeighbors = (real*(*)[4])m_faceNeighbors;
305 
306   for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
307     for (unsigned int f = 0; f < 4; f++) {
308       if (m_cellInformation[l_cell].faceTypes[f] == outflow) {
309         m_cells->faceNeighbors[l_cell][f] = NULL;
310       } else if (m_cellInformation[l_cell].faceTypes[f] == freeSurface) {
311 #ifdef __USE_DERS
312         m_cells->faceNeighbors[l_cell][f] = m_cells->derivatives[l_cell];
313 #else
314         m_cells->faceNeighbors[l_cell][f] = m_cells->buffers[l_cell];
315 #endif
316       } else if (m_cellInformation[l_cell].faceTypes[f] == periodic || m_cellInformation[l_cell].faceTypes[f] == regular) {
317 #ifdef __USE_DERS
318         m_cells->faceNeighbors[l_cell][f] = m_cells->derivatives[m_cellInformation[l_cell].faceNeighborIds[f]];
319 #else
320         m_cells->faceNeighbors[l_cell][f] = m_cells->buffers[m_cellInformation[l_cell].faceNeighborIds[f]];
321 #endif
322       } else {
323         printf("unsupported boundary type -> exit\n");
324         exit(-1);
325       }
326     }
327   }
328 
329   // local integration
330 #ifdef USE_HBM_CELLLOCAL_LOCAL
331   hbw_posix_memalign( (void**) &m_localIntegration, 2097152, i_cells*sizeof(LocalIntegrationData) );
332 #else
333   posix_memalign( (void**) &m_localIntegration, 2097152, i_cells*sizeof(LocalIntegrationData) );
334 #endif
335 
336 #ifdef _OPENMP
337   #pragma omp parallel for schedule(static)
338 #endif
339   for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
340     // init star matrices
341     for (size_t m = 0; m < 3; m++) {
342       for (size_t j = 0; j < STAR_NNZ; j++) {
343         m_localIntegration[l_cell].starMatrices[m][j] = (real)libxsmm_rng_f64();
344       }
345     }
346     // init flux solver
347     for (size_t m = 0; m < 4; m++) {
348       for (size_t j = 0; j < NUMBER_OF_QUANTITIES*NUMBER_OF_QUANTITIES; j++) {
349         m_localIntegration[l_cell].nApNm1[m][j] = (real)libxsmm_rng_f64();
350       }
351     }
352   }
353 
354   // neighbor integration
355 #ifdef USE_HBM_CELLLOCAL_NEIGH
356   hbw_posix_memalign( (void**) &m_neighboringIntegration, 2097152, i_cells*sizeof(NeighboringIntegrationData) );
357 #else
358   posix_memalign( (void**) &m_neighboringIntegration, 2097152, i_cells*sizeof(NeighboringIntegrationData) );
359 #endif
360 
361 #ifdef _OPENMP
362   #pragma omp parallel for schedule(static)
363 #endif
364   for (unsigned int l_cell = 0; l_cell < i_cells; l_cell++) {
365     // init flux solver
366     for (size_t m = 0; m < 4; m++) {
367       for (size_t j = 0; j < NUMBER_OF_QUANTITIES*NUMBER_OF_QUANTITIES; j++) {
368         m_neighboringIntegration[l_cell].nAmNm1[m][j] = (real)libxsmm_rng_f64();
369       }
370     }
371   }
372 
373   // CellData
374   m_cellData = (CellData*)malloc(sizeof(CellData));
375   m_cellData->localIntegration = m_localIntegration;
376   m_cellData->neighboringIntegration = m_neighboringIntegration;
377 
378   // Global matrices
379   unsigned int l_globalMatrices  = NUMBER_OF_ALIGNED_BASIS_FUNCTIONS * seissol::kernels::getNumberOfBasisFunctions( CONVERGENCE_ORDER-1 ) * 3;
380                l_globalMatrices += seissol::kernels::getNumberOfAlignedBasisFunctions( CONVERGENCE_ORDER-1 ) * NUMBER_OF_BASIS_FUNCTIONS  * 3;
381                l_globalMatrices += NUMBER_OF_ALIGNED_BASIS_FUNCTIONS * NUMBER_OF_BASIS_FUNCTIONS * 52;
382                l_globalMatrices *= sizeof(real);
383 
384   // determine number of global data copies
385   unsigned int l_numberOfThreads = 1;
386 #ifdef _OPENMP
387   #pragma omp parallel
388   {
389     #pragma omp master
390     {
391       l_numberOfThreads = omp_get_num_threads();
392     }
393   }
394 #endif
395   unsigned int l_numberOfCopiesCeil = (l_numberOfThreads%NUMBER_OF_THREADS_PER_GLOBALDATA_COPY == 0) ? 0 : 1;
396   unsigned int l_numberOfCopies = (l_numberOfThreads/NUMBER_OF_THREADS_PER_GLOBALDATA_COPY) + l_numberOfCopiesCeil;
397 
398   m_globalPointerArray = (real**) malloc(l_numberOfCopies*sizeof(real*));
399   m_globalDataArray = (GlobalData**) malloc(l_numberOfCopies*sizeof(GlobalData*));
400 
401   // @TODO: for NUMA we need to bind this
402   for (unsigned int l_globalDataCount = 0; l_globalDataCount < l_numberOfCopies; l_globalDataCount++) {
403 #ifdef USE_HBM_GLOBALDATA
404     hbw_posix_memalign( (void**) &(m_globalPointerArray[l_globalDataCount]), 2097152, l_globalMatrices );
405 #else
406     posix_memalign( (void**) &(m_globalPointerArray[l_globalDataCount]), 2097152, l_globalMatrices );
407 #endif
408     m_globalPointer = m_globalPointerArray[l_globalDataCount];
409     m_globalDataArray[l_globalDataCount] = (GlobalData*) malloc(sizeof(GlobalData));
410     m_globalData =  m_globalDataArray[l_globalDataCount];
411 
412     for (unsigned int i = 0; i < (l_globalMatrices/sizeof(real)); i++) {
413       m_globalPointer[i] = (real)libxsmm_rng_f64();
414     }
415 
416     real* tmp_pointer = m_globalPointer;
417     // stiffness for time integration
418     for( unsigned int l_transposedStiffnessMatrix = 0; l_transposedStiffnessMatrix < 3; l_transposedStiffnessMatrix++ ) {
419       m_globalData->stiffnessMatricesTransposed[l_transposedStiffnessMatrix] = tmp_pointer;
420       tmp_pointer += seissol::kernels::getNumberOfAlignedBasisFunctions( CONVERGENCE_ORDER-1 ) * NUMBER_OF_BASIS_FUNCTIONS;
421     }
422 
423     // stiffness for volume integration
424     for( unsigned int l_stiffnessMatrix = 0; l_stiffnessMatrix < 3; l_stiffnessMatrix++ ) {
425       m_globalData->stiffnessMatrices[l_stiffnessMatrix] = tmp_pointer;
426       tmp_pointer += NUMBER_OF_ALIGNED_BASIS_FUNCTIONS * seissol::kernels::getNumberOfBasisFunctions( CONVERGENCE_ORDER-1 );
427     }
428 
429     // flux matrices for boundary integration
430     for( unsigned int l_fluxMatrix = 0; l_fluxMatrix < 52; l_fluxMatrix++ ) {
431       m_globalData->fluxMatrices[l_fluxMatrix] = tmp_pointer;
432       tmp_pointer += NUMBER_OF_ALIGNED_BASIS_FUNCTIONS * NUMBER_OF_BASIS_FUNCTIONS;
433     }
434   }
435 
436   // set default to first chunk
437   m_globalPointer = m_globalPointerArray[0];
438   m_globalData = m_globalDataArray[0];
439 
440   if (bUseScenario == true ) {
441     free(scenario_faceType);
442     free(scenario_neighbor);
443     free(scenario_side);
444     free(scenario_orientation);
445   }
446 
447   return i_cells;
448 }
449 
free_data_structures()450 void free_data_structures() {
451   unsigned int l_numberOfThreads = 1;
452 #ifdef _OPENMP
453   #pragma omp parallel
454   {
455     #pragma omp master
456     {
457       l_numberOfThreads = omp_get_num_threads();
458     }
459   }
460 #endif
461   unsigned int l_numberOfCopiesCeil = (l_numberOfThreads%NUMBER_OF_THREADS_PER_GLOBALDATA_COPY == 0) ? 0 : 1;
462   unsigned int l_numberOfCopies = (l_numberOfThreads/NUMBER_OF_THREADS_PER_GLOBALDATA_COPY) + l_numberOfCopiesCeil;
463 
464   for (unsigned int l_globalDataCount = 0; l_globalDataCount < l_numberOfCopies; l_globalDataCount++) {
465     m_globalData =  m_globalDataArray[l_globalDataCount];
466     free(m_globalData);
467   }
468   free(m_globalDataArray);
469   free(m_cellInformation);
470   free(m_cellData);
471   free(m_cells);
472 
473 #ifdef USE_HBM_CELLLOCAL_LOCAL
474   hbw_free(m_localIntegration);
475 #else
476   free(m_localIntegration);
477 #endif
478 
479 #ifdef USE_HBM_CELLLOCAL_NEIGH
480   hbw_free(m_neighboringIntegration);
481 #else
482   free(m_neighboringIntegration);
483 #endif
484 
485 #ifdef USE_HBM_DOFS
486   hbw_free(m_dofs);
487 #else
488   free(m_dofs);
489 #endif
490 
491 #ifdef USE_HBM_TDOFS
492   hbw_free(m_tdofs);
493 #else
494   free(m_tdofs);
495 #endif
496 
497 #ifdef __USE_DERS
498 #ifdef USE_HBM_DERS
499   hbw_free(m_ders);
500 #else
501   free(m_ders);
502 #endif
503 #endif
504 
505   free(m_ptdofs);
506   free(m_pder);
507   free(m_faceNeighbors);
508 
509   for (unsigned int l_globalDataCount = 0; l_globalDataCount < l_numberOfCopies; l_globalDataCount++) {
510     m_globalPointer = m_globalPointerArray[l_globalDataCount];
511 #ifdef USE_HBM_GLOBALDATA
512     hbw_free(m_globalPointer);
513 #else
514     free(m_globalPointer);
515 #endif
516   }
517 
518   free(m_globalPointerArray);
519 }
520 
521 #endif /*PROXY_SEISSOL_ALLOCATOR_HPP*/
522 
523