/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved.                      *
* This file is part of the LIBXSMM library.                                   *
*                                                                             *
* For information on the license, see the LICENSE file.                       *
* Further information: https://github.com/hfp/libxsmm/                        *
* SPDX-License-Identifier: BSD-3-Clause                                       *
******************************************************************************/
#ifndef PROXY_SEISSOL_INTEGRATORS_HPP
#define PROXY_SEISSOL_INTEGRATORS_HPP

#if defined(_OPENMP)
# include <omp.h>
#endif
15
/**
 * ADER time integration over all cells: for each cell the time kernel is
 * invoked with the transposed stiffness matrices, the cell's DOFs and its
 * local star matrices; the cell's buffer and derivatives arrays are passed
 * as the remaining arguments (presumably the outputs of the time expansion
 * -- confirm against the time kernel's signature).
 */
void computeAderIntegration() {
#ifdef _OPENMP
  // Single parallel region: each thread selects its GlobalData, then the
  // cell loop below is statically distributed via "omp for".
  # pragma omp parallel
  {
#if NUMBER_OF_THREADS_PER_GLOBALDATA_COPY < 512
    // NOTE(review): the per-thread GlobalData selection is commented out, so
    // all threads share copy 0 -- confirm this is intentional; the
    // neighboring integration still uses the per-thread copy.
    //GlobalData* l_globalData = m_globalDataArray[omp_get_thread_num()/NUMBER_OF_THREADS_PER_GLOBALDATA_COPY];
    GlobalData* l_globalData = m_globalDataArray[0];
#else
    GlobalData* l_globalData = m_globalData;
#endif
    #pragma omp for schedule(static)
#else
  // Serial build: one shared GlobalData.
  GlobalData* l_globalData = m_globalData;
#endif
  for( unsigned int l_cell = 0; l_cell < m_cells->numberOfCells; l_cell++ ) {
    m_timeKernel.computeAder( m_timeStepWidthSimulation,
                              l_globalData->stiffnessMatricesTransposed,
                              m_cells->dofs[l_cell],
                              m_cellData->localIntegration[l_cell].starMatrices,
                              m_cells->buffers[l_cell],
                              m_cells->derivatives[l_cell] );
  }
#ifdef _OPENMP
  } // end omp parallel
#endif
}
42
/**
 * Volume integration over all cells: the volume kernel is applied per cell
 * with the (non-transposed) stiffness matrices, the cell's time-integrated
 * buffer and its local star matrices; the cell's DOFs are passed last
 * (presumably accumulated into -- confirm against the volume kernel).
 */
void computeVolumeIntegration() {
#ifdef _OPENMP
  # pragma omp parallel
  {
#if NUMBER_OF_THREADS_PER_GLOBALDATA_COPY < 512
    // NOTE(review): per-thread GlobalData selection disabled; all threads use
    // copy 0 -- confirm intentional.
    //GlobalData* l_globalData = m_globalDataArray[omp_get_thread_num()/NUMBER_OF_THREADS_PER_GLOBALDATA_COPY];
    GlobalData* l_globalData = m_globalDataArray[0];
#else
    GlobalData* l_globalData = m_globalData;
#endif
    #pragma omp for schedule(static)
#else
  // Serial build: one shared GlobalData.
  GlobalData* l_globalData = m_globalData;
#endif
  for( unsigned int l_cell = 0; l_cell < m_cells->numberOfCells; l_cell++ ) {
    m_volumeKernel.computeIntegral( l_globalData->stiffnessMatrices,
                                    m_cells->buffers[l_cell],
                                    m_cellData->localIntegration[l_cell].starMatrices,
                                    m_cells->dofs[l_cell] );
  }
#ifdef _OPENMP
  } // end omp parallel
#endif
}
67
/**
 * Cell-local part of the boundary (flux) integration: per cell, the boundary
 * kernel is called with the face types, the global flux matrices, the cell's
 * time-integrated buffer and the local flux solver (nApNm1); the cell's DOFs
 * are passed last. With ENABLE_STREAM_MATRIX_PREFETCH the next cell's buffer
 * and DOFs are additionally handed over as prefetch targets.
 */
void computeLocalBoundaryIntegration() {
#ifdef _OPENMP
  #pragma omp parallel
  {
#if NUMBER_OF_THREADS_PER_GLOBALDATA_COPY < 512
    // NOTE(review): per-thread GlobalData selection disabled; all threads use
    // copy 0 -- confirm intentional.
    //GlobalData* l_globalData = m_globalDataArray[omp_get_thread_num()/NUMBER_OF_THREADS_PER_GLOBALDATA_COPY];
    GlobalData* l_globalData = m_globalDataArray[0];
#else
    GlobalData* l_globalData = m_globalData;
#endif
    #pragma omp for schedule(static)
#else
  // Serial build: one shared GlobalData.
  GlobalData* l_globalData = m_globalData;
#endif
  for( unsigned int l_cell = 0; l_cell < m_cells->numberOfCells; l_cell++ ) {
    m_boundaryKernel.computeLocalIntegral( m_cellInformation[l_cell].faceTypes,
                                           l_globalData->fluxMatrices,
                                           m_cells->buffers[l_cell],
                                           m_cellData->localIntegration[l_cell].nApNm1,
#ifdef ENABLE_STREAM_MATRIX_PREFETCH
                                           m_cells->dofs[l_cell],
                                           // NOTE(review): for the last cell these read index
                                           // numberOfCells -- presumably the arrays carry one
                                           // padding entry for prefetch; verify the allocation.
                                           m_cells->buffers[l_cell+1],
                                           m_cells->dofs[l_cell+1] );
#else
                                           m_cells->dofs[l_cell] );
#endif
  }
#ifdef _OPENMP
  } // end omp parallel
#endif
}
99
/**
 * Fused local update: runs ADER time integration, volume integration and the
 * cell-local boundary integration back-to-back for each cell inside a single
 * loop (one parallel region instead of three). Mirrors
 * computeAderIntegration / computeVolumeIntegration /
 * computeLocalBoundaryIntegration above.
 */
void computeLocalIntegration() {
#ifdef _OPENMP
  #pragma omp parallel
  {
#if NUMBER_OF_THREADS_PER_GLOBALDATA_COPY < 512
    // NOTE(review): per-thread GlobalData selection disabled; all threads use
    // copy 0 -- confirm intentional.
    //GlobalData* l_globalData = m_globalDataArray[omp_get_thread_num()/NUMBER_OF_THREADS_PER_GLOBALDATA_COPY];
    GlobalData* l_globalData = m_globalDataArray[0];
#else
    GlobalData* l_globalData = m_globalData;
#endif
    #pragma omp for schedule(static)
#else
  // Serial build: one shared GlobalData.
  GlobalData* l_globalData = m_globalData;
#endif
  for( unsigned int l_cell = 0; l_cell < m_cells->numberOfCells; l_cell++ ) {
    // 1) ADER time expansion (note: the time step is cast to double here,
    //    unlike in computeAderIntegration -- presumably equivalent; verify).
    m_timeKernel.computeAder( (double)m_timeStepWidthSimulation,
                              l_globalData->stiffnessMatricesTransposed,
                              m_cells->dofs[l_cell],
                              m_cellData->localIntegration[l_cell].starMatrices,
                              m_cells->buffers[l_cell],
                              m_cells->derivatives[l_cell] );

    // 2) Volume integration on the freshly computed time-integrated buffer.
    m_volumeKernel.computeIntegral( l_globalData->stiffnessMatrices,
                                    m_cells->buffers[l_cell],
                                    m_cellData->localIntegration[l_cell].starMatrices,
                                    m_cells->dofs[l_cell] );

    // 3) Cell-local boundary integration (optionally with next-cell prefetch).
    m_boundaryKernel.computeLocalIntegral( m_cellInformation[l_cell].faceTypes,
                                           l_globalData->fluxMatrices,
                                           m_cells->buffers[l_cell],
                                           m_cellData->localIntegration[l_cell].nApNm1,
#ifdef ENABLE_STREAM_MATRIX_PREFETCH
                                           m_cells->dofs[l_cell],
                                           // NOTE(review): last cell reads index numberOfCells;
                                           // presumably padded allocation -- verify.
                                           m_cells->buffers[l_cell+1],
                                           m_cells->dofs[l_cell+1] );
#else
                                           m_cells->dofs[l_cell] );
#endif
  }
#ifdef _OPENMP
  } // end omp parallel
#endif
}
143
computeNeighboringIntegration()144 void computeNeighboringIntegration() {
145 real l_integrationBuffer[4][NUMBER_OF_ALIGNED_DOFS] __attribute__((aligned(4096)));
146 real *l_timeIntegrated[4];
147 #ifdef ENABLE_MATRIX_PREFETCH
148 real *l_faceNeighbors_prefetch[4];
149 real *l_fluxMatricies_prefetch[4];
150 #endif
151
152 #ifdef _OPENMP
153 #ifdef ENABLE_MATRIX_PREFETCH
154 #pragma omp parallel private(l_integrationBuffer, l_timeIntegrated, l_faceNeighbors_prefetch, l_fluxMatricies_prefetch)
155 #else
156 #pragma omp parallel private(l_integrationBuffer, l_timeIntegrated)
157 #endif
158 {
159 #if NUMBER_OF_THREADS_PER_GLOBALDATA_COPY < 512
160 GlobalData* l_globalData = m_globalDataArray[omp_get_thread_num()/NUMBER_OF_THREADS_PER_GLOBALDATA_COPY];
161 #else
162 GlobalData* l_globalData = m_globalData;
163 #endif
164 #pragma omp for schedule(static)
165 #else
166 GlobalData* l_globalData = m_globalData;
167 #endif
168 for( int l_cell = 0; l_cell < m_cells->numberOfCells; l_cell++ ) {
169 m_timeKernel.computeIntegrals( m_cellInformation[l_cell].ltsSetup,
170 m_cellInformation[l_cell].faceTypes,
171 m_cellInformation[l_cell].currentTime,
172 (double)m_timeStepWidthSimulation,
173 m_cells->faceNeighbors[l_cell],
174 l_integrationBuffer,
175 l_timeIntegrated );
176
177 #ifdef ENABLE_MATRIX_PREFETCH
178 #pragma message("the current prefetch structure (flux matrices and tDOFs is tuned for higher order and shouldn't be harmful for lower orders")
179 int l_face = 1;
180 l_faceNeighbors_prefetch[0] = m_cells->faceNeighbors[l_cell][l_face];
181 l_fluxMatricies_prefetch[0] = l_globalData->fluxMatrices[4+(l_face*12)
182 +(m_cellInformation[l_cell].faceRelations[l_face][0]*3)
183 +(m_cellInformation[l_cell].faceRelations[l_face][1])];
184 l_face = 2;
185 l_faceNeighbors_prefetch[1] = m_cells->faceNeighbors[l_cell][l_face];
186 l_fluxMatricies_prefetch[1] = l_globalData->fluxMatrices[4+(l_face*12)
187 +(m_cellInformation[l_cell].faceRelations[l_face][0]*3)
188 +(m_cellInformation[l_cell].faceRelations[l_face][1])];
189 l_face = 3;
190 l_faceNeighbors_prefetch[2] = m_cells->faceNeighbors[l_cell][l_face];
191 l_fluxMatricies_prefetch[2] = l_globalData->fluxMatrices[4+(l_face*12)
192 +(m_cellInformation[l_cell].faceRelations[l_face][0]*3)
193 +(m_cellInformation[l_cell].faceRelations[l_face][1])];
194 l_face = 0;
195 if (l_cell < (m_cells->numberOfCells-1) ) {
196 l_faceNeighbors_prefetch[3] = m_cells->faceNeighbors[l_cell+1][l_face];
197 l_fluxMatricies_prefetch[3] = l_globalData->fluxMatrices[4+(l_face*12)
198 +(m_cellInformation[l_cell+1].faceRelations[l_face][0]*3)
199 +(m_cellInformation[l_cell+1].faceRelations[l_face][1])];
200 } else {
201 l_faceNeighbors_prefetch[3] = m_cells->faceNeighbors[l_cell][3];
202 l_fluxMatricies_prefetch[3] = l_globalData->fluxMatrices[4+(3*12)
203 +(m_cellInformation[l_cell].faceRelations[l_face][0]*3)
204 +(m_cellInformation[l_cell].faceRelations[l_face][1])];
205 }
206 #endif
207
208 m_boundaryKernel.computeNeighborsIntegral( m_cellInformation[l_cell].faceTypes,
209 m_cellInformation[l_cell].faceRelations,
210 l_globalData->fluxMatrices,
211 l_timeIntegrated,
212 m_cellData->neighboringIntegration[l_cell].nAmNm1,
213 #ifdef ENABLE_MATRIX_PREFETCH
214 m_cells->dofs[l_cell],
215 l_faceNeighbors_prefetch,
216 l_fluxMatricies_prefetch );
217 #else
218 m_cells->dofs[l_cell]);
219 #endif
220 }
221
222 #ifdef _OPENMP
223 }
224 #endif
225 }
226
#endif /*PROXY_SEISSOL_INTEGRATORS_HPP*/