1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15
16 using namespace std;
17
18 namespace mfem
19 {
20
PAHcurlMassApply2D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)21 void PAHcurlMassApply2D(const int D1D,
22 const int Q1D,
23 const int NE,
24 const bool symmetric,
25 const Array<double> &bo,
26 const Array<double> &bc,
27 const Array<double> &bot,
28 const Array<double> &bct,
29 const Vector &pa_data,
30 const Vector &x,
31 Vector &y)
32 {
33 constexpr static int VDIM = 2;
34 constexpr static int MAX_D1D = HCURL_MAX_D1D;
35 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
36
37 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
38 auto Bc = Reshape(bc.Read(), Q1D, D1D);
39 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
40 auto Bct = Reshape(bct.Read(), D1D, Q1D);
41 auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
42 auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
43 auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
44
45 MFEM_FORALL(e, NE,
46 {
47 double mass[MAX_Q1D][MAX_Q1D][VDIM];
48
49 for (int qy = 0; qy < Q1D; ++qy)
50 {
51 for (int qx = 0; qx < Q1D; ++qx)
52 {
53 for (int c = 0; c < VDIM; ++c)
54 {
55 mass[qy][qx][c] = 0.0;
56 }
57 }
58 }
59
60 int osc = 0;
61
62 for (int c = 0; c < VDIM; ++c) // loop over x, y components
63 {
64 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
65 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
66
67 for (int dy = 0; dy < D1Dy; ++dy)
68 {
69 double massX[MAX_Q1D];
70 for (int qx = 0; qx < Q1D; ++qx)
71 {
72 massX[qx] = 0.0;
73 }
74
75 for (int dx = 0; dx < D1Dx; ++dx)
76 {
77 const double t = X(dx + (dy * D1Dx) + osc, e);
78 for (int qx = 0; qx < Q1D; ++qx)
79 {
80 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
81 }
82 }
83
84 for (int qy = 0; qy < Q1D; ++qy)
85 {
86 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
87 for (int qx = 0; qx < Q1D; ++qx)
88 {
89 mass[qy][qx][c] += massX[qx] * wy;
90 }
91 }
92 }
93
94 osc += D1Dx * D1Dy;
95 } // loop (c) over components
96
97 // Apply D operator.
98 for (int qy = 0; qy < Q1D; ++qy)
99 {
100 for (int qx = 0; qx < Q1D; ++qx)
101 {
102 const double O11 = op(qx,qy,0,e);
103 const double O21 = op(qx,qy,1,e);
104 const double O12 = symmetric ? O21 : op(qx,qy,2,e);
105 const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
106 const double massX = mass[qy][qx][0];
107 const double massY = mass[qy][qx][1];
108 mass[qy][qx][0] = (O11*massX)+(O12*massY);
109 mass[qy][qx][1] = (O21*massX)+(O22*massY);
110 }
111 }
112
113 for (int qy = 0; qy < Q1D; ++qy)
114 {
115 osc = 0;
116
117 for (int c = 0; c < VDIM; ++c) // loop over x, y components
118 {
119 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
120 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
121
122 double massX[MAX_D1D];
123 for (int dx = 0; dx < D1Dx; ++dx)
124 {
125 massX[dx] = 0.0;
126 }
127 for (int qx = 0; qx < Q1D; ++qx)
128 {
129 for (int dx = 0; dx < D1Dx; ++dx)
130 {
131 massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
132 }
133 }
134
135 for (int dy = 0; dy < D1Dy; ++dy)
136 {
137 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
138
139 for (int dx = 0; dx < D1Dx; ++dx)
140 {
141 Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
142 }
143 }
144
145 osc += D1Dx * D1Dy;
146 } // loop c
147 } // loop qy
148 }); // end of element loop
149 }
150
PAHcurlMassAssembleDiagonal2D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Vector & pa_data,Vector & diag)151 void PAHcurlMassAssembleDiagonal2D(const int D1D,
152 const int Q1D,
153 const int NE,
154 const bool symmetric,
155 const Array<double> &bo,
156 const Array<double> &bc,
157 const Vector &pa_data,
158 Vector &diag)
159 {
160 constexpr static int VDIM = 2;
161 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
162
163 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
164 auto Bc = Reshape(bc.Read(), Q1D, D1D);
165 auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
166 auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
167
168 MFEM_FORALL(e, NE,
169 {
170 int osc = 0;
171
172 for (int c = 0; c < VDIM; ++c) // loop over x, y components
173 {
174 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
175 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
176
177 double mass[MAX_Q1D];
178
179 for (int dy = 0; dy < D1Dy; ++dy)
180 {
181 for (int qx = 0; qx < Q1D; ++qx)
182 {
183 mass[qx] = 0.0;
184 for (int qy = 0; qy < Q1D; ++qy)
185 {
186 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
187
188 mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) :
189 op(qx,qy,symmetric ? 2 : 3, e));
190 }
191 }
192
193 for (int dx = 0; dx < D1Dx; ++dx)
194 {
195 for (int qx = 0; qx < Q1D; ++qx)
196 {
197 const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
198 D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx;
199 }
200 }
201 }
202
203 osc += D1Dx * D1Dy;
204 } // loop c
205 }); // end of element loop
206 }
207
PAHcurlMassAssembleDiagonal3D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Vector & pa_data,Vector & diag)208 void PAHcurlMassAssembleDiagonal3D(const int D1D,
209 const int Q1D,
210 const int NE,
211 const bool symmetric,
212 const Array<double> &bo,
213 const Array<double> &bc,
214 const Vector &pa_data,
215 Vector &diag)
216 {
217 constexpr static int MAX_D1D = HCURL_MAX_D1D;
218 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
219
220 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
221 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
222 constexpr static int VDIM = 3;
223
224 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
225 auto Bc = Reshape(bc.Read(), Q1D, D1D);
226 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
227 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
228
229 MFEM_FORALL(e, NE,
230 {
231 int osc = 0;
232
233 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
234 {
235 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
236 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
237 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
238
239 const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
240 (symmetric ? 5 : 8));
241
242 double mass[MAX_Q1D];
243
244 for (int dz = 0; dz < D1Dz; ++dz)
245 {
246 for (int dy = 0; dy < D1Dy; ++dy)
247 {
248 for (int qx = 0; qx < Q1D; ++qx)
249 {
250 mass[qx] = 0.0;
251 for (int qy = 0; qy < Q1D; ++qy)
252 {
253 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
254
255 for (int qz = 0; qz < Q1D; ++qz)
256 {
257 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
258
259 mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
260 }
261 }
262 }
263
264 for (int dx = 0; dx < D1Dx; ++dx)
265 {
266 for (int qx = 0; qx < Q1D; ++qx)
267 {
268 const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
269 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx;
270 }
271 }
272 }
273 }
274
275 osc += D1Dx * D1Dy * D1Dz;
276 } // loop c
277 }); // end of element loop
278 }
279
280 template<int T_D1D, int T_Q1D>
SmemPAHcurlMassAssembleDiagonal3D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Vector & pa_data,Vector & diag)281 void SmemPAHcurlMassAssembleDiagonal3D(const int D1D,
282 const int Q1D,
283 const int NE,
284 const bool symmetric,
285 const Array<double> &bo,
286 const Array<double> &bc,
287 const Vector &pa_data,
288 Vector &diag)
289 {
290 MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
291 MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
292
293 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
294 auto Bc = Reshape(bc.Read(), Q1D, D1D);
295 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
296 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
297
298 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
299 {
300 constexpr int VDIM = 3;
301 constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
302 constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
303
304 MFEM_SHARED double sBo[tQ1D][tD1D];
305 MFEM_SHARED double sBc[tQ1D][tD1D];
306
307 double op3[3];
308 MFEM_SHARED double sop[3][tQ1D][tQ1D];
309
310 MFEM_FOREACH_THREAD(qx,x,Q1D)
311 {
312 MFEM_FOREACH_THREAD(qy,y,Q1D)
313 {
314 MFEM_FOREACH_THREAD(qz,z,Q1D)
315 {
316 op3[0] = op(qx,qy,qz,0,e);
317 op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e);
318 op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e);
319 }
320 }
321 }
322
323 const int tidx = MFEM_THREAD_ID(x);
324 const int tidy = MFEM_THREAD_ID(y);
325 const int tidz = MFEM_THREAD_ID(z);
326
327 if (tidz == 0)
328 {
329 MFEM_FOREACH_THREAD(d,y,D1D)
330 {
331 MFEM_FOREACH_THREAD(q,x,Q1D)
332 {
333 sBc[q][d] = Bc(q,d);
334 if (d < D1D-1)
335 {
336 sBo[q][d] = Bo(q,d);
337 }
338 }
339 }
340 }
341 MFEM_SYNC_THREAD;
342
343 int osc = 0;
344 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
345 {
346 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
347 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
348 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
349
350 double dxyz = 0.0;
351
352 for (int qz=0; qz < Q1D; ++qz)
353 {
354 if (tidz == qz)
355 {
356 for (int i=0; i<3; ++i)
357 {
358 sop[i][tidx][tidy] = op3[i];
359 }
360 }
361
362 MFEM_SYNC_THREAD;
363
364 MFEM_FOREACH_THREAD(dz,z,D1Dz)
365 {
366 const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
367
368 MFEM_FOREACH_THREAD(dy,y,D1Dy)
369 {
370 MFEM_FOREACH_THREAD(dx,x,D1Dx)
371 {
372 for (int qy = 0; qy < Q1D; ++qy)
373 {
374 const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
375
376 for (int qx = 0; qx < Q1D; ++qx)
377 {
378 const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
379 dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz;
380 }
381 }
382 }
383 }
384 }
385
386 MFEM_SYNC_THREAD;
387 } // qz loop
388
389 MFEM_FOREACH_THREAD(dz,z,D1Dz)
390 {
391 MFEM_FOREACH_THREAD(dy,y,D1Dy)
392 {
393 MFEM_FOREACH_THREAD(dx,x,D1Dx)
394 {
395 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
396 }
397 }
398 }
399
400 osc += D1Dx * D1Dy * D1Dz;
401 } // c loop
402 }); // end of element loop
403 }
404
PAHcurlMassApply3D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)405 void PAHcurlMassApply3D(const int D1D,
406 const int Q1D,
407 const int NE,
408 const bool symmetric,
409 const Array<double> &bo,
410 const Array<double> &bc,
411 const Array<double> &bot,
412 const Array<double> &bct,
413 const Vector &pa_data,
414 const Vector &x,
415 Vector &y)
416 {
417 constexpr static int MAX_D1D = HCURL_MAX_D1D;
418 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
419
420 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
421 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
422 constexpr static int VDIM = 3;
423
424 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
425 auto Bc = Reshape(bc.Read(), Q1D, D1D);
426 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
427 auto Bct = Reshape(bct.Read(), D1D, Q1D);
428 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
429 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
430 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
431
432 MFEM_FORALL(e, NE,
433 {
434 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
435
436 for (int qz = 0; qz < Q1D; ++qz)
437 {
438 for (int qy = 0; qy < Q1D; ++qy)
439 {
440 for (int qx = 0; qx < Q1D; ++qx)
441 {
442 for (int c = 0; c < VDIM; ++c)
443 {
444 mass[qz][qy][qx][c] = 0.0;
445 }
446 }
447 }
448 }
449
450 int osc = 0;
451
452 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
453 {
454 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
455 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
456 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
457
458 for (int dz = 0; dz < D1Dz; ++dz)
459 {
460 double massXY[MAX_Q1D][MAX_Q1D];
461 for (int qy = 0; qy < Q1D; ++qy)
462 {
463 for (int qx = 0; qx < Q1D; ++qx)
464 {
465 massXY[qy][qx] = 0.0;
466 }
467 }
468
469 for (int dy = 0; dy < D1Dy; ++dy)
470 {
471 double massX[MAX_Q1D];
472 for (int qx = 0; qx < Q1D; ++qx)
473 {
474 massX[qx] = 0.0;
475 }
476
477 for (int dx = 0; dx < D1Dx; ++dx)
478 {
479 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
480 for (int qx = 0; qx < Q1D; ++qx)
481 {
482 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
483 }
484 }
485
486 for (int qy = 0; qy < Q1D; ++qy)
487 {
488 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
489 for (int qx = 0; qx < Q1D; ++qx)
490 {
491 const double wx = massX[qx];
492 massXY[qy][qx] += wx * wy;
493 }
494 }
495 }
496
497 for (int qz = 0; qz < Q1D; ++qz)
498 {
499 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
500 for (int qy = 0; qy < Q1D; ++qy)
501 {
502 for (int qx = 0; qx < Q1D; ++qx)
503 {
504 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
505 }
506 }
507 }
508 }
509
510 osc += D1Dx * D1Dy * D1Dz;
511 } // loop (c) over components
512
513 // Apply D operator.
514 for (int qz = 0; qz < Q1D; ++qz)
515 {
516 for (int qy = 0; qy < Q1D; ++qy)
517 {
518 for (int qx = 0; qx < Q1D; ++qx)
519 {
520 const double O11 = op(qx,qy,qz,0,e);
521 const double O12 = op(qx,qy,qz,1,e);
522 const double O13 = op(qx,qy,qz,2,e);
523 const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
524 const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
525 const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
526 const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
527 const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
528 const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
529 const double massX = mass[qz][qy][qx][0];
530 const double massY = mass[qz][qy][qx][1];
531 const double massZ = mass[qz][qy][qx][2];
532 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
533 mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
534 mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
535 }
536 }
537 }
538
539 for (int qz = 0; qz < Q1D; ++qz)
540 {
541 double massXY[MAX_D1D][MAX_D1D];
542
543 osc = 0;
544
545 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
546 {
547 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
548 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
549 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
550
551 for (int dy = 0; dy < D1Dy; ++dy)
552 {
553 for (int dx = 0; dx < D1Dx; ++dx)
554 {
555 massXY[dy][dx] = 0.0;
556 }
557 }
558 for (int qy = 0; qy < Q1D; ++qy)
559 {
560 double massX[MAX_D1D];
561 for (int dx = 0; dx < D1Dx; ++dx)
562 {
563 massX[dx] = 0;
564 }
565 for (int qx = 0; qx < Q1D; ++qx)
566 {
567 for (int dx = 0; dx < D1Dx; ++dx)
568 {
569 massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
570 }
571 }
572 for (int dy = 0; dy < D1Dy; ++dy)
573 {
574 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
575 for (int dx = 0; dx < D1Dx; ++dx)
576 {
577 massXY[dy][dx] += massX[dx] * wy;
578 }
579 }
580 }
581
582 for (int dz = 0; dz < D1Dz; ++dz)
583 {
584 const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
585 for (int dy = 0; dy < D1Dy; ++dy)
586 {
587 for (int dx = 0; dx < D1Dx; ++dx)
588 {
589 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
590 }
591 }
592 }
593
594 osc += D1Dx * D1Dy * D1Dz;
595 } // loop c
596 } // loop qz
597 }); // end of element loop
598 }
599
600 template<int T_D1D, int T_Q1D>
SmemPAHcurlMassApply3D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)601 void SmemPAHcurlMassApply3D(const int D1D,
602 const int Q1D,
603 const int NE,
604 const bool symmetric,
605 const Array<double> &bo,
606 const Array<double> &bc,
607 const Array<double> &bot,
608 const Array<double> &bct,
609 const Vector &pa_data,
610 const Vector &x,
611 Vector &y)
612 {
613 MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
614 MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
615
616 const int dataSize = symmetric ? 6 : 9;
617
618 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
619 auto Bc = Reshape(bc.Read(), Q1D, D1D);
620 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE);
621 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
622 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
623
624 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
625 {
626 constexpr int VDIM = 3;
627 constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
628 constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
629
630 MFEM_SHARED double sBo[tQ1D][tD1D];
631 MFEM_SHARED double sBc[tQ1D][tD1D];
632
633 double op9[9];
634 MFEM_SHARED double sop[9*tQ1D*tQ1D];
635 MFEM_SHARED double mass[tQ1D][tQ1D][3];
636
637 MFEM_SHARED double sX[tD1D][tD1D][tD1D];
638
639 MFEM_FOREACH_THREAD(qx,x,Q1D)
640 {
641 MFEM_FOREACH_THREAD(qy,y,Q1D)
642 {
643 MFEM_FOREACH_THREAD(qz,z,Q1D)
644 {
645 for (int i=0; i<dataSize; ++i)
646 {
647 op9[i] = op(qx,qy,qz,i,e);
648 }
649 }
650 }
651 }
652
653 const int tidx = MFEM_THREAD_ID(x);
654 const int tidy = MFEM_THREAD_ID(y);
655 const int tidz = MFEM_THREAD_ID(z);
656
657 if (tidz == 0)
658 {
659 MFEM_FOREACH_THREAD(d,y,D1D)
660 {
661 MFEM_FOREACH_THREAD(q,x,Q1D)
662 {
663 sBc[q][d] = Bc(q,d);
664 if (d < D1D-1)
665 {
666 sBo[q][d] = Bo(q,d);
667 }
668 }
669 }
670 }
671 MFEM_SYNC_THREAD;
672
673 for (int qz=0; qz < Q1D; ++qz)
674 {
675 int osc = 0;
676 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
677 {
678 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
679 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
680 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
681
682 MFEM_FOREACH_THREAD(dz,z,D1Dz)
683 {
684 MFEM_FOREACH_THREAD(dy,y,D1Dy)
685 {
686 MFEM_FOREACH_THREAD(dx,x,D1Dx)
687 {
688 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
689 }
690 }
691 }
692 MFEM_SYNC_THREAD;
693
694 if (tidz == qz)
695 {
696 for (int i=0; i<dataSize; ++i)
697 {
698 sop[i + (dataSize*tidx) + (dataSize*Q1D*tidy)] = op9[i];
699 }
700
701 MFEM_FOREACH_THREAD(qy,y,Q1D)
702 {
703 MFEM_FOREACH_THREAD(qx,x,Q1D)
704 {
705 double u = 0.0;
706
707 for (int dz = 0; dz < D1Dz; ++dz)
708 {
709 const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
710 for (int dy = 0; dy < D1Dy; ++dy)
711 {
712 const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
713 for (int dx = 0; dx < D1Dx; ++dx)
714 {
715 const double t = sX[dz][dy][dx];
716 const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
717 u += t * wx * wy * wz;
718 }
719 }
720 }
721
722 mass[qy][qx][c] = u;
723 } // qx
724 } // qy
725 } // tidz == qz
726
727 osc += D1Dx * D1Dy * D1Dz;
728 MFEM_SYNC_THREAD;
729 } // c
730
731 MFEM_SYNC_THREAD; // Sync mass[qy][qx][d] and sop
732
733 osc = 0;
734 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
735 {
736 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
737 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
738 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
739
740 double dxyz = 0.0;
741
742 MFEM_FOREACH_THREAD(dz,z,D1Dz)
743 {
744 const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
745
746 MFEM_FOREACH_THREAD(dy,y,D1Dy)
747 {
748 MFEM_FOREACH_THREAD(dx,x,D1Dx)
749 {
750 for (int qy = 0; qy < Q1D; ++qy)
751 {
752 const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
753 for (int qx = 0; qx < Q1D; ++qx)
754 {
755 const int os = (dataSize*qx) + (dataSize*Q1D*qy);
756 const int id1 = os + ((c == 0) ? 0 : ((c == 1) ? (symmetric ? 1 : 3) :
757 (symmetric ? 2 : 6))); // O11, O21, O31
758 const int id2 = os + ((c == 0) ? 1 : ((c == 1) ? (symmetric ? 3 : 4) :
759 (symmetric ? 4 : 7))); // O12, O22, O32
760 const int id3 = os + ((c == 0) ? 2 : ((c == 1) ? (symmetric ? 4 : 5) :
761 (symmetric ? 5 : 8))); // O13, O23, O33
762
763 const double m_c = (sop[id1] * mass[qy][qx][0]) + (sop[id2] * mass[qy][qx][1]) +
764 (sop[id3] * mass[qy][qx][2]);
765
766 const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
767 dxyz += m_c * wx * wy * wz;
768 }
769 }
770 }
771 }
772 }
773
774 MFEM_SYNC_THREAD;
775
776 MFEM_FOREACH_THREAD(dz,z,D1Dz)
777 {
778 MFEM_FOREACH_THREAD(dy,y,D1Dy)
779 {
780 MFEM_FOREACH_THREAD(dx,x,D1Dx)
781 {
782 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
783 }
784 }
785 }
786
787 osc += D1Dx * D1Dy * D1Dz;
788 } // c loop
789 } // qz
790 }); // end of element loop
791 }
792
793 // PA H(curl) curl-curl assemble 2D kernel
PACurlCurlSetup2D(const int Q1D,const int NE,const Array<double> & w,const Vector & j,Vector & coeff,Vector & op)794 static void PACurlCurlSetup2D(const int Q1D,
795 const int NE,
796 const Array<double> &w,
797 const Vector &j,
798 Vector &coeff,
799 Vector &op)
800 {
801 const int NQ = Q1D*Q1D;
802 auto W = w.Read();
803 auto J = Reshape(j.Read(), NQ, 2, 2, NE);
804 auto C = Reshape(coeff.Read(), NQ, NE);
805 auto y = Reshape(op.Write(), NQ, NE);
806 MFEM_FORALL(e, NE,
807 {
808 for (int q = 0; q < NQ; ++q)
809 {
810 const double J11 = J(q,0,0,e);
811 const double J21 = J(q,1,0,e);
812 const double J12 = J(q,0,1,e);
813 const double J22 = J(q,1,1,e);
814 const double detJ = (J11*J22)-(J21*J12);
815 y(q,e) = W[q] * C(q,e) / detJ;
816 }
817 });
818 }
819
820 // PA H(curl) curl-curl assemble 3D kernel
PACurlCurlSetup3D(const int Q1D,const int coeffDim,const int NE,const Array<double> & w,const Vector & j,Vector & coeff,Vector & op)821 static void PACurlCurlSetup3D(const int Q1D,
822 const int coeffDim,
823 const int NE,
824 const Array<double> &w,
825 const Vector &j,
826 Vector &coeff,
827 Vector &op)
828 {
829 const int NQ = Q1D*Q1D*Q1D;
830 const bool symmetric = (coeffDim != 9);
831 auto W = w.Read();
832 auto J = Reshape(j.Read(), NQ, 3, 3, NE);
833 auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
834 auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
835
836 MFEM_FORALL(e, NE,
837 {
838 for (int q = 0; q < NQ; ++q)
839 {
840 const double J11 = J(q,0,0,e);
841 const double J21 = J(q,1,0,e);
842 const double J31 = J(q,2,0,e);
843 const double J12 = J(q,0,1,e);
844 const double J22 = J(q,1,1,e);
845 const double J32 = J(q,2,1,e);
846 const double J13 = J(q,0,2,e);
847 const double J23 = J(q,1,2,e);
848 const double J33 = J(q,2,2,e);
849 const double detJ = J11 * (J22 * J33 - J32 * J23) -
850 /* */ J21 * (J12 * J33 - J32 * J13) +
851 /* */ J31 * (J12 * J23 - J22 * J13);
852
853 const double c_detJ = W[q] / detJ;
854
855 if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
856 {
857 // Set y to the 6 or 9 entries of J^T M J / det
858 const double M11 = C(0, q, e);
859 const double M12 = C(1, q, e);
860 const double M13 = C(2, q, e);
861 const double M21 = (!symmetric) ? C(3, q, e) : M12;
862 const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e);
863 const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e);
864 const double M31 = (!symmetric) ? C(6, q, e) : M13;
865 const double M32 = (!symmetric) ? C(7, q, e) : M23;
866 const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e);
867
868 // First compute R = MJ
869 const double R11 = M11*J11 + M12*J21 + M13*J31;
870 const double R12 = M11*J12 + M12*J22 + M13*J32;
871 const double R13 = M11*J13 + M12*J23 + M13*J33;
872 const double R21 = M21*J11 + M22*J21 + M23*J31;
873 const double R22 = M21*J12 + M22*J22 + M23*J32;
874 const double R23 = M21*J13 + M22*J23 + M23*J33;
875 const double R31 = M31*J11 + M32*J21 + M33*J31;
876 const double R32 = M31*J12 + M32*J22 + M33*J32;
877 const double R33 = M31*J13 + M32*J23 + M33*J33;
878
879 // Now set y to J^T R / det
880 y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1
881 const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32);
882 y(q,1,e) = Y12; // 1,2
883 y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3
884
885 const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31);
886 const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32);
887 const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33);
888
889 const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33);
890
891 y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1
892 y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2
893 y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3
894
895 if (!symmetric)
896 {
897 y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1
898 y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2
899 y(q,8,e) = Y33; // 3,3
900 }
901 }
902 else // Vector or scalar coefficient version
903 {
904 // Set y to the 6 entries of J^T D J / det^2
905 const double D1 = C(0, q, e);
906 const double D2 = coeffDim == 3 ? C(1, q, e) : D1;
907 const double D3 = coeffDim == 3 ? C(2, q, e) : D1;
908
909 y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1
910 y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2
911 y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3
912 y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2
913 y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3
914 y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3
915 }
916 }
917 });
918 }
919
AssemblePA(const FiniteElementSpace & fes)920 void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes)
921 {
922 // Assumes tensor-product elements
923 Mesh *mesh = fes.GetMesh();
924 const FiniteElement *fel = fes.GetFE(0);
925
926 const VectorTensorFiniteElement *el =
927 dynamic_cast<const VectorTensorFiniteElement*>(fel);
928 MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
929
930 const IntegrationRule *ir
931 = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el,
932 *mesh->GetElementTransformation(0));
933
934 const int dims = el->GetDim();
935 MFEM_VERIFY(dims == 2 || dims == 3, "");
936
937 const int nq = ir->GetNPoints();
938 dim = mesh->Dimension();
939 MFEM_VERIFY(dim == 2 || dim == 3, "");
940
941 const int dimc = (dim == 3) ? 3 : 1;
942
943 ne = fes.GetNE();
944 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
945 mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
946 mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
947 dofs1D = mapsC->ndof;
948 quad1D = mapsC->nqpt;
949
950 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
951
952 const int MQsymmDim = SMQ ? (SMQ->GetSize() * (SMQ->GetSize() + 1)) / 2 : 0;
953 const int MQfullDim = MQ ? (MQ->GetHeight() * MQ->GetWidth()) : 0;
954 const int MQdim = MQ ? MQfullDim : MQsymmDim;
955 const int coeffDim = (MQ || SMQ) ? MQdim : (DQ ? DQ->GetVDim() : 1);
956
957 symmetric = (MQ == NULL);
958
959 const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
960 const int ndata = (dim == 2) ? 1 : (symmetric ? symmDims : MQfullDim);
961 pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
962
963 Vector coeff(coeffDim * ne * nq);
964 coeff = 1.0;
965 auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
966 if (Q || DQ || MQ || SMQ)
967 {
968 Vector D(DQ ? coeffDim : 0);
969 DenseMatrix M;
970 DenseSymmetricMatrix SM;
971
972 if (DQ)
973 {
974 MFEM_VERIFY(coeffDim == dimc, "");
975 }
976 if (MQ)
977 {
978 M.SetSize(dimc);
979 MFEM_VERIFY(coeffDim == MQdim, "");
980 MFEM_VERIFY(MQ->GetHeight() == dimc && MQ->GetWidth() == dimc, "");
981 }
982 if (SMQ)
983 {
984 SM.SetSize(dimc);
985 MFEM_VERIFY(SMQ->GetSize() == dimc, "");
986 }
987
988 for (int e=0; e<ne; ++e)
989 {
990 ElementTransformation *tr = mesh->GetElementTransformation(e);
991 for (int p=0; p<nq; ++p)
992 {
993 if (MQ)
994 {
995 MQ->Eval(M, *tr, ir->IntPoint(p));
996
997 for (int i=0; i<dimc; ++i)
998 for (int j=0; j<dimc; ++j)
999 {
1000 coeffh(j+(i*dimc), p, e) = M(i,j);
1001 }
1002
1003 }
1004 else if (SMQ)
1005 {
1006 SMQ->Eval(SM, *tr, ir->IntPoint(p));
1007
1008 int cnt = 0;
1009 for (int i=0; i<dimc; ++i)
1010 for (int j=i; j<dimc; ++j, ++cnt)
1011 {
1012 coeffh(cnt, p, e) = SM(i,j);
1013 }
1014
1015 }
1016 else if (DQ)
1017 {
1018 DQ->Eval(D, *tr, ir->IntPoint(p));
1019 for (int i=0; i<coeffDim; ++i)
1020 {
1021 coeffh(i, p, e) = D[i];
1022 }
1023 }
1024 else
1025 {
1026 coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
1027 }
1028 }
1029 }
1030 }
1031
1032 if (el->GetDerivType() != mfem::FiniteElement::CURL)
1033 {
1034 MFEM_ABORT("Unknown kernel.");
1035 }
1036
1037 if (dim == 3)
1038 {
1039 PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
1040 pa_data);
1041 }
1042 else
1043 {
1044 PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1045 }
1046 }
1047
PACurlCurlApply2D(const int D1D,const int Q1D,const int NE,const Array<double> & bo,const Array<double> & bot,const Array<double> & gc,const Array<double> & gct,const Vector & pa_data,const Vector & x,Vector & y)1048 static void PACurlCurlApply2D(const int D1D,
1049 const int Q1D,
1050 const int NE,
1051 const Array<double> &bo,
1052 const Array<double> &bot,
1053 const Array<double> &gc,
1054 const Array<double> &gct,
1055 const Vector &pa_data,
1056 const Vector &x,
1057 Vector &y)
1058 {
1059 constexpr static int VDIM = 2;
1060 constexpr static int MAX_D1D = HCURL_MAX_D1D;
1061 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1062
1063 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1064 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1065 auto Gc = Reshape(gc.Read(), Q1D, D1D);
1066 auto Gct = Reshape(gct.Read(), D1D, Q1D);
1067 auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1068 auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1069 auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
1070
1071 MFEM_FORALL(e, NE,
1072 {
1073 double curl[MAX_Q1D][MAX_Q1D];
1074
1075 // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1076
1077 for (int qy = 0; qy < Q1D; ++qy)
1078 {
1079 for (int qx = 0; qx < Q1D; ++qx)
1080 {
1081 curl[qy][qx] = 0.0;
1082 }
1083 }
1084
1085 int osc = 0;
1086
1087 for (int c = 0; c < VDIM; ++c) // loop over x, y components
1088 {
1089 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1090 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1091
1092 for (int dy = 0; dy < D1Dy; ++dy)
1093 {
1094 double gradX[MAX_Q1D];
1095 for (int qx = 0; qx < Q1D; ++qx)
1096 {
1097 gradX[qx] = 0;
1098 }
1099
1100 for (int dx = 0; dx < D1Dx; ++dx)
1101 {
1102 const double t = X(dx + (dy * D1Dx) + osc, e);
1103 for (int qx = 0; qx < Q1D; ++qx)
1104 {
1105 gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
1106 }
1107 }
1108
1109 for (int qy = 0; qy < Q1D; ++qy)
1110 {
1111 const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
1112 for (int qx = 0; qx < Q1D; ++qx)
1113 {
1114 curl[qy][qx] += gradX[qx] * wy;
1115 }
1116 }
1117 }
1118
1119 osc += D1Dx * D1Dy;
1120 } // loop (c) over components
1121
1122 // Apply D operator.
1123 for (int qy = 0; qy < Q1D; ++qy)
1124 {
1125 for (int qx = 0; qx < Q1D; ++qx)
1126 {
1127 curl[qy][qx] *= op(qx,qy,e);
1128 }
1129 }
1130
1131 for (int qy = 0; qy < Q1D; ++qy)
1132 {
1133 osc = 0;
1134
1135 for (int c = 0; c < VDIM; ++c) // loop over x, y components
1136 {
1137 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1138 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1139
1140 double gradX[MAX_D1D];
1141 for (int dx = 0; dx < D1Dx; ++dx)
1142 {
1143 gradX[dx] = 0.0;
1144 }
1145 for (int qx = 0; qx < Q1D; ++qx)
1146 {
1147 for (int dx = 0; dx < D1Dx; ++dx)
1148 {
1149 gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
1150 }
1151 }
1152 for (int dy = 0; dy < D1Dy; ++dy)
1153 {
1154 const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
1155
1156 for (int dx = 0; dx < D1Dx; ++dx)
1157 {
1158 Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1159 }
1160 }
1161
1162 osc += D1Dx * D1Dy;
1163 } // loop c
1164 } // loop qy
1165 }); // end of element loop
1166 }
1167
1168 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
PACurlCurlApply3D(const int D1D,const int Q1D,const bool symmetric,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Array<double> & gc,const Array<double> & gct,const Vector & pa_data,const Vector & x,Vector & y)1169 static void PACurlCurlApply3D(const int D1D,
1170 const int Q1D,
1171 const bool symmetric,
1172 const int NE,
1173 const Array<double> &bo,
1174 const Array<double> &bc,
1175 const Array<double> &bot,
1176 const Array<double> &bct,
1177 const Array<double> &gc,
1178 const Array<double> &gct,
1179 const Vector &pa_data,
1180 const Vector &x,
1181 Vector &y)
1182 {
1183 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1184 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1185 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1186 // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1187 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1188 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1189 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1190
1191 constexpr static int VDIM = 3;
1192
1193 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1194 auto Bc = Reshape(bc.Read(), Q1D, D1D);
1195 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1196 auto Bct = Reshape(bct.Read(), D1D, Q1D);
1197 auto Gc = Reshape(gc.Read(), Q1D, D1D);
1198 auto Gct = Reshape(gct.Read(), D1D, Q1D);
1199 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
1200 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1201 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1202
1203 MFEM_FORALL(e, NE,
1204 {
1205 double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
1206 // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
1207
1208 for (int qz = 0; qz < Q1D; ++qz)
1209 {
1210 for (int qy = 0; qy < Q1D; ++qy)
1211 {
1212 for (int qx = 0; qx < Q1D; ++qx)
1213 {
1214 for (int c = 0; c < VDIM; ++c)
1215 {
1216 curl[qz][qy][qx][c] = 0.0;
1217 }
1218 }
1219 }
1220 }
1221
1222 // We treat x, y, z components separately for optimization specific to each.
1223
1224 int osc = 0;
1225
1226 {
1227 // x component
1228 const int D1Dz = D1D;
1229 const int D1Dy = D1D;
1230 const int D1Dx = D1D - 1;
1231
1232 for (int dz = 0; dz < D1Dz; ++dz)
1233 {
1234 double gradXY[MAX_Q1D][MAX_Q1D][2];
1235 for (int qy = 0; qy < Q1D; ++qy)
1236 {
1237 for (int qx = 0; qx < Q1D; ++qx)
1238 {
1239 for (int d = 0; d < 2; ++d)
1240 {
1241 gradXY[qy][qx][d] = 0.0;
1242 }
1243 }
1244 }
1245
1246 for (int dy = 0; dy < D1Dy; ++dy)
1247 {
1248 double massX[MAX_Q1D];
1249 for (int qx = 0; qx < Q1D; ++qx)
1250 {
1251 massX[qx] = 0.0;
1252 }
1253
1254 for (int dx = 0; dx < D1Dx; ++dx)
1255 {
1256 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1257 for (int qx = 0; qx < Q1D; ++qx)
1258 {
1259 massX[qx] += t * Bo(qx,dx);
1260 }
1261 }
1262
1263 for (int qy = 0; qy < Q1D; ++qy)
1264 {
1265 const double wy = Bc(qy,dy);
1266 const double wDy = Gc(qy,dy);
1267 for (int qx = 0; qx < Q1D; ++qx)
1268 {
1269 const double wx = massX[qx];
1270 gradXY[qy][qx][0] += wx * wDy;
1271 gradXY[qy][qx][1] += wx * wy;
1272 }
1273 }
1274 }
1275
1276 for (int qz = 0; qz < Q1D; ++qz)
1277 {
1278 const double wz = Bc(qz,dz);
1279 const double wDz = Gc(qz,dz);
1280 for (int qy = 0; qy < Q1D; ++qy)
1281 {
1282 for (int qx = 0; qx < Q1D; ++qx)
1283 {
1284 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1285 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
1286 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
1287 }
1288 }
1289 }
1290 }
1291
1292 osc += D1Dx * D1Dy * D1Dz;
1293 }
1294
1295 {
1296 // y component
1297 const int D1Dz = D1D;
1298 const int D1Dy = D1D - 1;
1299 const int D1Dx = D1D;
1300
1301 for (int dz = 0; dz < D1Dz; ++dz)
1302 {
1303 double gradXY[MAX_Q1D][MAX_Q1D][2];
1304 for (int qy = 0; qy < Q1D; ++qy)
1305 {
1306 for (int qx = 0; qx < Q1D; ++qx)
1307 {
1308 for (int d = 0; d < 2; ++d)
1309 {
1310 gradXY[qy][qx][d] = 0.0;
1311 }
1312 }
1313 }
1314
1315 for (int dx = 0; dx < D1Dx; ++dx)
1316 {
1317 double massY[MAX_Q1D];
1318 for (int qy = 0; qy < Q1D; ++qy)
1319 {
1320 massY[qy] = 0.0;
1321 }
1322
1323 for (int dy = 0; dy < D1Dy; ++dy)
1324 {
1325 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1326 for (int qy = 0; qy < Q1D; ++qy)
1327 {
1328 massY[qy] += t * Bo(qy,dy);
1329 }
1330 }
1331
1332 for (int qx = 0; qx < Q1D; ++qx)
1333 {
1334 const double wx = Bc(qx,dx);
1335 const double wDx = Gc(qx,dx);
1336 for (int qy = 0; qy < Q1D; ++qy)
1337 {
1338 const double wy = massY[qy];
1339 gradXY[qy][qx][0] += wDx * wy;
1340 gradXY[qy][qx][1] += wx * wy;
1341 }
1342 }
1343 }
1344
1345 for (int qz = 0; qz < Q1D; ++qz)
1346 {
1347 const double wz = Bc(qz,dz);
1348 const double wDz = Gc(qz,dz);
1349 for (int qy = 0; qy < Q1D; ++qy)
1350 {
1351 for (int qx = 0; qx < Q1D; ++qx)
1352 {
1353 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1354 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
1355 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
1356 }
1357 }
1358 }
1359 }
1360
1361 osc += D1Dx * D1Dy * D1Dz;
1362 }
1363
1364 {
1365 // z component
1366 const int D1Dz = D1D - 1;
1367 const int D1Dy = D1D;
1368 const int D1Dx = D1D;
1369
1370 for (int dx = 0; dx < D1Dx; ++dx)
1371 {
1372 double gradYZ[MAX_Q1D][MAX_Q1D][2];
1373 for (int qz = 0; qz < Q1D; ++qz)
1374 {
1375 for (int qy = 0; qy < Q1D; ++qy)
1376 {
1377 for (int d = 0; d < 2; ++d)
1378 {
1379 gradYZ[qz][qy][d] = 0.0;
1380 }
1381 }
1382 }
1383
1384 for (int dy = 0; dy < D1Dy; ++dy)
1385 {
1386 double massZ[MAX_Q1D];
1387 for (int qz = 0; qz < Q1D; ++qz)
1388 {
1389 massZ[qz] = 0.0;
1390 }
1391
1392 for (int dz = 0; dz < D1Dz; ++dz)
1393 {
1394 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1395 for (int qz = 0; qz < Q1D; ++qz)
1396 {
1397 massZ[qz] += t * Bo(qz,dz);
1398 }
1399 }
1400
1401 for (int qy = 0; qy < Q1D; ++qy)
1402 {
1403 const double wy = Bc(qy,dy);
1404 const double wDy = Gc(qy,dy);
1405 for (int qz = 0; qz < Q1D; ++qz)
1406 {
1407 const double wz = massZ[qz];
1408 gradYZ[qz][qy][0] += wz * wy;
1409 gradYZ[qz][qy][1] += wz * wDy;
1410 }
1411 }
1412 }
1413
1414 for (int qx = 0; qx < Q1D; ++qx)
1415 {
1416 const double wx = Bc(qx,dx);
1417 const double wDx = Gc(qx,dx);
1418
1419 for (int qy = 0; qy < Q1D; ++qy)
1420 {
1421 for (int qz = 0; qz < Q1D; ++qz)
1422 {
1423 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1424 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
1425 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
1426 }
1427 }
1428 }
1429 }
1430 }
1431
1432 // Apply D operator.
1433 for (int qz = 0; qz < Q1D; ++qz)
1434 {
1435 for (int qy = 0; qy < Q1D; ++qy)
1436 {
1437 for (int qx = 0; qx < Q1D; ++qx)
1438 {
1439 const double O11 = op(qx,qy,qz,0,e);
1440 const double O12 = op(qx,qy,qz,1,e);
1441 const double O13 = op(qx,qy,qz,2,e);
1442 const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
1443 const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
1444 const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
1445 const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
1446 const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
1447 const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
1448
1449 const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
1450 (O13 * curl[qz][qy][qx][2]);
1451 const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
1452 (O23 * curl[qz][qy][qx][2]);
1453 const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) +
1454 (O33 * curl[qz][qy][qx][2]);
1455
1456 curl[qz][qy][qx][0] = c1;
1457 curl[qz][qy][qx][1] = c2;
1458 curl[qz][qy][qx][2] = c3;
1459 }
1460 }
1461 }
1462
1463 // x component
1464 osc = 0;
1465 {
1466 const int D1Dz = D1D;
1467 const int D1Dy = D1D;
1468 const int D1Dx = D1D - 1;
1469
1470 for (int qz = 0; qz < Q1D; ++qz)
1471 {
1472 double gradXY12[MAX_D1D][MAX_D1D];
1473 double gradXY21[MAX_D1D][MAX_D1D];
1474
1475 for (int dy = 0; dy < D1Dy; ++dy)
1476 {
1477 for (int dx = 0; dx < D1Dx; ++dx)
1478 {
1479 gradXY12[dy][dx] = 0.0;
1480 gradXY21[dy][dx] = 0.0;
1481 }
1482 }
1483 for (int qy = 0; qy < Q1D; ++qy)
1484 {
1485 double massX[MAX_D1D][2];
1486 for (int dx = 0; dx < D1Dx; ++dx)
1487 {
1488 for (int n = 0; n < 2; ++n)
1489 {
1490 massX[dx][n] = 0.0;
1491 }
1492 }
1493 for (int qx = 0; qx < Q1D; ++qx)
1494 {
1495 for (int dx = 0; dx < D1Dx; ++dx)
1496 {
1497 const double wx = Bot(dx,qx);
1498
1499 massX[dx][0] += wx * curl[qz][qy][qx][1];
1500 massX[dx][1] += wx * curl[qz][qy][qx][2];
1501 }
1502 }
1503 for (int dy = 0; dy < D1Dy; ++dy)
1504 {
1505 const double wy = Bct(dy,qy);
1506 const double wDy = Gct(dy,qy);
1507
1508 for (int dx = 0; dx < D1Dx; ++dx)
1509 {
1510 gradXY21[dy][dx] += massX[dx][0] * wy;
1511 gradXY12[dy][dx] += massX[dx][1] * wDy;
1512 }
1513 }
1514 }
1515
1516 for (int dz = 0; dz < D1Dz; ++dz)
1517 {
1518 const double wz = Bct(dz,qz);
1519 const double wDz = Gct(dz,qz);
1520 for (int dy = 0; dy < D1Dy; ++dy)
1521 {
1522 for (int dx = 0; dx < D1Dx; ++dx)
1523 {
1524 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1525 // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1526 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1527 e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
1528 }
1529 }
1530 }
1531 } // loop qz
1532
1533 osc += D1Dx * D1Dy * D1Dz;
1534 }
1535
1536 // y component
1537 {
1538 const int D1Dz = D1D;
1539 const int D1Dy = D1D - 1;
1540 const int D1Dx = D1D;
1541
1542 for (int qz = 0; qz < Q1D; ++qz)
1543 {
1544 double gradXY02[MAX_D1D][MAX_D1D];
1545 double gradXY20[MAX_D1D][MAX_D1D];
1546
1547 for (int dy = 0; dy < D1Dy; ++dy)
1548 {
1549 for (int dx = 0; dx < D1Dx; ++dx)
1550 {
1551 gradXY02[dy][dx] = 0.0;
1552 gradXY20[dy][dx] = 0.0;
1553 }
1554 }
1555 for (int qx = 0; qx < Q1D; ++qx)
1556 {
1557 double massY[MAX_D1D][2];
1558 for (int dy = 0; dy < D1Dy; ++dy)
1559 {
1560 massY[dy][0] = 0.0;
1561 massY[dy][1] = 0.0;
1562 }
1563 for (int qy = 0; qy < Q1D; ++qy)
1564 {
1565 for (int dy = 0; dy < D1Dy; ++dy)
1566 {
1567 const double wy = Bot(dy,qy);
1568
1569 massY[dy][0] += wy * curl[qz][qy][qx][2];
1570 massY[dy][1] += wy * curl[qz][qy][qx][0];
1571 }
1572 }
1573 for (int dx = 0; dx < D1Dx; ++dx)
1574 {
1575 const double wx = Bct(dx,qx);
1576 const double wDx = Gct(dx,qx);
1577
1578 for (int dy = 0; dy < D1Dy; ++dy)
1579 {
1580 gradXY02[dy][dx] += massY[dy][0] * wDx;
1581 gradXY20[dy][dx] += massY[dy][1] * wx;
1582 }
1583 }
1584 }
1585
1586 for (int dz = 0; dz < D1Dz; ++dz)
1587 {
1588 const double wz = Bct(dz,qz);
1589 const double wDz = Gct(dz,qz);
1590 for (int dy = 0; dy < D1Dy; ++dy)
1591 {
1592 for (int dx = 0; dx < D1Dx; ++dx)
1593 {
1594 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1595 // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1596 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1597 e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
1598 }
1599 }
1600 }
1601 } // loop qz
1602
1603 osc += D1Dx * D1Dy * D1Dz;
1604 }
1605
1606 // z component
1607 {
1608 const int D1Dz = D1D - 1;
1609 const int D1Dy = D1D;
1610 const int D1Dx = D1D;
1611
1612 for (int qx = 0; qx < Q1D; ++qx)
1613 {
1614 double gradYZ01[MAX_D1D][MAX_D1D];
1615 double gradYZ10[MAX_D1D][MAX_D1D];
1616
1617 for (int dy = 0; dy < D1Dy; ++dy)
1618 {
1619 for (int dz = 0; dz < D1Dz; ++dz)
1620 {
1621 gradYZ01[dz][dy] = 0.0;
1622 gradYZ10[dz][dy] = 0.0;
1623 }
1624 }
1625 for (int qy = 0; qy < Q1D; ++qy)
1626 {
1627 double massZ[MAX_D1D][2];
1628 for (int dz = 0; dz < D1Dz; ++dz)
1629 {
1630 for (int n = 0; n < 2; ++n)
1631 {
1632 massZ[dz][n] = 0.0;
1633 }
1634 }
1635 for (int qz = 0; qz < Q1D; ++qz)
1636 {
1637 for (int dz = 0; dz < D1Dz; ++dz)
1638 {
1639 const double wz = Bot(dz,qz);
1640
1641 massZ[dz][0] += wz * curl[qz][qy][qx][0];
1642 massZ[dz][1] += wz * curl[qz][qy][qx][1];
1643 }
1644 }
1645 for (int dy = 0; dy < D1Dy; ++dy)
1646 {
1647 const double wy = Bct(dy,qy);
1648 const double wDy = Gct(dy,qy);
1649
1650 for (int dz = 0; dz < D1Dz; ++dz)
1651 {
1652 gradYZ01[dz][dy] += wy * massZ[dz][1];
1653 gradYZ10[dz][dy] += wDy * massZ[dz][0];
1654 }
1655 }
1656 }
1657
1658 for (int dx = 0; dx < D1Dx; ++dx)
1659 {
1660 const double wx = Bct(dx,qx);
1661 const double wDx = Gct(dx,qx);
1662
1663 for (int dy = 0; dy < D1Dy; ++dy)
1664 {
1665 for (int dz = 0; dz < D1Dz; ++dz)
1666 {
1667 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1668 // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1669 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1670 e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
1671 }
1672 }
1673 }
1674 } // loop qx
1675 }
1676 }); // end of element loop
1677 }
1678
1679 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
SmemPACurlCurlApply3D(const int D1D,const int Q1D,const bool symmetric,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Array<double> & gc,const Array<double> & gct,const Vector & pa_data,const Vector & x,Vector & y)1680 static void SmemPACurlCurlApply3D(const int D1D,
1681 const int Q1D,
1682 const bool symmetric,
1683 const int NE,
1684 const Array<double> &bo,
1685 const Array<double> &bc,
1686 const Array<double> &bot,
1687 const Array<double> &bct,
1688 const Array<double> &gc,
1689 const Array<double> &gct,
1690 const Vector &pa_data,
1691 const Vector &x,
1692 Vector &y)
1693 {
1694 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1695 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1696 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1697 // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1698 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1699 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1700 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1701
1702 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1703 auto Bc = Reshape(bc.Read(), Q1D, D1D);
1704 auto Gc = Reshape(gc.Read(), Q1D, D1D);
1705 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1706 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1707 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1708
1709 const int s = symmetric ? 6 : 9;
1710
1711 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1712 {
1713 constexpr int VDIM = 3;
1714
1715 MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
1716 MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
1717 MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
1718
1719 double ope[9];
1720 MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
1721 MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
1722
1723 MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
1724
1725 MFEM_FOREACH_THREAD(qx,x,Q1D)
1726 {
1727 MFEM_FOREACH_THREAD(qy,y,Q1D)
1728 {
1729 MFEM_FOREACH_THREAD(qz,z,Q1D)
1730 {
1731 for (int i=0; i<s; ++i)
1732 {
1733 ope[i] = op(qx,qy,qz,i,e);
1734 }
1735 }
1736 }
1737 }
1738
1739 const int tidx = MFEM_THREAD_ID(x);
1740 const int tidy = MFEM_THREAD_ID(y);
1741 const int tidz = MFEM_THREAD_ID(z);
1742
1743 if (tidz == 0)
1744 {
1745 MFEM_FOREACH_THREAD(d,y,D1D)
1746 {
1747 MFEM_FOREACH_THREAD(q,x,Q1D)
1748 {
1749 sBc[d][q] = Bc(q,d);
1750 sGc[d][q] = Gc(q,d);
1751 if (d < D1D-1)
1752 {
1753 sBo[d][q] = Bo(q,d);
1754 }
1755 }
1756 }
1757 }
1758 MFEM_SYNC_THREAD;
1759
1760 for (int qz=0; qz < Q1D; ++qz)
1761 {
1762 if (tidz == qz)
1763 {
1764 MFEM_FOREACH_THREAD(qy,y,Q1D)
1765 {
1766 MFEM_FOREACH_THREAD(qx,x,Q1D)
1767 {
1768 for (int i=0; i<3; ++i)
1769 {
1770 curl[qy][qx][i] = 0.0;
1771 }
1772 }
1773 }
1774 }
1775
1776 int osc = 0;
1777 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1778 {
1779 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
1780 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1781 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1782
1783 MFEM_FOREACH_THREAD(dz,z,D1Dz)
1784 {
1785 MFEM_FOREACH_THREAD(dy,y,D1Dy)
1786 {
1787 MFEM_FOREACH_THREAD(dx,x,D1Dx)
1788 {
1789 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1790 }
1791 }
1792 }
1793 MFEM_SYNC_THREAD;
1794
1795 if (tidz == qz)
1796 {
1797 if (c == 0)
1798 {
1799 for (int i=0; i<s; ++i)
1800 {
1801 sop[i][tidx][tidy] = ope[i];
1802 }
1803 }
1804
1805 MFEM_FOREACH_THREAD(qy,y,Q1D)
1806 {
1807 MFEM_FOREACH_THREAD(qx,x,Q1D)
1808 {
1809 double u = 0.0;
1810 double v = 0.0;
1811
1812 // We treat x, y, z components separately for optimization specific to each.
1813 if (c == 0) // x component
1814 {
1815 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1816
1817 for (int dz = 0; dz < D1Dz; ++dz)
1818 {
1819 const double wz = sBc[dz][qz];
1820 const double wDz = sGc[dz][qz];
1821
1822 for (int dy = 0; dy < D1Dy; ++dy)
1823 {
1824 const double wy = sBc[dy][qy];
1825 const double wDy = sGc[dy][qy];
1826
1827 for (int dx = 0; dx < D1Dx; ++dx)
1828 {
1829 const double wx = sX[dz][dy][dx] * sBo[dx][qx];
1830 u += wx * wDy * wz;
1831 v += wx * wy * wDz;
1832 }
1833 }
1834 }
1835
1836 curl[qy][qx][1] += v; // (u_0)_{x_2}
1837 curl[qy][qx][2] -= u; // -(u_0)_{x_1}
1838 }
1839 else if (c == 1) // y component
1840 {
1841 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1842
1843 for (int dz = 0; dz < D1Dz; ++dz)
1844 {
1845 const double wz = sBc[dz][qz];
1846 const double wDz = sGc[dz][qz];
1847
1848 for (int dy = 0; dy < D1Dy; ++dy)
1849 {
1850 const double wy = sBo[dy][qy];
1851
1852 for (int dx = 0; dx < D1Dx; ++dx)
1853 {
1854 const double t = sX[dz][dy][dx];
1855 const double wx = t * sBc[dx][qx];
1856 const double wDx = t * sGc[dx][qx];
1857
1858 u += wDx * wy * wz;
1859 v += wx * wy * wDz;
1860 }
1861 }
1862 }
1863
1864 curl[qy][qx][0] -= v; // -(u_1)_{x_2}
1865 curl[qy][qx][2] += u; // (u_1)_{x_0}
1866 }
1867 else // z component
1868 {
1869 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1870
1871 for (int dz = 0; dz < D1Dz; ++dz)
1872 {
1873 const double wz = sBo[dz][qz];
1874
1875 for (int dy = 0; dy < D1Dy; ++dy)
1876 {
1877 const double wy = sBc[dy][qy];
1878 const double wDy = sGc[dy][qy];
1879
1880 for (int dx = 0; dx < D1Dx; ++dx)
1881 {
1882 const double t = sX[dz][dy][dx];
1883 const double wx = t * sBc[dx][qx];
1884 const double wDx = t * sGc[dx][qx];
1885
1886 u += wDx * wy * wz;
1887 v += wx * wDy * wz;
1888 }
1889 }
1890 }
1891
1892 curl[qy][qx][0] += v; // (u_2)_{x_1}
1893 curl[qy][qx][1] -= u; // -(u_2)_{x_0}
1894 }
1895 } // qx
1896 } // qy
1897 } // tidz == qz
1898
1899 osc += D1Dx * D1Dy * D1Dz;
1900 MFEM_SYNC_THREAD;
1901 } // c
1902
1903 double dxyz1 = 0.0;
1904 double dxyz2 = 0.0;
1905 double dxyz3 = 0.0;
1906
1907 MFEM_FOREACH_THREAD(dz,z,D1D)
1908 {
1909 const double wcz = sBc[dz][qz];
1910 const double wcDz = sGc[dz][qz];
1911 const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
1912
1913 MFEM_FOREACH_THREAD(dy,y,D1D)
1914 {
1915 MFEM_FOREACH_THREAD(dx,x,D1D)
1916 {
1917 for (int qy = 0; qy < Q1D; ++qy)
1918 {
1919 const double wcy = sBc[dy][qy];
1920 const double wcDy = sGc[dy][qy];
1921 const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
1922
1923 for (int qx = 0; qx < Q1D; ++qx)
1924 {
1925 const double O11 = sop[0][qx][qy];
1926 const double O12 = sop[1][qx][qy];
1927 const double O13 = sop[2][qx][qy];
1928 const double O21 = symmetric ? O12 : sop[3][qx][qy];
1929 const double O22 = symmetric ? sop[3][qx][qy] : sop[4][qx][qy];
1930 const double O23 = symmetric ? sop[4][qx][qy] : sop[5][qx][qy];
1931 const double O31 = symmetric ? O13 : sop[6][qx][qy];
1932 const double O32 = symmetric ? O23 : sop[7][qx][qy];
1933 const double O33 = symmetric ? sop[5][qx][qy] : sop[8][qx][qy];
1934
1935 const double c1 = (O11 * curl[qy][qx][0]) + (O12 * curl[qy][qx][1]) +
1936 (O13 * curl[qy][qx][2]);
1937 const double c2 = (O21 * curl[qy][qx][0]) + (O22 * curl[qy][qx][1]) +
1938 (O23 * curl[qy][qx][2]);
1939 const double c3 = (O31 * curl[qy][qx][0]) + (O32 * curl[qy][qx][1]) +
1940 (O33 * curl[qy][qx][2]);
1941
1942 const double wcx = sBc[dx][qx];
1943 const double wDx = sGc[dx][qx];
1944
1945 if (dx < D1D-1)
1946 {
1947 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1948 // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1949 const double wx = sBo[dx][qx];
1950 dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
1951 }
1952
1953 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1954 // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1955 dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
1956
1957 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1958 // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1959 dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
1960 } // qx
1961 } // qy
1962 } // dx
1963 } // dy
1964 } // dz
1965
1966 MFEM_SYNC_THREAD;
1967
1968 MFEM_FOREACH_THREAD(dz,z,D1D)
1969 {
1970 MFEM_FOREACH_THREAD(dy,y,D1D)
1971 {
1972 MFEM_FOREACH_THREAD(dx,x,D1D)
1973 {
1974 if (dx < D1D-1)
1975 {
1976 Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
1977 }
1978 if (dy < D1D-1)
1979 {
1980 Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
1981 }
1982 if (dz < D1D-1)
1983 {
1984 Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
1985 }
1986 }
1987 }
1988 }
1989 } // qz
1990 }); // end of element loop
1991 }
1992
AddMultPA(const Vector & x,Vector & y) const1993 void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
1994 {
1995 if (dim == 3)
1996 {
1997 if (Device::Allows(Backend::DEVICE_MASK))
1998 {
1999 const int ID = (dofs1D << 4) | quad1D;
2000 switch (ID)
2001 {
2002 case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne,
2003 mapsO->B, mapsC->B, mapsO->Bt,
2004 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2005 case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne,
2006 mapsO->B, mapsC->B, mapsO->Bt,
2007 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2008 case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne,
2009 mapsO->B,
2010 mapsC->B, mapsO->Bt,
2011 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2012 case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne,
2013 mapsO->B, mapsC->B, mapsO->Bt,
2014 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2015 default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B,
2016 mapsC->B, mapsO->Bt, mapsC->Bt,
2017 mapsC->G, mapsC->Gt, pa_data, x, y);
2018 }
2019 }
2020 else
2021 PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt,
2022 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2023 }
2024 else if (dim == 2)
2025 {
2026 PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt,
2027 mapsC->G, mapsC->Gt, pa_data, x, y);
2028 }
2029 else
2030 {
2031 MFEM_ABORT("Unsupported dimension!");
2032 }
2033 }
2034
PACurlCurlAssembleDiagonal2D(const int D1D,const int Q1D,const int NE,const Array<double> & bo,const Array<double> & gc,const Vector & pa_data,Vector & diag)2035 static void PACurlCurlAssembleDiagonal2D(const int D1D,
2036 const int Q1D,
2037 const int NE,
2038 const Array<double> &bo,
2039 const Array<double> &gc,
2040 const Vector &pa_data,
2041 Vector &diag)
2042 {
2043 constexpr static int VDIM = 2;
2044 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2045
2046 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2047 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2048 auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2049 auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
2050
2051 MFEM_FORALL(e, NE,
2052 {
2053 int osc = 0;
2054
2055 for (int c = 0; c < VDIM; ++c) // loop over x, y components
2056 {
2057 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2058 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2059
2060 double t[MAX_Q1D];
2061
2062 for (int dy = 0; dy < D1Dy; ++dy)
2063 {
2064 for (int qx = 0; qx < Q1D; ++qx)
2065 {
2066 t[qx] = 0.0;
2067 for (int qy = 0; qy < Q1D; ++qy)
2068 {
2069 const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy);
2070 t[qx] += wy * wy * op(qx,qy,e);
2071 }
2072 }
2073
2074 for (int dx = 0; dx < D1Dx; ++dx)
2075 {
2076 for (int qx = 0; qx < Q1D; ++qx)
2077 {
2078 const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2079 D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx;
2080 }
2081 }
2082 }
2083
2084 osc += D1Dx * D1Dy;
2085 } // loop c
2086 }); // end of element loop
2087 }
2088
2089 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
PACurlCurlAssembleDiagonal3D(const int D1D,const int Q1D,const bool symmetric,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & go,const Array<double> & gc,const Vector & pa_data,Vector & diag)2090 static void PACurlCurlAssembleDiagonal3D(const int D1D,
2091 const int Q1D,
2092 const bool symmetric,
2093 const int NE,
2094 const Array<double> &bo,
2095 const Array<double> &bc,
2096 const Array<double> &go,
2097 const Array<double> &gc,
2098 const Vector &pa_data,
2099 Vector &diag)
2100 {
2101 constexpr static int VDIM = 3;
2102 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2103 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2104
2105 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2106 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2107 auto Go = Reshape(go.Read(), Q1D, D1D-1);
2108 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2109 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2110 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2111
2112 const int s = symmetric ? 6 : 9;
2113 const int i11 = 0;
2114 const int i12 = 1;
2115 const int i13 = 2;
2116 const int i21 = symmetric ? i12 : 3;
2117 const int i22 = symmetric ? 3 : 4;
2118 const int i23 = symmetric ? 4 : 5;
2119 const int i31 = symmetric ? i13 : 6;
2120 const int i32 = symmetric ? i23 : 7;
2121 const int i33 = symmetric ? 5 : 8;
2122
2123 MFEM_FORALL(e, NE,
2124 {
2125 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2126 // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2127 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2128 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2129 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2130
2131 // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF),
2132 // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient.
2133
2134 int osc = 0;
2135
2136 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2137 {
2138 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2139 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2140 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2141
2142 double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3];
2143
2144 // z contraction
2145 for (int qx = 0; qx < Q1D; ++qx)
2146 {
2147 for (int qy = 0; qy < Q1D; ++qy)
2148 {
2149 for (int dz = 0; dz < D1Dz; ++dz)
2150 {
2151 for (int i=0; i<s; ++i)
2152 {
2153 for (int d=0; d<3; ++d)
2154 {
2155 zt[qx][qy][dz][i][d] = 0.0;
2156 }
2157 }
2158
2159 for (int qz = 0; qz < Q1D; ++qz)
2160 {
2161 const double wz = ((c == 2) ? Bo(qz,dz) : Bc(qz,dz));
2162 const double wDz = ((c == 2) ? Go(qz,dz) : Gc(qz,dz));
2163
2164 for (int i=0; i<s; ++i)
2165 {
2166 zt[qx][qy][dz][i][0] += wz * wz * op(qx,qy,qz,i,e);
2167 zt[qx][qy][dz][i][1] += wDz * wz * op(qx,qy,qz,i,e);
2168 zt[qx][qy][dz][i][2] += wDz * wDz * op(qx,qy,qz,i,e);
2169 }
2170 }
2171 }
2172 }
2173 } // end of z contraction
2174
2175 double yt[MAX_Q1D][MAX_D1D][MAX_D1D][9][3][3];
2176
2177 // y contraction
2178 for (int qx = 0; qx < Q1D; ++qx)
2179 {
2180 for (int dz = 0; dz < D1Dz; ++dz)
2181 {
2182 for (int dy = 0; dy < D1Dy; ++dy)
2183 {
2184 for (int i=0; i<s; ++i)
2185 {
2186 for (int d=0; d<3; ++d)
2187 for (int j=0; j<3; ++j)
2188 {
2189 yt[qx][dy][dz][i][d][j] = 0.0;
2190 }
2191 }
2192
2193 for (int qy = 0; qy < Q1D; ++qy)
2194 {
2195 const double wy = ((c == 1) ? Bo(qy,dy) : Bc(qy,dy));
2196 const double wDy = ((c == 1) ? Go(qy,dy) : Gc(qy,dy));
2197
2198 for (int i=0; i<s; ++i)
2199 {
2200 for (int d=0; d<3; ++d)
2201 {
2202 yt[qx][dy][dz][i][d][0] += wy * wy * zt[qx][qy][dz][i][d];
2203 yt[qx][dy][dz][i][d][1] += wDy * wy * zt[qx][qy][dz][i][d];
2204 yt[qx][dy][dz][i][d][2] += wDy * wDy * zt[qx][qy][dz][i][d];
2205 }
2206 }
2207 }
2208 }
2209 }
2210 } // end of y contraction
2211
2212 // x contraction
2213 for (int dz = 0; dz < D1Dz; ++dz)
2214 {
2215 for (int dy = 0; dy < D1Dy; ++dy)
2216 {
2217 for (int dx = 0; dx < D1Dx; ++dx)
2218 {
2219 for (int qx = 0; qx < Q1D; ++qx)
2220 {
2221 const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2222 const double wDx = ((c == 0) ? Go(qx,dx) : Gc(qx,dx));
2223
2224 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2225 // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2226 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2227 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2228 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2229
2230 /*
2231 const double O11 = op(q,0,e);
2232 const double O12 = op(q,1,e);
2233 const double O13 = op(q,2,e);
2234 const double O22 = op(q,3,e);
2235 const double O23 = op(q,4,e);
2236 const double O33 = op(q,5,e);
2237 */
2238
2239 if (c == 0)
2240 {
2241 // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2242 const double sumy = yt[qx][dy][dz][i22][2][0] - yt[qx][dy][dz][i23][1][1]
2243 - yt[qx][dy][dz][i32][1][1] + yt[qx][dy][dz][i33][0][2];
2244
2245 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += sumy * wx * wx;
2246 }
2247 else if (c == 1)
2248 {
2249 // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2250 const double d = (yt[qx][dy][dz][i11][2][0] * wx * wx)
2251 - ((yt[qx][dy][dz][i13][1][0] + yt[qx][dy][dz][i31][1][0]) * wDx * wx)
2252 + (yt[qx][dy][dz][i33][0][0] * wDx * wDx);
2253
2254 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2255 }
2256 else
2257 {
2258 // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2259 const double d = (yt[qx][dy][dz][i11][0][2] * wx * wx)
2260 - ((yt[qx][dy][dz][i12][0][1] + yt[qx][dy][dz][i21][0][1]) * wDx * wx)
2261 + (yt[qx][dy][dz][i22][0][0] * wDx * wDx);
2262
2263 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2264 }
2265 }
2266 }
2267 }
2268 } // end of x contraction
2269
2270 osc += D1Dx * D1Dy * D1Dz;
2271 } // loop c
2272 }); // end of element loop
2273 }
2274
// Shared-memory variant of the 3D curl-curl diagonal assembly kernel.
//
// For every element e, this adds to diag(., e) the diagonal entries of the
// element curl-curl matrix, evaluated from the quadrature-point operator stored
// in pa_data and the 1D tables bo/bc/go/gc.
//
// Parameters (as used below):
//   D1D, Q1D   - 1D sizes; must not exceed the template bounds MAX_D1D, MAX_Q1D.
//   symmetric  - selects 6 (symmetric storage) or 9 operator entries per point.
//   NE         - number of elements.
//   bo, go     - tables viewed as Q1D x (D1D-1); the caller in this file passes
//                mapsO->B and mapsO->G.
//   bc, gc     - tables viewed as Q1D x D1D; the caller in this file passes
//                mapsC->B and mapsC->G.
//   pa_data    - viewed as Q1D x Q1D x Q1D x (6 or 9) x NE.
//   diag       - viewed as 3*(D1D-1)*D1D*D1D x NE; accumulated into (+=).
//
// The kernel is launched with MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D, ...).
template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
static void SmemPACurlCurlAssembleDiagonal3D(const int D1D,
                                             const int Q1D,
                                             const bool symmetric,
                                             const int NE,
                                             const Array<double> &bo,
                                             const Array<double> &bc,
                                             const Array<double> &go,
                                             const Array<double> &gc,
                                             const Vector &pa_data,
                                             Vector &diag)
{
   // The MFEM_SHARED arrays below are sized with the template bounds.
   MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
   MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");

   auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
   auto Bc = Reshape(bc.Read(), Q1D, D1D);
   auto Go = Reshape(go.Read(), Q1D, D1D-1);
   auto Gc = Reshape(gc.Read(), Q1D, D1D);
   auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
   auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);

   // Index of operator entry O_ij inside the 4th dimension of op. With
   // symmetric storage only the upper triangle is stored, so the lower-triangle
   // indices alias their transposes.
   const int s = symmetric ? 6 : 9;
   const int i11 = 0;
   const int i12 = 1;
   const int i13 = 2;
   const int i21 = symmetric ? i12 : 3;
   const int i22 = symmetric ? 3 : 4;
   const int i23 = symmetric ? 4 : 5;
   const int i31 = symmetric ? i13 : 6;
   const int i32 = symmetric ? i23 : 7;
   const int i33 = symmetric ? 5 : 8;

   MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
   {
      // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
      // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
      // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
      // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
      // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]

      constexpr int VDIM = 3;

      // Copies of the 1D tables, indexed [q][d].
      MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D];
      MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D];
      MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D];
      MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D];

      // ope: operator entries read by this thread for its (qx,qy,qz);
      // sop: the operator entries of one qz plane, indexed [entry][qx][qy].
      double ope[9];
      MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];

      // Read the s operator entries at this thread's quadrature point.
      MFEM_FOREACH_THREAD(qx,x,Q1D)
      {
         MFEM_FOREACH_THREAD(qy,y,Q1D)
         {
            MFEM_FOREACH_THREAD(qz,z,Q1D)
            {
               for (int i=0; i<s; ++i)
               {
                  ope[i] = op(qx,qy,qz,i,e);
               }
            }
         }
      }

      const int tidx = MFEM_THREAD_ID(x);
      const int tidy = MFEM_THREAD_ID(y);
      const int tidz = MFEM_THREAD_ID(z);

      // Only the z == 0 layer of threads fills the 1D tables; the open tables
      // have one column fewer (d < D1D-1).
      if (tidz == 0)
      {
         MFEM_FOREACH_THREAD(d,y,D1D)
         {
            MFEM_FOREACH_THREAD(q,x,Q1D)
            {
               sBc[q][d] = Bc(q,d);
               sGc[q][d] = Gc(q,d);
               if (d < D1D-1)
               {
                  sBo[q][d] = Bo(q,d);
                  sGo[q][d] = Go(q,d);
               }
            }
         }
      }
      MFEM_SYNC_THREAD;

      // osc is the offset of component c inside the element dof vector.
      int osc = 0;
      for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
      {
         // Component c has D1D-1 dofs in direction c and D1D in the others.
         const int D1Dz = (c == 2) ? D1D - 1 : D1D;
         const int D1Dy = (c == 1) ? D1D - 1 : D1D;
         const int D1Dx = (c == 0) ? D1D - 1 : D1D;

         // Per-thread accumulator for the diagonal entry written after the qz
         // loop.
         // NOTE(review): a single accumulator per thread appears to assume each
         // thread visits at most one (dx,dy,dz) triple - confirm.
         double dxyz = 0.0;

         for (int qz=0; qz < Q1D; ++qz)
         {
            // The thread layer whose z index equals qz publishes its operator
            // entries for this plane.
            if (tidz == qz)
            {
               for (int i=0; i<s; ++i)
               {
                  sop[i][tidx][tidy] = ope[i];
               }
            }

            // Make sop visible to all threads before it is read.
            MFEM_SYNC_THREAD;

            MFEM_FOREACH_THREAD(dz,z,D1Dz)
            {
               // Open tables are used in the direction of the component,
               // closed tables otherwise (same pattern for y and x below).
               const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
               const double wDz = ((c == 2) ? sGo[qz][dz] : sGc[qz][dz]);

               MFEM_FOREACH_THREAD(dy,y,D1Dy)
               {
                  MFEM_FOREACH_THREAD(dx,x,D1Dx)
                  {
                     for (int qy = 0; qy < Q1D; ++qy)
                     {
                        const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
                        const double wDy = ((c == 1) ? sGo[qy][dy] : sGc[qy][dy]);

                        for (int qx = 0; qx < Q1D; ++qx)
                        {
                           const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
                           const double wDx = ((c == 0) ? sGo[qx][dx] : sGc[qx][dx]);

                           if (c == 0)
                           {
                              // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})

                              // (u_0)_{x_2} O22 (u_0)_{x_2}
                              dxyz += sop[i22][qx][qy] * wx * wx * wy * wy * wDz * wDz;

                              // -(u_0)_{x_2} O23 (u_0)_{x_1} - (u_0)_{x_1} O32 (u_0)_{x_2}
                              dxyz += -(sop[i23][qx][qy] + sop[i32][qx][qy]) * wx * wx * wDy * wy * wDz * wz;

                              // (u_0)_{x_1} O33 (u_0)_{x_1}
                              dxyz += sop[i33][qx][qy] * wx * wx * wDy * wDy * wz * wz;
                           }
                           else if (c == 1)
                           {
                              // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})

                              // (u_1)_{x_2} O11 (u_1)_{x_2}
                              dxyz += sop[i11][qx][qy] * wx * wx * wy * wy * wDz * wDz;

                              // -(u_1)_{x_2} O13 (u_1)_{x_0} - (u_1)_{x_0} O31 (u_1)_{x_2}
                              dxyz += -(sop[i13][qx][qy] + sop[i31][qx][qy]) * wDx * wx * wy * wy * wDz * wz;

                              // (u_1)_{x_0} O33 (u_1)_{x_0}
                              dxyz += sop[i33][qx][qy] * wDx * wDx * wy * wy * wz * wz;
                           }
                           else
                           {
                              // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})

                              // (u_2)_{x_1} O11 (u_2)_{x_1}
                              dxyz += sop[i11][qx][qy] * wx * wx * wDy * wDy * wz * wz;

                              // -(u_2)_{x_1} O12 (u_2)_{x_0} - (u_2)_{x_0} O21 (u_2)_{x_1}
                              dxyz += -(sop[i12][qx][qy] + sop[i21][qx][qy]) * wDx * wx * wDy * wy * wz * wz;

                              // (u_2)_{x_0} O22 (u_2)_{x_0}
                              dxyz += sop[i22][qx][qy] * wDx * wDx * wy * wy * wz * wz;
                           }
                        }
                     }
                  }
               }
            }

            // All reads of sop for this plane must finish before the next
            // iteration overwrites it.
            MFEM_SYNC_THREAD;
         } // qz loop

         // Add the accumulated value to the diagonal entry of dof (dx,dy,dz)
         // of component c.
         MFEM_FOREACH_THREAD(dz,z,D1Dz)
         {
            MFEM_FOREACH_THREAD(dy,y,D1Dy)
            {
               MFEM_FOREACH_THREAD(dx,x,D1Dx)
               {
                  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
               }
            }
         }

         osc += D1Dx * D1Dy * D1Dz;
      } // c loop
   }); // end of element loop
}
2465
AssembleDiagonalPA(Vector & diag)2466 void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag)
2467 {
2468 if (dim == 3)
2469 {
2470 if (Device::Allows(Backend::DEVICE_MASK))
2471 {
2472 const int ID = (dofs1D << 4) | quad1D;
2473 switch (ID)
2474 {
2475 case 0x23: return SmemPACurlCurlAssembleDiagonal3D<2,3>(dofs1D, quad1D,
2476 symmetric, ne,
2477 mapsO->B, mapsC->B,
2478 mapsO->G, mapsC->G,
2479 pa_data, diag);
2480 case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D,
2481 symmetric, ne,
2482 mapsO->B, mapsC->B,
2483 mapsO->G, mapsC->G,
2484 pa_data, diag);
2485 case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D,
2486 symmetric, ne,
2487 mapsO->B, mapsC->B,
2488 mapsO->G, mapsC->G,
2489 pa_data, diag);
2490 case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D,
2491 symmetric, ne,
2492 mapsO->B, mapsC->B,
2493 mapsO->G, mapsC->G,
2494 pa_data, diag);
2495 default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2496 mapsO->B, mapsC->B,
2497 mapsO->G, mapsC->G,
2498 pa_data, diag);
2499 }
2500 }
2501 else
2502 PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2503 mapsO->B, mapsC->B,
2504 mapsO->G, mapsC->G,
2505 pa_data, diag);
2506 }
2507 else if (dim == 2)
2508 {
2509 PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne,
2510 mapsO->B, mapsC->G, pa_data, diag);
2511 }
2512 else
2513 {
2514 MFEM_ABORT("Unsupported dimension!");
2515 }
2516 }
2517
2518 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2519 // integrated against H(curl) test functions corresponding to y.
PAHcurlH1Apply3D(const int D1D,const int Q1D,const int NE,const Array<double> & bc,const Array<double> & gc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)2520 void PAHcurlH1Apply3D(const int D1D,
2521 const int Q1D,
2522 const int NE,
2523 const Array<double> &bc,
2524 const Array<double> &gc,
2525 const Array<double> &bot,
2526 const Array<double> &bct,
2527 const Vector &pa_data,
2528 const Vector &x,
2529 Vector &y)
2530 {
2531 constexpr static int MAX_D1D = HCURL_MAX_D1D;
2532 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2533
2534 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2535 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2536
2537 constexpr static int VDIM = 3;
2538
2539 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2540 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2541 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2542 auto Bct = Reshape(bct.Read(), D1D, Q1D);
2543 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2544 auto X = Reshape(x.Read(), D1D, D1D, D1D, NE);
2545 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2546
2547 MFEM_FORALL(e, NE,
2548 {
2549 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2550
2551 for (int qz = 0; qz < Q1D; ++qz)
2552 {
2553 for (int qy = 0; qy < Q1D; ++qy)
2554 {
2555 for (int qx = 0; qx < Q1D; ++qx)
2556 {
2557 for (int c = 0; c < VDIM; ++c)
2558 {
2559 mass[qz][qy][qx][c] = 0.0;
2560 }
2561 }
2562 }
2563 }
2564
2565 for (int dz = 0; dz < D1D; ++dz)
2566 {
2567 double gradXY[MAX_Q1D][MAX_Q1D][3];
2568 for (int qy = 0; qy < Q1D; ++qy)
2569 {
2570 for (int qx = 0; qx < Q1D; ++qx)
2571 {
2572 gradXY[qy][qx][0] = 0.0;
2573 gradXY[qy][qx][1] = 0.0;
2574 gradXY[qy][qx][2] = 0.0;
2575 }
2576 }
2577 for (int dy = 0; dy < D1D; ++dy)
2578 {
2579 double gradX[MAX_Q1D][2];
2580 for (int qx = 0; qx < Q1D; ++qx)
2581 {
2582 gradX[qx][0] = 0.0;
2583 gradX[qx][1] = 0.0;
2584 }
2585 for (int dx = 0; dx < D1D; ++dx)
2586 {
2587 const double s = X(dx,dy,dz,e);
2588 for (int qx = 0; qx < Q1D; ++qx)
2589 {
2590 gradX[qx][0] += s * Bc(qx,dx);
2591 gradX[qx][1] += s * Gc(qx,dx);
2592 }
2593 }
2594 for (int qy = 0; qy < Q1D; ++qy)
2595 {
2596 const double wy = Bc(qy,dy);
2597 const double wDy = Gc(qy,dy);
2598 for (int qx = 0; qx < Q1D; ++qx)
2599 {
2600 const double wx = gradX[qx][0];
2601 const double wDx = gradX[qx][1];
2602 gradXY[qy][qx][0] += wDx * wy;
2603 gradXY[qy][qx][1] += wx * wDy;
2604 gradXY[qy][qx][2] += wx * wy;
2605 }
2606 }
2607 }
2608 for (int qz = 0; qz < Q1D; ++qz)
2609 {
2610 const double wz = Bc(qz,dz);
2611 const double wDz = Gc(qz,dz);
2612 for (int qy = 0; qy < Q1D; ++qy)
2613 {
2614 for (int qx = 0; qx < Q1D; ++qx)
2615 {
2616 mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
2617 mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
2618 mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
2619 }
2620 }
2621 }
2622 }
2623
2624 // Apply D operator.
2625 for (int qz = 0; qz < Q1D; ++qz)
2626 {
2627 for (int qy = 0; qy < Q1D; ++qy)
2628 {
2629 for (int qx = 0; qx < Q1D; ++qx)
2630 {
2631 const double O11 = op(qx,qy,qz,0,e);
2632 const double O12 = op(qx,qy,qz,1,e);
2633 const double O13 = op(qx,qy,qz,2,e);
2634 const double O22 = op(qx,qy,qz,3,e);
2635 const double O23 = op(qx,qy,qz,4,e);
2636 const double O33 = op(qx,qy,qz,5,e);
2637 const double massX = mass[qz][qy][qx][0];
2638 const double massY = mass[qz][qy][qx][1];
2639 const double massZ = mass[qz][qy][qx][2];
2640 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
2641 mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
2642 mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
2643 }
2644 }
2645 }
2646
2647 for (int qz = 0; qz < Q1D; ++qz)
2648 {
2649 double massXY[MAX_D1D][MAX_D1D];
2650
2651 int osc = 0;
2652
2653 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2654 {
2655 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2656 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2657 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2658
2659 for (int dy = 0; dy < D1Dy; ++dy)
2660 {
2661 for (int dx = 0; dx < D1Dx; ++dx)
2662 {
2663 massXY[dy][dx] = 0.0;
2664 }
2665 }
2666 for (int qy = 0; qy < Q1D; ++qy)
2667 {
2668 double massX[MAX_D1D];
2669 for (int dx = 0; dx < D1Dx; ++dx)
2670 {
2671 massX[dx] = 0;
2672 }
2673 for (int qx = 0; qx < Q1D; ++qx)
2674 {
2675 for (int dx = 0; dx < D1Dx; ++dx)
2676 {
2677 massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2678 }
2679 }
2680 for (int dy = 0; dy < D1Dy; ++dy)
2681 {
2682 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2683 for (int dx = 0; dx < D1Dx; ++dx)
2684 {
2685 massXY[dy][dx] += massX[dx] * wy;
2686 }
2687 }
2688 }
2689
2690 for (int dz = 0; dz < D1Dz; ++dz)
2691 {
2692 const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
2693 for (int dy = 0; dy < D1Dy; ++dy)
2694 {
2695 for (int dx = 0; dx < D1Dx; ++dx)
2696 {
2697 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
2698 }
2699 }
2700 }
2701
2702 osc += D1Dx * D1Dy * D1Dz;
2703 } // loop c
2704 } // loop qz
2705 }); // end of element loop
2706 }
2707
2708 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2709 // integrated against H(curl) test functions corresponding to y.
PAHcurlH1Apply2D(const int D1D,const int Q1D,const int NE,const Array<double> & bc,const Array<double> & gc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)2710 void PAHcurlH1Apply2D(const int D1D,
2711 const int Q1D,
2712 const int NE,
2713 const Array<double> &bc,
2714 const Array<double> &gc,
2715 const Array<double> &bot,
2716 const Array<double> &bct,
2717 const Vector &pa_data,
2718 const Vector &x,
2719 Vector &y)
2720 {
2721 constexpr static int VDIM = 2;
2722 constexpr static int MAX_D1D = HCURL_MAX_D1D;
2723 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2724
2725 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2726 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2727 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2728 auto Bct = Reshape(bct.Read(), D1D, Q1D);
2729 auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
2730 auto X = Reshape(x.Read(), D1D, D1D, NE);
2731 auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
2732
2733 MFEM_FORALL(e, NE,
2734 {
2735 double mass[MAX_Q1D][MAX_Q1D][VDIM];
2736
2737 for (int qy = 0; qy < Q1D; ++qy)
2738 {
2739 for (int qx = 0; qx < Q1D; ++qx)
2740 {
2741 for (int c = 0; c < VDIM; ++c)
2742 {
2743 mass[qy][qx][c] = 0.0;
2744 }
2745 }
2746 }
2747
2748 for (int dy = 0; dy < D1D; ++dy)
2749 {
2750 double gradX[MAX_Q1D][2];
2751 for (int qx = 0; qx < Q1D; ++qx)
2752 {
2753 gradX[qx][0] = 0.0;
2754 gradX[qx][1] = 0.0;
2755 }
2756 for (int dx = 0; dx < D1D; ++dx)
2757 {
2758 const double s = X(dx,dy,e);
2759 for (int qx = 0; qx < Q1D; ++qx)
2760 {
2761 gradX[qx][0] += s * Bc(qx,dx);
2762 gradX[qx][1] += s * Gc(qx,dx);
2763 }
2764 }
2765 for (int qy = 0; qy < Q1D; ++qy)
2766 {
2767 const double wy = Bc(qy,dy);
2768 const double wDy = Gc(qy,dy);
2769 for (int qx = 0; qx < Q1D; ++qx)
2770 {
2771 const double wx = gradX[qx][0];
2772 const double wDx = gradX[qx][1];
2773 mass[qy][qx][0] += wDx * wy;
2774 mass[qy][qx][1] += wx * wDy;
2775 }
2776 }
2777 }
2778
2779 // Apply D operator.
2780 for (int qy = 0; qy < Q1D; ++qy)
2781 {
2782 for (int qx = 0; qx < Q1D; ++qx)
2783 {
2784 const double O11 = op(qx,qy,0,e);
2785 const double O12 = op(qx,qy,1,e);
2786 const double O22 = op(qx,qy,2,e);
2787 const double massX = mass[qy][qx][0];
2788 const double massY = mass[qy][qx][1];
2789 mass[qy][qx][0] = (O11*massX)+(O12*massY);
2790 mass[qy][qx][1] = (O12*massX)+(O22*massY);
2791 }
2792 }
2793
2794 for (int qy = 0; qy < Q1D; ++qy)
2795 {
2796 int osc = 0;
2797
2798 for (int c = 0; c < VDIM; ++c) // loop over x, y components
2799 {
2800 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2801 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2802
2803 double massX[MAX_D1D];
2804 for (int dx = 0; dx < D1Dx; ++dx)
2805 {
2806 massX[dx] = 0;
2807 }
2808 for (int qx = 0; qx < Q1D; ++qx)
2809 {
2810 for (int dx = 0; dx < D1Dx; ++dx)
2811 {
2812 massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2813 }
2814 }
2815
2816 for (int dy = 0; dy < D1Dy; ++dy)
2817 {
2818 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2819
2820 for (int dx = 0; dx < D1Dx; ++dx)
2821 {
2822 Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
2823 }
2824 }
2825
2826 osc += D1Dx * D1Dy;
2827 } // loop c
2828 }
2829 }); // end of element loop
2830 }
2831
2832 // PA H(curl) Mass Assemble 3D kernel
PAHcurlL2Setup(const int NQ,const int coeffDim,const int NE,const Array<double> & w,Vector & coeff,Vector & op)2833 void PAHcurlL2Setup(const int NQ,
2834 const int coeffDim,
2835 const int NE,
2836 const Array<double> &w,
2837 Vector &coeff,
2838 Vector &op)
2839 {
2840 auto W = w.Read();
2841 auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
2842 auto y = Reshape(op.Write(), coeffDim, NQ, NE);
2843
2844 MFEM_FORALL(e, NE,
2845 {
2846 for (int q = 0; q < NQ; ++q)
2847 {
2848 for (int c=0; c<coeffDim; ++c)
2849 {
2850 y(c,q,e) = W[q] * C(c,q,e);
2851 }
2852 }
2853 });
2854 }
2855
AssemblePA(const FiniteElementSpace & trial_fes,const FiniteElementSpace & test_fes)2856 void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
2857 const FiniteElementSpace &test_fes)
2858 {
2859 // Assumes tensor-product elements, with vector test and trial spaces.
2860 Mesh *mesh = trial_fes.GetMesh();
2861 const FiniteElement *trial_fel = trial_fes.GetFE(0);
2862 const FiniteElement *test_fel = test_fes.GetFE(0);
2863
2864 const VectorTensorFiniteElement *trial_el =
2865 dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
2866 MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
2867
2868 const VectorTensorFiniteElement *test_el =
2869 dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
2870 MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
2871
2872 const IntegrationRule *ir
2873 = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
2874 *mesh->GetElementTransformation(0));
2875 const int dims = trial_el->GetDim();
2876 MFEM_VERIFY(dims == 3, "");
2877
2878 const int nq = ir->GetNPoints();
2879 dim = mesh->Dimension();
2880 MFEM_VERIFY(dim == 3, "");
2881
2882 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
2883
2884 ne = trial_fes.GetNE();
2885 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
2886 mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2887 mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2888 mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2889 mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2890 dofs1D = mapsC->ndof;
2891 quad1D = mapsC->nqpt;
2892 dofs1Dtest = mapsCtest->ndof;
2893
2894 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
2895
2896 testType = test_el->GetDerivType();
2897 trialType = trial_el->GetDerivType();
2898
2899 const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
2900 coeffDim = (DQ ? 3 : 1);
2901
2902 pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType());
2903
2904 Vector coeff(coeffDim * nq * ne);
2905 coeff = 1.0;
2906 auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
2907 if (Q || DQ)
2908 {
2909 Vector V(coeffDim);
2910 if (DQ)
2911 {
2912 MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
2913 }
2914
2915 for (int e=0; e<ne; ++e)
2916 {
2917 ElementTransformation *tr = mesh->GetElementTransformation(e);
2918
2919 for (int p=0; p<nq; ++p)
2920 {
2921 if (DQ)
2922 {
2923 DQ->Eval(V, *tr, ir->IntPoint(p));
2924 for (int i=0; i<coeffDim; ++i)
2925 {
2926 coeffh(i, p, e) = V[i];
2927 }
2928 }
2929 else
2930 {
2931 coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
2932 }
2933 }
2934 }
2935 }
2936
2937 if (testType == mfem::FiniteElement::CURL &&
2938 trialType == mfem::FiniteElement::CURL && dim == 3)
2939 {
2940 PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
2941 }
2942 else if (testType == mfem::FiniteElement::DIV &&
2943 trialType == mfem::FiniteElement::CURL && dim == 3 &&
2944 test_fel->GetOrder() == trial_fel->GetOrder())
2945 {
2946 PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
2947 pa_data);
2948 }
2949 else
2950 {
2951 MFEM_ABORT("Unknown kernel.");
2952 }
2953 }
2954
2955 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
2956 // integrated against H(curl) test functions corresponding to y.
2957 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
PAHcurlL2Apply3D(const int D1D,const int Q1D,const int coeffDim,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Array<double> & gc,const Vector & pa_data,const Vector & x,Vector & y)2958 static void PAHcurlL2Apply3D(const int D1D,
2959 const int Q1D,
2960 const int coeffDim,
2961 const int NE,
2962 const Array<double> &bo,
2963 const Array<double> &bc,
2964 const Array<double> &bot,
2965 const Array<double> &bct,
2966 const Array<double> &gc,
2967 const Vector &pa_data,
2968 const Vector &x,
2969 Vector &y)
2970 {
2971 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2972 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2973 // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2974 // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v}
2975 // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v}
2976 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2977 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2978 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2979
2980 constexpr static int VDIM = 3;
2981
2982 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2983 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2984 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2985 auto Bct = Reshape(bct.Read(), D1D, Q1D);
2986 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2987 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
2988 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
2989 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2990
2991 MFEM_FORALL(e, NE,
2992 {
2993 double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2994 // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
2995
2996 for (int qz = 0; qz < Q1D; ++qz)
2997 {
2998 for (int qy = 0; qy < Q1D; ++qy)
2999 {
3000 for (int qx = 0; qx < Q1D; ++qx)
3001 {
3002 for (int c = 0; c < VDIM; ++c)
3003 {
3004 curl[qz][qy][qx][c] = 0.0;
3005 }
3006 }
3007 }
3008 }
3009
3010 // We treat x, y, z components separately for optimization specific to each.
3011
3012 int osc = 0;
3013
3014 {
3015 // x component
3016 const int D1Dz = D1D;
3017 const int D1Dy = D1D;
3018 const int D1Dx = D1D - 1;
3019
3020 for (int dz = 0; dz < D1Dz; ++dz)
3021 {
3022 double gradXY[MAX_Q1D][MAX_Q1D][2];
3023 for (int qy = 0; qy < Q1D; ++qy)
3024 {
3025 for (int qx = 0; qx < Q1D; ++qx)
3026 {
3027 for (int d = 0; d < 2; ++d)
3028 {
3029 gradXY[qy][qx][d] = 0.0;
3030 }
3031 }
3032 }
3033
3034 for (int dy = 0; dy < D1Dy; ++dy)
3035 {
3036 double massX[MAX_Q1D];
3037 for (int qx = 0; qx < Q1D; ++qx)
3038 {
3039 massX[qx] = 0.0;
3040 }
3041
3042 for (int dx = 0; dx < D1Dx; ++dx)
3043 {
3044 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3045 for (int qx = 0; qx < Q1D; ++qx)
3046 {
3047 massX[qx] += t * Bo(qx,dx);
3048 }
3049 }
3050
3051 for (int qy = 0; qy < Q1D; ++qy)
3052 {
3053 const double wy = Bc(qy,dy);
3054 const double wDy = Gc(qy,dy);
3055 for (int qx = 0; qx < Q1D; ++qx)
3056 {
3057 const double wx = massX[qx];
3058 gradXY[qy][qx][0] += wx * wDy;
3059 gradXY[qy][qx][1] += wx * wy;
3060 }
3061 }
3062 }
3063
3064 for (int qz = 0; qz < Q1D; ++qz)
3065 {
3066 const double wz = Bc(qz,dz);
3067 const double wDz = Gc(qz,dz);
3068 for (int qy = 0; qy < Q1D; ++qy)
3069 {
3070 for (int qx = 0; qx < Q1D; ++qx)
3071 {
3072 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3073 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3074 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3075 }
3076 }
3077 }
3078 }
3079
3080 osc += D1Dx * D1Dy * D1Dz;
3081 }
3082
3083 {
3084 // y component
3085 const int D1Dz = D1D;
3086 const int D1Dy = D1D - 1;
3087 const int D1Dx = D1D;
3088
3089 for (int dz = 0; dz < D1Dz; ++dz)
3090 {
3091 double gradXY[MAX_Q1D][MAX_Q1D][2];
3092 for (int qy = 0; qy < Q1D; ++qy)
3093 {
3094 for (int qx = 0; qx < Q1D; ++qx)
3095 {
3096 for (int d = 0; d < 2; ++d)
3097 {
3098 gradXY[qy][qx][d] = 0.0;
3099 }
3100 }
3101 }
3102
3103 for (int dx = 0; dx < D1Dx; ++dx)
3104 {
3105 double massY[MAX_Q1D];
3106 for (int qy = 0; qy < Q1D; ++qy)
3107 {
3108 massY[qy] = 0.0;
3109 }
3110
3111 for (int dy = 0; dy < D1Dy; ++dy)
3112 {
3113 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3114 for (int qy = 0; qy < Q1D; ++qy)
3115 {
3116 massY[qy] += t * Bo(qy,dy);
3117 }
3118 }
3119
3120 for (int qx = 0; qx < Q1D; ++qx)
3121 {
3122 const double wx = Bc(qx,dx);
3123 const double wDx = Gc(qx,dx);
3124 for (int qy = 0; qy < Q1D; ++qy)
3125 {
3126 const double wy = massY[qy];
3127 gradXY[qy][qx][0] += wDx * wy;
3128 gradXY[qy][qx][1] += wx * wy;
3129 }
3130 }
3131 }
3132
3133 for (int qz = 0; qz < Q1D; ++qz)
3134 {
3135 const double wz = Bc(qz,dz);
3136 const double wDz = Gc(qz,dz);
3137 for (int qy = 0; qy < Q1D; ++qy)
3138 {
3139 for (int qx = 0; qx < Q1D; ++qx)
3140 {
3141 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3142 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3143 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3144 }
3145 }
3146 }
3147 }
3148
3149 osc += D1Dx * D1Dy * D1Dz;
3150 }
3151
3152 {
3153 // z component
3154 const int D1Dz = D1D - 1;
3155 const int D1Dy = D1D;
3156 const int D1Dx = D1D;
3157
3158 for (int dx = 0; dx < D1Dx; ++dx)
3159 {
3160 double gradYZ[MAX_Q1D][MAX_Q1D][2];
3161 for (int qz = 0; qz < Q1D; ++qz)
3162 {
3163 for (int qy = 0; qy < Q1D; ++qy)
3164 {
3165 for (int d = 0; d < 2; ++d)
3166 {
3167 gradYZ[qz][qy][d] = 0.0;
3168 }
3169 }
3170 }
3171
3172 for (int dy = 0; dy < D1Dy; ++dy)
3173 {
3174 double massZ[MAX_Q1D];
3175 for (int qz = 0; qz < Q1D; ++qz)
3176 {
3177 massZ[qz] = 0.0;
3178 }
3179
3180 for (int dz = 0; dz < D1Dz; ++dz)
3181 {
3182 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3183 for (int qz = 0; qz < Q1D; ++qz)
3184 {
3185 massZ[qz] += t * Bo(qz,dz);
3186 }
3187 }
3188
3189 for (int qy = 0; qy < Q1D; ++qy)
3190 {
3191 const double wy = Bc(qy,dy);
3192 const double wDy = Gc(qy,dy);
3193 for (int qz = 0; qz < Q1D; ++qz)
3194 {
3195 const double wz = massZ[qz];
3196 gradYZ[qz][qy][0] += wz * wy;
3197 gradYZ[qz][qy][1] += wz * wDy;
3198 }
3199 }
3200 }
3201
3202 for (int qx = 0; qx < Q1D; ++qx)
3203 {
3204 const double wx = Bc(qx,dx);
3205 const double wDx = Gc(qx,dx);
3206
3207 for (int qy = 0; qy < Q1D; ++qy)
3208 {
3209 for (int qz = 0; qz < Q1D; ++qz)
3210 {
3211 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3212 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3213 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3214 }
3215 }
3216 }
3217 }
3218 }
3219
3220 // Apply D operator.
3221 for (int qz = 0; qz < Q1D; ++qz)
3222 {
3223 for (int qy = 0; qy < Q1D; ++qy)
3224 {
3225 for (int qx = 0; qx < Q1D; ++qx)
3226 {
3227 for (int c = 0; c < VDIM; ++c)
3228 {
3229 curl[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
3230 }
3231 }
3232 }
3233 }
3234
3235 for (int qz = 0; qz < Q1D; ++qz)
3236 {
3237 double massXY[MAX_D1D][MAX_D1D];
3238
3239 osc = 0;
3240
3241 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3242 {
3243 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3244 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3245 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3246
3247 for (int dy = 0; dy < D1Dy; ++dy)
3248 {
3249 for (int dx = 0; dx < D1Dx; ++dx)
3250 {
3251 massXY[dy][dx] = 0;
3252 }
3253 }
3254 for (int qy = 0; qy < Q1D; ++qy)
3255 {
3256 double massX[MAX_D1D];
3257 for (int dx = 0; dx < D1Dx; ++dx)
3258 {
3259 massX[dx] = 0.0;
3260 }
3261 for (int qx = 0; qx < Q1D; ++qx)
3262 {
3263 for (int dx = 0; dx < D1Dx; ++dx)
3264 {
3265 massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3266 }
3267 }
3268
3269 for (int dy = 0; dy < D1Dy; ++dy)
3270 {
3271 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3272 for (int dx = 0; dx < D1Dx; ++dx)
3273 {
3274 massXY[dy][dx] += massX[dx] * wy;
3275 }
3276 }
3277 }
3278
3279 for (int dz = 0; dz < D1Dz; ++dz)
3280 {
3281 const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
3282 for (int dy = 0; dy < D1Dy; ++dy)
3283 {
3284 for (int dx = 0; dx < D1Dx; ++dx)
3285 {
3286 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
3287 }
3288 }
3289 }
3290
3291 osc += D1Dx * D1Dy * D1Dz;
3292 } // loop c
3293 } // loop qz
3294 }); // end of element loop
3295 }
3296
3297 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3298 // integrated against H(curl) test functions corresponding to y.
3299 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
SmemPAHcurlL2Apply3D(const int D1D,const int Q1D,const int coeffDim,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & gc,const Vector & pa_data,const Vector & x,Vector & y)3300 static void SmemPAHcurlL2Apply3D(const int D1D,
3301 const int Q1D,
3302 const int coeffDim,
3303 const int NE,
3304 const Array<double> &bo,
3305 const Array<double> &bc,
3306 const Array<double> &gc,
3307 const Vector &pa_data,
3308 const Vector &x,
3309 Vector &y)
3310 {
3311 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3312 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3313
3314 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3315 auto Bc = Reshape(bc.Read(), Q1D, D1D);
3316 auto Gc = Reshape(gc.Read(), Q1D, D1D);
3317 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3318 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3319 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3320
3321 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
3322 {
3323 constexpr int VDIM = 3;
3324 constexpr int maxCoeffDim = 3;
3325
3326 MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
3327 MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
3328 MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
3329
3330 double opc[maxCoeffDim];
3331 MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
3332 MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
3333
3334 MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
3335
3336 MFEM_FOREACH_THREAD(qx,x,Q1D)
3337 {
3338 MFEM_FOREACH_THREAD(qy,y,Q1D)
3339 {
3340 MFEM_FOREACH_THREAD(qz,z,Q1D)
3341 {
3342 for (int i=0; i<coeffDim; ++i)
3343 {
3344 opc[i] = op(i,qx,qy,qz,e);
3345 }
3346 }
3347 }
3348 }
3349
3350 const int tidx = MFEM_THREAD_ID(x);
3351 const int tidy = MFEM_THREAD_ID(y);
3352 const int tidz = MFEM_THREAD_ID(z);
3353
3354 if (tidz == 0)
3355 {
3356 MFEM_FOREACH_THREAD(d,y,D1D)
3357 {
3358 MFEM_FOREACH_THREAD(q,x,Q1D)
3359 {
3360 sBc[d][q] = Bc(q,d);
3361 sGc[d][q] = Gc(q,d);
3362 if (d < D1D-1)
3363 {
3364 sBo[d][q] = Bo(q,d);
3365 }
3366 }
3367 }
3368 }
3369 MFEM_SYNC_THREAD;
3370
3371 for (int qz=0; qz < Q1D; ++qz)
3372 {
3373 if (tidz == qz)
3374 {
3375 MFEM_FOREACH_THREAD(qy,y,Q1D)
3376 {
3377 MFEM_FOREACH_THREAD(qx,x,Q1D)
3378 {
3379 for (int i=0; i<3; ++i)
3380 {
3381 curl[qy][qx][i] = 0.0;
3382 }
3383 }
3384 }
3385 }
3386
3387 int osc = 0;
3388 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3389 {
3390 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3391 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3392 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3393
3394 MFEM_FOREACH_THREAD(dz,z,D1Dz)
3395 {
3396 MFEM_FOREACH_THREAD(dy,y,D1Dy)
3397 {
3398 MFEM_FOREACH_THREAD(dx,x,D1Dx)
3399 {
3400 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3401 }
3402 }
3403 }
3404 MFEM_SYNC_THREAD;
3405
3406 if (tidz == qz)
3407 {
3408 if (c == 0)
3409 {
3410 for (int i=0; i<coeffDim; ++i)
3411 {
3412 sop[i][tidx][tidy] = opc[i];
3413 }
3414 }
3415
3416 MFEM_FOREACH_THREAD(qy,y,Q1D)
3417 {
3418 MFEM_FOREACH_THREAD(qx,x,Q1D)
3419 {
3420 double u = 0.0;
3421 double v = 0.0;
3422
3423 // We treat x, y, z components separately for optimization specific to each.
3424 if (c == 0) // x component
3425 {
3426 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3427
3428 for (int dz = 0; dz < D1Dz; ++dz)
3429 {
3430 const double wz = sBc[dz][qz];
3431 const double wDz = sGc[dz][qz];
3432
3433 for (int dy = 0; dy < D1Dy; ++dy)
3434 {
3435 const double wy = sBc[dy][qy];
3436 const double wDy = sGc[dy][qy];
3437
3438 for (int dx = 0; dx < D1Dx; ++dx)
3439 {
3440 const double wx = sX[dz][dy][dx] * sBo[dx][qx];
3441 u += wx * wDy * wz;
3442 v += wx * wy * wDz;
3443 }
3444 }
3445 }
3446
3447 curl[qy][qx][1] += v; // (u_0)_{x_2}
3448 curl[qy][qx][2] -= u; // -(u_0)_{x_1}
3449 }
3450 else if (c == 1) // y component
3451 {
3452 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3453
3454 for (int dz = 0; dz < D1Dz; ++dz)
3455 {
3456 const double wz = sBc[dz][qz];
3457 const double wDz = sGc[dz][qz];
3458
3459 for (int dy = 0; dy < D1Dy; ++dy)
3460 {
3461 const double wy = sBo[dy][qy];
3462
3463 for (int dx = 0; dx < D1Dx; ++dx)
3464 {
3465 const double t = sX[dz][dy][dx];
3466 const double wx = t * sBc[dx][qx];
3467 const double wDx = t * sGc[dx][qx];
3468
3469 u += wDx * wy * wz;
3470 v += wx * wy * wDz;
3471 }
3472 }
3473 }
3474
3475 curl[qy][qx][0] -= v; // -(u_1)_{x_2}
3476 curl[qy][qx][2] += u; // (u_1)_{x_0}
3477 }
3478 else // z component
3479 {
3480 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3481
3482 for (int dz = 0; dz < D1Dz; ++dz)
3483 {
3484 const double wz = sBo[dz][qz];
3485
3486 for (int dy = 0; dy < D1Dy; ++dy)
3487 {
3488 const double wy = sBc[dy][qy];
3489 const double wDy = sGc[dy][qy];
3490
3491 for (int dx = 0; dx < D1Dx; ++dx)
3492 {
3493 const double t = sX[dz][dy][dx];
3494 const double wx = t * sBc[dx][qx];
3495 const double wDx = t * sGc[dx][qx];
3496
3497 u += wDx * wy * wz;
3498 v += wx * wDy * wz;
3499 }
3500 }
3501 }
3502
3503 curl[qy][qx][0] += v; // (u_2)_{x_1}
3504 curl[qy][qx][1] -= u; // -(u_2)_{x_0}
3505 }
3506 } // qx
3507 } // qy
3508 } // tidz == qz
3509
3510 osc += D1Dx * D1Dy * D1Dz;
3511 MFEM_SYNC_THREAD;
3512 } // c
3513
3514 double dxyz1 = 0.0;
3515 double dxyz2 = 0.0;
3516 double dxyz3 = 0.0;
3517
3518 MFEM_FOREACH_THREAD(dz,z,D1D)
3519 {
3520 const double wcz = sBc[dz][qz];
3521 const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
3522
3523 MFEM_FOREACH_THREAD(dy,y,D1D)
3524 {
3525 MFEM_FOREACH_THREAD(dx,x,D1D)
3526 {
3527 for (int qy = 0; qy < Q1D; ++qy)
3528 {
3529 const double wcy = sBc[dy][qy];
3530 const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
3531
3532 for (int qx = 0; qx < Q1D; ++qx)
3533 {
3534 const double O1 = sop[0][qx][qy];
3535 const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
3536 const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
3537
3538 const double c1 = O1 * curl[qy][qx][0];
3539 const double c2 = O2 * curl[qy][qx][1];
3540 const double c3 = O3 * curl[qy][qx][2];
3541
3542 const double wcx = sBc[dx][qx];
3543
3544 if (dx < D1D-1)
3545 {
3546 const double wx = sBo[dx][qx];
3547 dxyz1 += c1 * wx * wcy * wcz;
3548 }
3549
3550 dxyz2 += c2 * wcx * wy * wcz;
3551 dxyz3 += c3 * wcx * wcy * wz;
3552 } // qx
3553 } // qy
3554 } // dx
3555 } // dy
3556 } // dz
3557
3558 MFEM_SYNC_THREAD;
3559
3560 MFEM_FOREACH_THREAD(dz,z,D1D)
3561 {
3562 MFEM_FOREACH_THREAD(dy,y,D1D)
3563 {
3564 MFEM_FOREACH_THREAD(dx,x,D1D)
3565 {
3566 if (dx < D1D-1)
3567 {
3568 Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
3569 }
3570 if (dy < D1D-1)
3571 {
3572 Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
3573 }
3574 if (dz < D1D-1)
3575 {
3576 Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
3577 }
3578 }
3579 }
3580 }
3581 } // qz
3582 }); // end of element loop
3583 }
3584
3585 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3586 // integrated against H(div) test functions corresponding to y.
3587 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
PAHcurlHdivApply3D(const int D1D,const int D1Dtest,const int Q1D,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Array<double> & gc,const Vector & pa_data,const Vector & x,Vector & y)3588 static void PAHcurlHdivApply3D(const int D1D,
3589 const int D1Dtest,
3590 const int Q1D,
3591 const int NE,
3592 const Array<double> &bo,
3593 const Array<double> &bc,
3594 const Array<double> &bot,
3595 const Array<double> &bct,
3596 const Array<double> &gc,
3597 const Vector &pa_data,
3598 const Vector &x,
3599 Vector &y)
3600 {
3601 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3602 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3603 // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
3604 // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
3605 // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
3606 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3607 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3608 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3609
3610 constexpr static int VDIM = 3;
3611
3612 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3613 auto Bc = Reshape(bc.Read(), Q1D, D1D);
3614 auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
3615 auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
3616 auto Gc = Reshape(gc.Read(), Q1D, D1D);
3617 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
3618 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3619 auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
3620
3621 MFEM_FORALL(e, NE,
3622 {
3623 double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3624 // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3625
3626 for (int qz = 0; qz < Q1D; ++qz)
3627 {
3628 for (int qy = 0; qy < Q1D; ++qy)
3629 {
3630 for (int qx = 0; qx < Q1D; ++qx)
3631 {
3632 for (int c = 0; c < VDIM; ++c)
3633 {
3634 curl[qz][qy][qx][c] = 0.0;
3635 }
3636 }
3637 }
3638 }
3639
3640 // We treat x, y, z components separately for optimization specific to each.
3641
3642 int osc = 0;
3643
3644 {
3645 // x component
3646 const int D1Dz = D1D;
3647 const int D1Dy = D1D;
3648 const int D1Dx = D1D - 1;
3649
3650 for (int dz = 0; dz < D1Dz; ++dz)
3651 {
3652 double gradXY[MAX_Q1D][MAX_Q1D][2];
3653 for (int qy = 0; qy < Q1D; ++qy)
3654 {
3655 for (int qx = 0; qx < Q1D; ++qx)
3656 {
3657 for (int d = 0; d < 2; ++d)
3658 {
3659 gradXY[qy][qx][d] = 0.0;
3660 }
3661 }
3662 }
3663
3664 for (int dy = 0; dy < D1Dy; ++dy)
3665 {
3666 double massX[MAX_Q1D];
3667 for (int qx = 0; qx < Q1D; ++qx)
3668 {
3669 massX[qx] = 0.0;
3670 }
3671
3672 for (int dx = 0; dx < D1Dx; ++dx)
3673 {
3674 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3675 for (int qx = 0; qx < Q1D; ++qx)
3676 {
3677 massX[qx] += t * Bo(qx,dx);
3678 }
3679 }
3680
3681 for (int qy = 0; qy < Q1D; ++qy)
3682 {
3683 const double wy = Bc(qy,dy);
3684 const double wDy = Gc(qy,dy);
3685 for (int qx = 0; qx < Q1D; ++qx)
3686 {
3687 const double wx = massX[qx];
3688 gradXY[qy][qx][0] += wx * wDy;
3689 gradXY[qy][qx][1] += wx * wy;
3690 }
3691 }
3692 }
3693
3694 for (int qz = 0; qz < Q1D; ++qz)
3695 {
3696 const double wz = Bc(qz,dz);
3697 const double wDz = Gc(qz,dz);
3698 for (int qy = 0; qy < Q1D; ++qy)
3699 {
3700 for (int qx = 0; qx < Q1D; ++qx)
3701 {
3702 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3703 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3704 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3705 }
3706 }
3707 }
3708 }
3709
3710 osc += D1Dx * D1Dy * D1Dz;
3711 }
3712
3713 {
3714 // y component
3715 const int D1Dz = D1D;
3716 const int D1Dy = D1D - 1;
3717 const int D1Dx = D1D;
3718
3719 for (int dz = 0; dz < D1Dz; ++dz)
3720 {
3721 double gradXY[MAX_Q1D][MAX_Q1D][2];
3722 for (int qy = 0; qy < Q1D; ++qy)
3723 {
3724 for (int qx = 0; qx < Q1D; ++qx)
3725 {
3726 for (int d = 0; d < 2; ++d)
3727 {
3728 gradXY[qy][qx][d] = 0.0;
3729 }
3730 }
3731 }
3732
3733 for (int dx = 0; dx < D1Dx; ++dx)
3734 {
3735 double massY[MAX_Q1D];
3736 for (int qy = 0; qy < Q1D; ++qy)
3737 {
3738 massY[qy] = 0.0;
3739 }
3740
3741 for (int dy = 0; dy < D1Dy; ++dy)
3742 {
3743 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3744 for (int qy = 0; qy < Q1D; ++qy)
3745 {
3746 massY[qy] += t * Bo(qy,dy);
3747 }
3748 }
3749
3750 for (int qx = 0; qx < Q1D; ++qx)
3751 {
3752 const double wx = Bc(qx,dx);
3753 const double wDx = Gc(qx,dx);
3754 for (int qy = 0; qy < Q1D; ++qy)
3755 {
3756 const double wy = massY[qy];
3757 gradXY[qy][qx][0] += wDx * wy;
3758 gradXY[qy][qx][1] += wx * wy;
3759 }
3760 }
3761 }
3762
3763 for (int qz = 0; qz < Q1D; ++qz)
3764 {
3765 const double wz = Bc(qz,dz);
3766 const double wDz = Gc(qz,dz);
3767 for (int qy = 0; qy < Q1D; ++qy)
3768 {
3769 for (int qx = 0; qx < Q1D; ++qx)
3770 {
3771 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3772 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3773 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3774 }
3775 }
3776 }
3777 }
3778
3779 osc += D1Dx * D1Dy * D1Dz;
3780 }
3781
3782 {
3783 // z component
3784 const int D1Dz = D1D - 1;
3785 const int D1Dy = D1D;
3786 const int D1Dx = D1D;
3787
3788 for (int dx = 0; dx < D1Dx; ++dx)
3789 {
3790 double gradYZ[MAX_Q1D][MAX_Q1D][2];
3791 for (int qz = 0; qz < Q1D; ++qz)
3792 {
3793 for (int qy = 0; qy < Q1D; ++qy)
3794 {
3795 for (int d = 0; d < 2; ++d)
3796 {
3797 gradYZ[qz][qy][d] = 0.0;
3798 }
3799 }
3800 }
3801
3802 for (int dy = 0; dy < D1Dy; ++dy)
3803 {
3804 double massZ[MAX_Q1D];
3805 for (int qz = 0; qz < Q1D; ++qz)
3806 {
3807 massZ[qz] = 0.0;
3808 }
3809
3810 for (int dz = 0; dz < D1Dz; ++dz)
3811 {
3812 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3813 for (int qz = 0; qz < Q1D; ++qz)
3814 {
3815 massZ[qz] += t * Bo(qz,dz);
3816 }
3817 }
3818
3819 for (int qy = 0; qy < Q1D; ++qy)
3820 {
3821 const double wy = Bc(qy,dy);
3822 const double wDy = Gc(qy,dy);
3823 for (int qz = 0; qz < Q1D; ++qz)
3824 {
3825 const double wz = massZ[qz];
3826 gradYZ[qz][qy][0] += wz * wy;
3827 gradYZ[qz][qy][1] += wz * wDy;
3828 }
3829 }
3830 }
3831
3832 for (int qx = 0; qx < Q1D; ++qx)
3833 {
3834 const double wx = Bc(qx,dx);
3835 const double wDx = Gc(qx,dx);
3836
3837 for (int qy = 0; qy < Q1D; ++qy)
3838 {
3839 for (int qz = 0; qz < Q1D; ++qz)
3840 {
3841 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3842 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3843 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3844 }
3845 }
3846 }
3847 }
3848 }
3849
3850 // Apply D operator.
3851 for (int qz = 0; qz < Q1D; ++qz)
3852 {
3853 for (int qy = 0; qy < Q1D; ++qy)
3854 {
3855 for (int qx = 0; qx < Q1D; ++qx)
3856 {
3857 const double O11 = op(qx,qy,qz,0,e);
3858 const double O12 = op(qx,qy,qz,1,e);
3859 const double O13 = op(qx,qy,qz,2,e);
3860 const double O22 = op(qx,qy,qz,3,e);
3861 const double O23 = op(qx,qy,qz,4,e);
3862 const double O33 = op(qx,qy,qz,5,e);
3863
3864 const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
3865 (O13 * curl[qz][qy][qx][2]);
3866 const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
3867 (O23 * curl[qz][qy][qx][2]);
3868 const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
3869 (O33 * curl[qz][qy][qx][2]);
3870
3871 curl[qz][qy][qx][0] = c1;
3872 curl[qz][qy][qx][1] = c2;
3873 curl[qz][qy][qx][2] = c3;
3874 }
3875 }
3876 }
3877
3878 for (int qz = 0; qz < Q1D; ++qz)
3879 {
3880 double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
3881
3882 osc = 0;
3883
3884 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3885 {
3886 const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
3887 const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
3888 const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
3889
3890 for (int dy = 0; dy < D1Dy; ++dy)
3891 {
3892 for (int dx = 0; dx < D1Dx; ++dx)
3893 {
3894 massXY[dy][dx] = 0;
3895 }
3896 }
3897 for (int qy = 0; qy < Q1D; ++qy)
3898 {
3899 double massX[HCURL_MAX_D1D];
3900 for (int dx = 0; dx < D1Dx; ++dx)
3901 {
3902 massX[dx] = 0;
3903 }
3904 for (int qx = 0; qx < Q1D; ++qx)
3905 {
3906 for (int dx = 0; dx < D1Dx; ++dx)
3907 {
3908 massX[dx] += curl[qz][qy][qx][c] *
3909 ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
3910 }
3911 }
3912 for (int dy = 0; dy < D1Dy; ++dy)
3913 {
3914 const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
3915 for (int dx = 0; dx < D1Dx; ++dx)
3916 {
3917 massXY[dy][dx] += massX[dx] * wy;
3918 }
3919 }
3920 }
3921
3922 for (int dz = 0; dz < D1Dz; ++dz)
3923 {
3924 const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
3925 for (int dy = 0; dy < D1Dy; ++dy)
3926 {
3927 for (int dx = 0; dx < D1Dx; ++dx)
3928 {
3929 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
3930 massXY[dy][dx] * wz;
3931 }
3932 }
3933 }
3934
3935 osc += D1Dx * D1Dy * D1Dz;
3936 } // loop c
3937 } // loop qz
3938 }); // end of element loop
3939 }
3940
AddMultPA(const Vector & x,Vector & y) const3941 void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
3942 {
3943 if (testType == mfem::FiniteElement::CURL &&
3944 trialType == mfem::FiniteElement::CURL && dim == 3)
3945 {
3946 if (Device::Allows(Backend::DEVICE_MASK))
3947 {
3948 const int ID = (dofs1D << 4) | quad1D;
3949 switch (ID)
3950 {
3951 case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, coeffDim, ne,
3952 mapsO->B, mapsC->B,
3953 mapsC->G, pa_data, x, y);
3954 case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, coeffDim, ne,
3955 mapsO->B, mapsC->B,
3956 mapsC->G, pa_data, x, y);
3957 case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, coeffDim, ne,
3958 mapsO->B, mapsC->B,
3959 mapsC->G, pa_data, x, y);
3960 case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, coeffDim, ne,
3961 mapsO->B, mapsC->B,
3962 mapsC->G, pa_data, x, y);
3963 default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B,
3964 mapsC->B,
3965 mapsC->G, pa_data, x, y);
3966 }
3967 }
3968 else
3969 PAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
3970 mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y);
3971 }
3972 else if (testType == mfem::FiniteElement::DIV &&
3973 trialType == mfem::FiniteElement::CURL && dim == 3)
3974 PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
3975 mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G,
3976 pa_data, x, y);
3977 else
3978 {
3979 MFEM_ABORT("Unsupported dimension or space!");
3980 }
3981 }
3982
AssemblePA(const FiniteElementSpace & trial_fes,const FiniteElementSpace & test_fes)3983 void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace
3984 &trial_fes,
3985 const FiniteElementSpace &test_fes)
3986 {
3987 // Assumes tensor-product elements, with vector test and trial spaces.
3988 Mesh *mesh = trial_fes.GetMesh();
3989 const FiniteElement *trial_fel = trial_fes.GetFE(0);
3990 const FiniteElement *test_fel = test_fes.GetFE(0);
3991
3992 const VectorTensorFiniteElement *trial_el =
3993 dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
3994 MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
3995
3996 const VectorTensorFiniteElement *test_el =
3997 dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
3998 MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
3999
4000 const IntegrationRule *ir
4001 = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
4002 *mesh->GetElementTransformation(0));
4003 const int dims = trial_el->GetDim();
4004 MFEM_VERIFY(dims == 3, "");
4005
4006 const int nq = ir->GetNPoints();
4007 dim = mesh->Dimension();
4008 MFEM_VERIFY(dim == 3, "");
4009
4010 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
4011
4012 ne = trial_fes.GetNE();
4013 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
4014 mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
4015 mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
4016 dofs1D = mapsC->ndof;
4017 quad1D = mapsC->nqpt;
4018
4019 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
4020
4021 coeffDim = DQ ? 3 : 1;
4022
4023 pa_data.SetSize(coeffDim * nq * ne, Device::GetMemoryType());
4024
4025 Vector coeff(coeffDim * nq * ne);
4026 coeff = 1.0;
4027 auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
4028 if (Q || DQ)
4029 {
4030 Vector V(coeffDim);
4031 if (DQ)
4032 {
4033 MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
4034 }
4035
4036 for (int e=0; e<ne; ++e)
4037 {
4038 ElementTransformation *tr = mesh->GetElementTransformation(e);
4039
4040 for (int p=0; p<nq; ++p)
4041 {
4042 if (DQ)
4043 {
4044 DQ->Eval(V, *tr, ir->IntPoint(p));
4045 for (int i=0; i<coeffDim; ++i)
4046 {
4047 coeffh(i, p, e) = V[i];
4048 }
4049 }
4050 else
4051 {
4052 coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
4053 }
4054 }
4055 }
4056 }
4057
4058 testType = test_el->GetDerivType();
4059 trialType = trial_el->GetDerivType();
4060
4061 if (trialType == mfem::FiniteElement::CURL && dim == 3)
4062 {
4063 PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
4064 }
4065 else
4066 {
4067 MFEM_ABORT("Unknown kernel.");
4068 }
4069 }
4070
4071 // Apply to x corresponding to DOF's in H(curl) (trial), integrated against curl
4072 // of H(curl) test functions corresponding to y.
4073 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
PAHcurlL2Apply3DTranspose(const int D1D,const int Q1D,const int coeffDim,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Array<double> & gct,const Vector & pa_data,const Vector & x,Vector & y)4074 static void PAHcurlL2Apply3DTranspose(const int D1D,
4075 const int Q1D,
4076 const int coeffDim,
4077 const int NE,
4078 const Array<double> &bo,
4079 const Array<double> &bc,
4080 const Array<double> &bot,
4081 const Array<double> &bct,
4082 const Array<double> &gct,
4083 const Vector &pa_data,
4084 const Vector &x,
4085 Vector &y)
4086 {
4087 // See PAHcurlL2Apply3D for comments.
4088
4089 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4090 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4091
4092 constexpr static int VDIM = 3;
4093
4094 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4095 auto Bc = Reshape(bc.Read(), Q1D, D1D);
4096 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
4097 auto Bct = Reshape(bct.Read(), D1D, Q1D);
4098 auto Gct = Reshape(gct.Read(), D1D, Q1D);
4099 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4100 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4101 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4102
4103 MFEM_FORALL(e, NE,
4104 {
4105 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4106
4107 for (int qz = 0; qz < Q1D; ++qz)
4108 {
4109 for (int qy = 0; qy < Q1D; ++qy)
4110 {
4111 for (int qx = 0; qx < Q1D; ++qx)
4112 {
4113 for (int c = 0; c < VDIM; ++c)
4114 {
4115 mass[qz][qy][qx][c] = 0.0;
4116 }
4117 }
4118 }
4119 }
4120
4121 int osc = 0;
4122
4123 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4124 {
4125 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4126 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4127 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4128
4129 for (int dz = 0; dz < D1Dz; ++dz)
4130 {
4131 double massXY[MAX_Q1D][MAX_Q1D];
4132 for (int qy = 0; qy < Q1D; ++qy)
4133 {
4134 for (int qx = 0; qx < Q1D; ++qx)
4135 {
4136 massXY[qy][qx] = 0.0;
4137 }
4138 }
4139
4140 for (int dy = 0; dy < D1Dy; ++dy)
4141 {
4142 double massX[MAX_Q1D];
4143 for (int qx = 0; qx < Q1D; ++qx)
4144 {
4145 massX[qx] = 0.0;
4146 }
4147
4148 for (int dx = 0; dx < D1Dx; ++dx)
4149 {
4150 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4151 for (int qx = 0; qx < Q1D; ++qx)
4152 {
4153 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
4154 }
4155 }
4156
4157 for (int qy = 0; qy < Q1D; ++qy)
4158 {
4159 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
4160 for (int qx = 0; qx < Q1D; ++qx)
4161 {
4162 const double wx = massX[qx];
4163 massXY[qy][qx] += wx * wy;
4164 }
4165 }
4166 }
4167
4168 for (int qz = 0; qz < Q1D; ++qz)
4169 {
4170 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
4171 for (int qy = 0; qy < Q1D; ++qy)
4172 {
4173 for (int qx = 0; qx < Q1D; ++qx)
4174 {
4175 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
4176 }
4177 }
4178 }
4179 }
4180
4181 osc += D1Dx * D1Dy * D1Dz;
4182 } // loop (c) over components
4183
4184 // Apply D operator.
4185 for (int qz = 0; qz < Q1D; ++qz)
4186 {
4187 for (int qy = 0; qy < Q1D; ++qy)
4188 {
4189 for (int qx = 0; qx < Q1D; ++qx)
4190 {
4191 for (int c=0; c<VDIM; ++c)
4192 {
4193 mass[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
4194 }
4195 }
4196 }
4197 }
4198
4199 // x component
4200 osc = 0;
4201 {
4202 const int D1Dz = D1D;
4203 const int D1Dy = D1D;
4204 const int D1Dx = D1D - 1;
4205
4206 for (int qz = 0; qz < Q1D; ++qz)
4207 {
4208 double gradXY12[MAX_D1D][MAX_D1D];
4209 double gradXY21[MAX_D1D][MAX_D1D];
4210
4211 for (int dy = 0; dy < D1Dy; ++dy)
4212 {
4213 for (int dx = 0; dx < D1Dx; ++dx)
4214 {
4215 gradXY12[dy][dx] = 0.0;
4216 gradXY21[dy][dx] = 0.0;
4217 }
4218 }
4219 for (int qy = 0; qy < Q1D; ++qy)
4220 {
4221 double massX[MAX_D1D][2];
4222 for (int dx = 0; dx < D1Dx; ++dx)
4223 {
4224 for (int n = 0; n < 2; ++n)
4225 {
4226 massX[dx][n] = 0.0;
4227 }
4228 }
4229 for (int qx = 0; qx < Q1D; ++qx)
4230 {
4231 for (int dx = 0; dx < D1Dx; ++dx)
4232 {
4233 const double wx = Bot(dx,qx);
4234
4235 massX[dx][0] += wx * mass[qz][qy][qx][1];
4236 massX[dx][1] += wx * mass[qz][qy][qx][2];
4237 }
4238 }
4239 for (int dy = 0; dy < D1Dy; ++dy)
4240 {
4241 const double wy = Bct(dy,qy);
4242 const double wDy = Gct(dy,qy);
4243
4244 for (int dx = 0; dx < D1Dx; ++dx)
4245 {
4246 gradXY21[dy][dx] += massX[dx][0] * wy;
4247 gradXY12[dy][dx] += massX[dx][1] * wDy;
4248 }
4249 }
4250 }
4251
4252 for (int dz = 0; dz < D1Dz; ++dz)
4253 {
4254 const double wz = Bct(dz,qz);
4255 const double wDz = Gct(dz,qz);
4256 for (int dy = 0; dy < D1Dy; ++dy)
4257 {
4258 for (int dx = 0; dx < D1Dx; ++dx)
4259 {
4260 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4261 // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
4262 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4263 e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
4264 }
4265 }
4266 }
4267 } // loop qz
4268
4269 osc += D1Dx * D1Dy * D1Dz;
4270 }
4271
4272 // y component
4273 {
4274 const int D1Dz = D1D;
4275 const int D1Dy = D1D - 1;
4276 const int D1Dx = D1D;
4277
4278 for (int qz = 0; qz < Q1D; ++qz)
4279 {
4280 double gradXY02[MAX_D1D][MAX_D1D];
4281 double gradXY20[MAX_D1D][MAX_D1D];
4282
4283 for (int dy = 0; dy < D1Dy; ++dy)
4284 {
4285 for (int dx = 0; dx < D1Dx; ++dx)
4286 {
4287 gradXY02[dy][dx] = 0.0;
4288 gradXY20[dy][dx] = 0.0;
4289 }
4290 }
4291 for (int qx = 0; qx < Q1D; ++qx)
4292 {
4293 double massY[MAX_D1D][2];
4294 for (int dy = 0; dy < D1Dy; ++dy)
4295 {
4296 massY[dy][0] = 0.0;
4297 massY[dy][1] = 0.0;
4298 }
4299 for (int qy = 0; qy < Q1D; ++qy)
4300 {
4301 for (int dy = 0; dy < D1Dy; ++dy)
4302 {
4303 const double wy = Bot(dy,qy);
4304
4305 massY[dy][0] += wy * mass[qz][qy][qx][2];
4306 massY[dy][1] += wy * mass[qz][qy][qx][0];
4307 }
4308 }
4309 for (int dx = 0; dx < D1Dx; ++dx)
4310 {
4311 const double wx = Bct(dx,qx);
4312 const double wDx = Gct(dx,qx);
4313
4314 for (int dy = 0; dy < D1Dy; ++dy)
4315 {
4316 gradXY02[dy][dx] += massY[dy][0] * wDx;
4317 gradXY20[dy][dx] += massY[dy][1] * wx;
4318 }
4319 }
4320 }
4321
4322 for (int dz = 0; dz < D1Dz; ++dz)
4323 {
4324 const double wz = Bct(dz,qz);
4325 const double wDz = Gct(dz,qz);
4326 for (int dy = 0; dy < D1Dy; ++dy)
4327 {
4328 for (int dx = 0; dx < D1Dx; ++dx)
4329 {
4330 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4331 // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
4332 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4333 e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
4334 }
4335 }
4336 }
4337 } // loop qz
4338
4339 osc += D1Dx * D1Dy * D1Dz;
4340 }
4341
4342 // z component
4343 {
4344 const int D1Dz = D1D - 1;
4345 const int D1Dy = D1D;
4346 const int D1Dx = D1D;
4347
4348 for (int qx = 0; qx < Q1D; ++qx)
4349 {
4350 double gradYZ01[MAX_D1D][MAX_D1D];
4351 double gradYZ10[MAX_D1D][MAX_D1D];
4352
4353 for (int dy = 0; dy < D1Dy; ++dy)
4354 {
4355 for (int dz = 0; dz < D1Dz; ++dz)
4356 {
4357 gradYZ01[dz][dy] = 0.0;
4358 gradYZ10[dz][dy] = 0.0;
4359 }
4360 }
4361 for (int qy = 0; qy < Q1D; ++qy)
4362 {
4363 double massZ[MAX_D1D][2];
4364 for (int dz = 0; dz < D1Dz; ++dz)
4365 {
4366 for (int n = 0; n < 2; ++n)
4367 {
4368 massZ[dz][n] = 0.0;
4369 }
4370 }
4371 for (int qz = 0; qz < Q1D; ++qz)
4372 {
4373 for (int dz = 0; dz < D1Dz; ++dz)
4374 {
4375 const double wz = Bot(dz,qz);
4376
4377 massZ[dz][0] += wz * mass[qz][qy][qx][0];
4378 massZ[dz][1] += wz * mass[qz][qy][qx][1];
4379 }
4380 }
4381 for (int dy = 0; dy < D1Dy; ++dy)
4382 {
4383 const double wy = Bct(dy,qy);
4384 const double wDy = Gct(dy,qy);
4385
4386 for (int dz = 0; dz < D1Dz; ++dz)
4387 {
4388 gradYZ01[dz][dy] += wy * massZ[dz][1];
4389 gradYZ10[dz][dy] += wDy * massZ[dz][0];
4390 }
4391 }
4392 }
4393
4394 for (int dx = 0; dx < D1Dx; ++dx)
4395 {
4396 const double wx = Bct(dx,qx);
4397 const double wDx = Gct(dx,qx);
4398
4399 for (int dy = 0; dy < D1Dy; ++dy)
4400 {
4401 for (int dz = 0; dz < D1Dz; ++dz)
4402 {
4403 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4404 // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
4405 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4406 e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
4407 }
4408 }
4409 }
4410 } // loop qx
4411 }
4412 });
4413 }
4414
4415 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
SmemPAHcurlL2Apply3DTranspose(const int D1D,const int Q1D,const int coeffDim,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & gc,const Vector & pa_data,const Vector & x,Vector & y)4416 static void SmemPAHcurlL2Apply3DTranspose(const int D1D,
4417 const int Q1D,
4418 const int coeffDim,
4419 const int NE,
4420 const Array<double> &bo,
4421 const Array<double> &bc,
4422 const Array<double> &gc,
4423 const Vector &pa_data,
4424 const Vector &x,
4425 Vector &y)
4426 {
4427 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4428 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4429
4430 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4431 auto Bc = Reshape(bc.Read(), Q1D, D1D);
4432 auto Gc = Reshape(gc.Read(), Q1D, D1D);
4433 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4434 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4435 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4436
4437 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
4438 {
4439 constexpr int VDIM = 3;
4440 constexpr int maxCoeffDim = 3;
4441
4442 MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
4443 MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
4444 MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
4445
4446 double opc[maxCoeffDim];
4447 MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
4448 MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3];
4449
4450 MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
4451
4452 MFEM_FOREACH_THREAD(qx,x,Q1D)
4453 {
4454 MFEM_FOREACH_THREAD(qy,y,Q1D)
4455 {
4456 MFEM_FOREACH_THREAD(qz,z,Q1D)
4457 {
4458 for (int i=0; i<coeffDim; ++i)
4459 {
4460 opc[i] = op(i,qx,qy,qz,e);
4461 }
4462 }
4463 }
4464 }
4465
4466 const int tidx = MFEM_THREAD_ID(x);
4467 const int tidy = MFEM_THREAD_ID(y);
4468 const int tidz = MFEM_THREAD_ID(z);
4469
4470 if (tidz == 0)
4471 {
4472 MFEM_FOREACH_THREAD(d,y,D1D)
4473 {
4474 MFEM_FOREACH_THREAD(q,x,Q1D)
4475 {
4476 sBc[d][q] = Bc(q,d);
4477 sGc[d][q] = Gc(q,d);
4478 if (d < D1D-1)
4479 {
4480 sBo[d][q] = Bo(q,d);
4481 }
4482 }
4483 }
4484 }
4485 MFEM_SYNC_THREAD;
4486
4487 for (int qz=0; qz < Q1D; ++qz)
4488 {
4489 if (tidz == qz)
4490 {
4491 MFEM_FOREACH_THREAD(qy,y,Q1D)
4492 {
4493 MFEM_FOREACH_THREAD(qx,x,Q1D)
4494 {
4495 for (int i=0; i<3; ++i)
4496 {
4497 mass[qy][qx][i] = 0.0;
4498 }
4499 }
4500 }
4501 }
4502
4503 int osc = 0;
4504 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4505 {
4506 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4507 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4508 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4509
4510 MFEM_FOREACH_THREAD(dz,z,D1Dz)
4511 {
4512 MFEM_FOREACH_THREAD(dy,y,D1Dy)
4513 {
4514 MFEM_FOREACH_THREAD(dx,x,D1Dx)
4515 {
4516 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4517 }
4518 }
4519 }
4520 MFEM_SYNC_THREAD;
4521
4522 if (tidz == qz)
4523 {
4524 if (c == 0)
4525 {
4526 for (int i=0; i<coeffDim; ++i)
4527 {
4528 sop[i][tidx][tidy] = opc[i];
4529 }
4530 }
4531
4532 MFEM_FOREACH_THREAD(qy,y,Q1D)
4533 {
4534 MFEM_FOREACH_THREAD(qx,x,Q1D)
4535 {
4536 double u = 0.0;
4537
4538 for (int dz = 0; dz < D1Dz; ++dz)
4539 {
4540 const double wz = (c == 2) ? sBo[dz][qz] : sBc[dz][qz];
4541
4542 for (int dy = 0; dy < D1Dy; ++dy)
4543 {
4544 const double wy = (c == 1) ? sBo[dy][qy] : sBc[dy][qy];
4545
4546 for (int dx = 0; dx < D1Dx; ++dx)
4547 {
4548 const double wx = sX[dz][dy][dx] * ((c == 0) ? sBo[dx][qx] : sBc[dx][qx]);
4549 u += wx * wy * wz;
4550 }
4551 }
4552 }
4553
4554 mass[qy][qx][c] += u;
4555 } // qx
4556 } // qy
4557 } // tidz == qz
4558
4559 osc += D1Dx * D1Dy * D1Dz;
4560 MFEM_SYNC_THREAD;
4561 } // c
4562
4563 double dxyz1 = 0.0;
4564 double dxyz2 = 0.0;
4565 double dxyz3 = 0.0;
4566
4567 MFEM_FOREACH_THREAD(dz,z,D1D)
4568 {
4569 const double wcz = sBc[dz][qz];
4570 const double wcDz = sGc[dz][qz];
4571 const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
4572
4573 MFEM_FOREACH_THREAD(dy,y,D1D)
4574 {
4575 MFEM_FOREACH_THREAD(dx,x,D1D)
4576 {
4577 for (int qy = 0; qy < Q1D; ++qy)
4578 {
4579 const double wcy = sBc[dy][qy];
4580 const double wcDy = sGc[dy][qy];
4581 const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
4582
4583 for (int qx = 0; qx < Q1D; ++qx)
4584 {
4585 const double O1 = sop[0][qx][qy];
4586 const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
4587 const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
4588
4589 const double c1 = O1 * mass[qy][qx][0];
4590 const double c2 = O2 * mass[qy][qx][1];
4591 const double c3 = O3 * mass[qy][qx][2];
4592
4593 const double wcx = sBc[dx][qx];
4594 const double wDx = sGc[dx][qx];
4595
4596 if (dx < D1D-1)
4597 {
4598 const double wx = sBo[dx][qx];
4599 dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
4600 }
4601
4602 dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
4603
4604 dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
4605 } // qx
4606 } // qy
4607 } // dx
4608 } // dy
4609 } // dz
4610
4611 MFEM_SYNC_THREAD;
4612
4613 MFEM_FOREACH_THREAD(dz,z,D1D)
4614 {
4615 MFEM_FOREACH_THREAD(dy,y,D1D)
4616 {
4617 MFEM_FOREACH_THREAD(dx,x,D1D)
4618 {
4619 if (dx < D1D-1)
4620 {
4621 Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
4622 }
4623 if (dy < D1D-1)
4624 {
4625 Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
4626 }
4627 if (dz < D1D-1)
4628 {
4629 Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
4630 }
4631 }
4632 }
4633 }
4634 } // qz
4635 }); // end of element loop
4636 }
4637
AddMultPA(const Vector & x,Vector & y) const4638 void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
4639 {
4640 if (testType == mfem::FiniteElement::CURL &&
4641 trialType == mfem::FiniteElement::CURL && dim == 3)
4642 {
4643 if (Device::Allows(Backend::DEVICE_MASK))
4644 {
4645 const int ID = (dofs1D << 4) | quad1D;
4646 switch (ID)
4647 {
4648 case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, coeffDim,
4649 ne, mapsO->B, mapsC->B,
4650 mapsC->G, pa_data, x, y);
4651 case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, coeffDim,
4652 ne, mapsO->B, mapsC->B,
4653 mapsC->G, pa_data, x, y);
4654 case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, coeffDim,
4655 ne, mapsO->B, mapsC->B,
4656 mapsC->G, pa_data, x, y);
4657 case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, coeffDim,
4658 ne, mapsO->B, mapsC->B,
4659 mapsC->G, pa_data, x, y);
4660 default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne,
4661 mapsO->B, mapsC->B,
4662 mapsC->G, pa_data, x, y);
4663 }
4664 }
4665 else
4666 PAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
4667 mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y);
4668 }
4669 else
4670 {
4671 MFEM_ABORT("Unsupported dimension or space!");
4672 }
4673 }
4674
4675 // Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient
4676 // to get a dof in H(curl) (range). You can think of the range as the "test" space
4677 // and the domain as the "trial" space, but there's no integration.
PAHcurlApplyGradient2D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & B_,const Array<double> & G_,const Vector & x_,Vector & y_)4678 static void PAHcurlApplyGradient2D(const int c_dofs1D,
4679 const int o_dofs1D,
4680 const int NE,
4681 const Array<double> &B_,
4682 const Array<double> &G_,
4683 const Vector &x_,
4684 Vector &y_)
4685 {
4686 auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
4687 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4688
4689 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
4690 auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
4691
4692 constexpr static int MAX_D1D = HCURL_MAX_D1D;
4693 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4694
4695 MFEM_FORALL(e, NE,
4696 {
4697 double w[MAX_D1D][MAX_D1D];
4698
4699 // horizontal part
4700 for (int dx = 0; dx < c_dofs1D; ++dx)
4701 {
4702 for (int ey = 0; ey < c_dofs1D; ++ey)
4703 {
4704 w[dx][ey] = 0.0;
4705 for (int dy = 0; dy < c_dofs1D; ++dy)
4706 {
4707 w[dx][ey] += B(ey, dy) * x(dx, dy, e);
4708 }
4709 }
4710 }
4711
4712 for (int ey = 0; ey < c_dofs1D; ++ey)
4713 {
4714 for (int ex = 0; ex < o_dofs1D; ++ex)
4715 {
4716 double s = 0.0;
4717 for (int dx = 0; dx < c_dofs1D; ++dx)
4718 {
4719 s += G(ex, dx) * w[dx][ey];
4720 }
4721 const int local_index = ey*o_dofs1D + ex;
4722 y(local_index, e) += s;
4723 }
4724 }
4725
4726 // vertical part
4727 for (int dx = 0; dx < c_dofs1D; ++dx)
4728 {
4729 for (int ey = 0; ey < o_dofs1D; ++ey)
4730 {
4731 w[dx][ey] = 0.0;
4732 for (int dy = 0; dy < c_dofs1D; ++dy)
4733 {
4734 w[dx][ey] += G(ey, dy) * x(dx, dy, e);
4735 }
4736 }
4737 }
4738
4739 for (int ey = 0; ey < o_dofs1D; ++ey)
4740 {
4741 for (int ex = 0; ex < c_dofs1D; ++ex)
4742 {
4743 double s = 0.0;
4744 for (int dx = 0; dx < c_dofs1D; ++dx)
4745 {
4746 s += B(ex, dx) * w[dx][ey];
4747 }
4748 const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4749 y(local_index, e) += s;
4750 }
4751 }
4752 });
4753 }
4754
4755 // Specialization of PAHcurlApplyGradient2D to the case where B is identity
PAHcurlApplyGradient2DBId(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & G_,const Vector & x_,Vector & y_)4756 static void PAHcurlApplyGradient2DBId(const int c_dofs1D,
4757 const int o_dofs1D,
4758 const int NE,
4759 const Array<double> &G_,
4760 const Vector &x_,
4761 Vector &y_)
4762 {
4763 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4764
4765 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
4766 auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
4767
4768 constexpr static int MAX_D1D = HCURL_MAX_D1D;
4769 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4770
4771 MFEM_FORALL(e, NE,
4772 {
4773 double w[MAX_D1D][MAX_D1D];
4774
4775 // horizontal part
4776 for (int dx = 0; dx < c_dofs1D; ++dx)
4777 {
4778 for (int ey = 0; ey < c_dofs1D; ++ey)
4779 {
4780 const int dy = ey;
4781 w[dx][ey] = x(dx, dy, e);
4782 }
4783 }
4784
4785 for (int ey = 0; ey < c_dofs1D; ++ey)
4786 {
4787 for (int ex = 0; ex < o_dofs1D; ++ex)
4788 {
4789 double s = 0.0;
4790 for (int dx = 0; dx < c_dofs1D; ++dx)
4791 {
4792 s += G(ex, dx) * w[dx][ey];
4793 }
4794 const int local_index = ey*o_dofs1D + ex;
4795 y(local_index, e) += s;
4796 }
4797 }
4798
4799 // vertical part
4800 for (int dx = 0; dx < c_dofs1D; ++dx)
4801 {
4802 for (int ey = 0; ey < o_dofs1D; ++ey)
4803 {
4804 w[dx][ey] = 0.0;
4805 for (int dy = 0; dy < c_dofs1D; ++dy)
4806 {
4807 w[dx][ey] += G(ey, dy) * x(dx, dy, e);
4808 }
4809 }
4810 }
4811
4812 for (int ey = 0; ey < o_dofs1D; ++ey)
4813 {
4814 for (int ex = 0; ex < c_dofs1D; ++ex)
4815 {
4816 const int dx = ex;
4817 const double s = w[dx][ey];
4818 const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4819 y(local_index, e) += s;
4820 }
4821 }
4822 });
4823 }
4824
PAHcurlApplyGradientTranspose2D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & B_,const Array<double> & G_,const Vector & x_,Vector & y_)4825 static void PAHcurlApplyGradientTranspose2D(
4826 const int c_dofs1D, const int o_dofs1D, const int NE,
4827 const Array<double> &B_, const Array<double> &G_,
4828 const Vector &x_, Vector &y_)
4829 {
4830 auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
4831 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4832
4833 auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
4834 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
4835
4836 constexpr static int MAX_D1D = HCURL_MAX_D1D;
4837 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4838
4839 MFEM_FORALL(e, NE,
4840 {
4841 double w[MAX_D1D][MAX_D1D];
4842
4843 // horizontal part (open x, closed y)
4844 for (int dy = 0; dy < c_dofs1D; ++dy)
4845 {
4846 for (int ex = 0; ex < o_dofs1D; ++ex)
4847 {
4848 w[dy][ex] = 0.0;
4849 for (int ey = 0; ey < c_dofs1D; ++ey)
4850 {
4851 const int local_index = ey*o_dofs1D + ex;
4852 w[dy][ex] += B(ey, dy) * x(local_index, e);
4853 }
4854 }
4855 }
4856
4857 for (int dy = 0; dy < c_dofs1D; ++dy)
4858 {
4859 for (int dx = 0; dx < c_dofs1D; ++dx)
4860 {
4861 double s = 0.0;
4862 for (int ex = 0; ex < o_dofs1D; ++ex)
4863 {
4864 s += G(ex, dx) * w[dy][ex];
4865 }
4866 y(dx, dy, e) += s;
4867 }
4868 }
4869
4870 // vertical part (open y, closed x)
4871 for (int dy = 0; dy < c_dofs1D; ++dy)
4872 {
4873 for (int ex = 0; ex < c_dofs1D; ++ex)
4874 {
4875 w[dy][ex] = 0.0;
4876 for (int ey = 0; ey < o_dofs1D; ++ey)
4877 {
4878 const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4879 w[dy][ex] += G(ey, dy) * x(local_index, e);
4880 }
4881 }
4882 }
4883
4884 for (int dy = 0; dy < c_dofs1D; ++dy)
4885 {
4886 for (int dx = 0; dx < c_dofs1D; ++dx)
4887 {
4888 double s = 0.0;
4889 for (int ex = 0; ex < c_dofs1D; ++ex)
4890 {
4891 s += B(ex, dx) * w[dy][ex];
4892 }
4893 y(dx, dy, e) += s;
4894 }
4895 }
4896 });
4897 }
4898
4899 // Specialization of PAHcurlApplyGradientTranspose2D to the case where
4900 // B is identity
PAHcurlApplyGradientTranspose2DBId(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & G_,const Vector & x_,Vector & y_)4901 static void PAHcurlApplyGradientTranspose2DBId(
4902 const int c_dofs1D, const int o_dofs1D, const int NE,
4903 const Array<double> &G_,
4904 const Vector &x_, Vector &y_)
4905 {
4906 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4907
4908 auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
4909 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
4910
4911 constexpr static int MAX_D1D = HCURL_MAX_D1D;
4912 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4913
4914 MFEM_FORALL(e, NE,
4915 {
4916 double w[MAX_D1D][MAX_D1D];
4917
4918 // horizontal part (open x, closed y)
4919 for (int dy = 0; dy < c_dofs1D; ++dy)
4920 {
4921 for (int ex = 0; ex < o_dofs1D; ++ex)
4922 {
4923 const int ey = dy;
4924 const int local_index = ey*o_dofs1D + ex;
4925 w[dy][ex] = x(local_index, e);
4926 }
4927 }
4928
4929 for (int dy = 0; dy < c_dofs1D; ++dy)
4930 {
4931 for (int dx = 0; dx < c_dofs1D; ++dx)
4932 {
4933 double s = 0.0;
4934 for (int ex = 0; ex < o_dofs1D; ++ex)
4935 {
4936 s += G(ex, dx) * w[dy][ex];
4937 }
4938 y(dx, dy, e) += s;
4939 }
4940 }
4941
4942 // vertical part (open y, closed x)
4943 for (int dy = 0; dy < c_dofs1D; ++dy)
4944 {
4945 for (int ex = 0; ex < c_dofs1D; ++ex)
4946 {
4947 w[dy][ex] = 0.0;
4948 for (int ey = 0; ey < o_dofs1D; ++ey)
4949 {
4950 const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4951 w[dy][ex] += G(ey, dy) * x(local_index, e);
4952 }
4953 }
4954 }
4955
4956 for (int dy = 0; dy < c_dofs1D; ++dy)
4957 {
4958 for (int dx = 0; dx < c_dofs1D; ++dx)
4959 {
4960 const int ex = dx;
4961 const double s = w[dy][ex];
4962 y(dx, dy, e) += s;
4963 }
4964 }
4965 });
4966 }
4967
PAHcurlApplyGradient3D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & B_,const Array<double> & G_,const Vector & x_,Vector & y_)4968 static void PAHcurlApplyGradient3D(const int c_dofs1D,
4969 const int o_dofs1D,
4970 const int NE,
4971 const Array<double> &B_,
4972 const Array<double> &G_,
4973 const Vector &x_,
4974 Vector &y_)
4975 {
4976 auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
4977 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4978
4979 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
4980 auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
4981
4982 constexpr static int MAX_D1D = HCURL_MAX_D1D;
4983 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4984
4985 MFEM_FORALL(e, NE,
4986 {
4987 double w1[MAX_D1D][MAX_D1D][MAX_D1D];
4988 double w2[MAX_D1D][MAX_D1D][MAX_D1D];
4989
4990 // ---
4991 // dofs that point parallel to x-axis (open in x, closed in y, z)
4992 // ---
4993
4994 // contract in z
4995 for (int ez = 0; ez < c_dofs1D; ++ez)
4996 {
4997 for (int dx = 0; dx < c_dofs1D; ++dx)
4998 {
4999 for (int dy = 0; dy < c_dofs1D; ++dy)
5000 {
5001 w1[dx][dy][ez] = 0.0;
5002 for (int dz = 0; dz < c_dofs1D; ++dz)
5003 {
5004 w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5005 }
5006 }
5007 }
5008 }
5009
5010 // contract in y
5011 for (int ez = 0; ez < c_dofs1D; ++ez)
5012 {
5013 for (int ey = 0; ey < c_dofs1D; ++ey)
5014 {
5015 for (int dx = 0; dx < c_dofs1D; ++dx)
5016 {
5017 w2[dx][ey][ez] = 0.0;
5018 for (int dy = 0; dy < c_dofs1D; ++dy)
5019 {
5020 w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5021 }
5022 }
5023 }
5024 }
5025
5026 // contract in x
5027 for (int ez = 0; ez < c_dofs1D; ++ez)
5028 {
5029 for (int ey = 0; ey < c_dofs1D; ++ey)
5030 {
5031 for (int ex = 0; ex < o_dofs1D; ++ex)
5032 {
5033 double s = 0.0;
5034 for (int dx = 0; dx < c_dofs1D; ++dx)
5035 {
5036 s += G(ex, dx) * w2[dx][ey][ez];
5037 }
5038 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5039 y(local_index, e) += s;
5040 }
5041 }
5042 }
5043
5044 // ---
5045 // dofs that point parallel to y-axis (open in y, closed in x, z)
5046 // ---
5047
5048 // contract in z
5049 for (int ez = 0; ez < c_dofs1D; ++ez)
5050 {
5051 for (int dx = 0; dx < c_dofs1D; ++dx)
5052 {
5053 for (int dy = 0; dy < c_dofs1D; ++dy)
5054 {
5055 w1[dx][dy][ez] = 0.0;
5056 for (int dz = 0; dz < c_dofs1D; ++dz)
5057 {
5058 w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5059 }
5060 }
5061 }
5062 }
5063
5064 // contract in y
5065 for (int ez = 0; ez < c_dofs1D; ++ez)
5066 {
5067 for (int ey = 0; ey < o_dofs1D; ++ey)
5068 {
5069 for (int dx = 0; dx < c_dofs1D; ++dx)
5070 {
5071 w2[dx][ey][ez] = 0.0;
5072 for (int dy = 0; dy < c_dofs1D; ++dy)
5073 {
5074 w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
5075 }
5076 }
5077 }
5078 }
5079
5080 // contract in x
5081 for (int ez = 0; ez < c_dofs1D; ++ez)
5082 {
5083 for (int ey = 0; ey < o_dofs1D; ++ey)
5084 {
5085 for (int ex = 0; ex < c_dofs1D; ++ex)
5086 {
5087 double s = 0.0;
5088 for (int dx = 0; dx < c_dofs1D; ++dx)
5089 {
5090 s += B(ex, dx) * w2[dx][ey][ez];
5091 }
5092 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5093 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5094 y(local_index, e) += s;
5095 }
5096 }
5097 }
5098
5099 // ---
5100 // dofs that point parallel to z-axis (open in z, closed in x, y)
5101 // ---
5102
5103 // contract in z
5104 for (int ez = 0; ez < o_dofs1D; ++ez)
5105 {
5106 for (int dx = 0; dx < c_dofs1D; ++dx)
5107 {
5108 for (int dy = 0; dy < c_dofs1D; ++dy)
5109 {
5110 w1[dx][dy][ez] = 0.0;
5111 for (int dz = 0; dz < c_dofs1D; ++dz)
5112 {
5113 w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
5114 }
5115 }
5116 }
5117 }
5118
5119 // contract in y
5120 for (int ez = 0; ez < o_dofs1D; ++ez)
5121 {
5122 for (int ey = 0; ey < c_dofs1D; ++ey)
5123 {
5124 for (int dx = 0; dx < c_dofs1D; ++dx)
5125 {
5126 w2[dx][ey][ez] = 0.0;
5127 for (int dy = 0; dy < c_dofs1D; ++dy)
5128 {
5129 w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5130 }
5131 }
5132 }
5133 }
5134
5135 // contract in x
5136 for (int ez = 0; ez < o_dofs1D; ++ez)
5137 {
5138 for (int ey = 0; ey < c_dofs1D; ++ey)
5139 {
5140 for (int ex = 0; ex < c_dofs1D; ++ex)
5141 {
5142 double s = 0.0;
5143 for (int dx = 0; dx < c_dofs1D; ++dx)
5144 {
5145 s += B(ex, dx) * w2[dx][ey][ez];
5146 }
5147 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5148 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5149 y(local_index, e) += s;
5150 }
5151 }
5152 }
5153 });
5154 }
5155
5156 // Specialization of PAHcurlApplyGradient3D to the case where
PAHcurlApplyGradient3DBId(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & G_,const Vector & x_,Vector & y_)5157 static void PAHcurlApplyGradient3DBId(const int c_dofs1D,
5158 const int o_dofs1D,
5159 const int NE,
5160 const Array<double> &G_,
5161 const Vector &x_,
5162 Vector &y_)
5163 {
5164 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5165
5166 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5167 auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5168
5169 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5170 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5171
5172 MFEM_FORALL(e, NE,
5173 {
5174 double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5175 double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5176
5177 // ---
5178 // dofs that point parallel to x-axis (open in x, closed in y, z)
5179 // ---
5180
5181 // contract in z
5182 for (int ez = 0; ez < c_dofs1D; ++ez)
5183 {
5184 for (int dx = 0; dx < c_dofs1D; ++dx)
5185 {
5186 for (int dy = 0; dy < c_dofs1D; ++dy)
5187 {
5188 const int dz = ez;
5189 w1[dx][dy][ez] = x(dx, dy, dz, e);
5190 }
5191 }
5192 }
5193
5194 // contract in y
5195 for (int ez = 0; ez < c_dofs1D; ++ez)
5196 {
5197 for (int ey = 0; ey < c_dofs1D; ++ey)
5198 {
5199 for (int dx = 0; dx < c_dofs1D; ++dx)
5200 {
5201 const int dy = ey;
5202 w2[dx][ey][ez] = w1[dx][dy][ez];
5203 }
5204 }
5205 }
5206
5207 // contract in x
5208 for (int ez = 0; ez < c_dofs1D; ++ez)
5209 {
5210 for (int ey = 0; ey < c_dofs1D; ++ey)
5211 {
5212 for (int ex = 0; ex < o_dofs1D; ++ex)
5213 {
5214 double s = 0.0;
5215 for (int dx = 0; dx < c_dofs1D; ++dx)
5216 {
5217 s += G(ex, dx) * w2[dx][ey][ez];
5218 }
5219 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5220 y(local_index, e) += s;
5221 }
5222 }
5223 }
5224
5225 // ---
5226 // dofs that point parallel to y-axis (open in y, closed in x, z)
5227 // ---
5228
5229 // contract in z
5230 for (int ez = 0; ez < c_dofs1D; ++ez)
5231 {
5232 for (int dx = 0; dx < c_dofs1D; ++dx)
5233 {
5234 for (int dy = 0; dy < c_dofs1D; ++dy)
5235 {
5236 const int dz = ez;
5237 w1[dx][dy][ez] = x(dx, dy, dz, e);
5238 }
5239 }
5240 }
5241
5242 // contract in y
5243 for (int ez = 0; ez < c_dofs1D; ++ez)
5244 {
5245 for (int ey = 0; ey < o_dofs1D; ++ey)
5246 {
5247 for (int dx = 0; dx < c_dofs1D; ++dx)
5248 {
5249 w2[dx][ey][ez] = 0.0;
5250 for (int dy = 0; dy < c_dofs1D; ++dy)
5251 {
5252 w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
5253 }
5254 }
5255 }
5256 }
5257
5258 // contract in x
5259 for (int ez = 0; ez < c_dofs1D; ++ez)
5260 {
5261 for (int ey = 0; ey < o_dofs1D; ++ey)
5262 {
5263 for (int ex = 0; ex < c_dofs1D; ++ex)
5264 {
5265 const int dx = ex;
5266 const double s = w2[dx][ey][ez];
5267 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5268 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5269 y(local_index, e) += s;
5270 }
5271 }
5272 }
5273
5274 // ---
5275 // dofs that point parallel to z-axis (open in z, closed in x, y)
5276 // ---
5277
5278 // contract in z
5279 for (int ez = 0; ez < o_dofs1D; ++ez)
5280 {
5281 for (int dx = 0; dx < c_dofs1D; ++dx)
5282 {
5283 for (int dy = 0; dy < c_dofs1D; ++dy)
5284 {
5285 w1[dx][dy][ez] = 0.0;
5286 for (int dz = 0; dz < c_dofs1D; ++dz)
5287 {
5288 w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
5289 }
5290 }
5291 }
5292 }
5293
5294 // contract in y
5295 for (int ez = 0; ez < o_dofs1D; ++ez)
5296 {
5297 for (int ey = 0; ey < c_dofs1D; ++ey)
5298 {
5299 for (int dx = 0; dx < c_dofs1D; ++dx)
5300 {
5301 const int dy = ey;
5302 w2[dx][ey][ez] = w1[dx][dy][ez];
5303 }
5304 }
5305 }
5306
5307 // contract in x
5308 for (int ez = 0; ez < o_dofs1D; ++ez)
5309 {
5310 for (int ey = 0; ey < c_dofs1D; ++ey)
5311 {
5312 for (int ex = 0; ex < c_dofs1D; ++ex)
5313 {
5314 const int dx = ex;
5315 const double s = w2[dx][ey][ez];
5316 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5317 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5318 y(local_index, e) += s;
5319 }
5320 }
5321 }
5322 });
5323 }
5324
PAHcurlApplyGradientTranspose3D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & B_,const Array<double> & G_,const Vector & x_,Vector & y_)5325 static void PAHcurlApplyGradientTranspose3D(
5326 const int c_dofs1D, const int o_dofs1D, const int NE,
5327 const Array<double> &B_, const Array<double> &G_,
5328 const Vector &x_, Vector &y_)
5329 {
5330 auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5331 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5332
5333 auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5334 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5335
5336 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5337 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5338
5339 MFEM_FORALL(e, NE,
5340 {
5341 double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5342 double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5343 // ---
5344 // dofs that point parallel to x-axis (open in x, closed in y, z)
5345 // ---
5346
5347 // contract in z
5348 for (int dz = 0; dz < c_dofs1D; ++dz)
5349 {
5350 for (int ex = 0; ex < o_dofs1D; ++ex)
5351 {
5352 for (int ey = 0; ey < c_dofs1D; ++ey)
5353 {
5354 w1[ex][ey][dz] = 0.0;
5355 for (int ez = 0; ez < c_dofs1D; ++ez)
5356 {
5357 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5358 w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
5359 }
5360 }
5361 }
5362 }
5363
5364 // contract in y
5365 for (int dz = 0; dz < c_dofs1D; ++dz)
5366 {
5367 for (int dy = 0; dy < c_dofs1D; ++dy)
5368 {
5369 for (int ex = 0; ex < o_dofs1D; ++ex)
5370 {
5371 w2[ex][dy][dz] = 0.0;
5372 for (int ey = 0; ey < c_dofs1D; ++ey)
5373 {
5374 w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
5375 }
5376 }
5377 }
5378 }
5379
5380 // contract in x
5381 for (int dz = 0; dz < c_dofs1D; ++dz)
5382 {
5383 for (int dy = 0; dy < c_dofs1D; ++dy)
5384 {
5385 for (int dx = 0; dx < c_dofs1D; ++dx)
5386 {
5387 double s = 0.0;
5388 for (int ex = 0; ex < o_dofs1D; ++ex)
5389 {
5390 s += G(ex, dx) * w2[ex][dy][dz];
5391 }
5392 y(dx, dy, dz, e) += s;
5393 }
5394 }
5395 }
5396
5397 // ---
5398 // dofs that point parallel to y-axis (open in y, closed in x, z)
5399 // ---
5400
5401 // contract in z
5402 for (int dz = 0; dz < c_dofs1D; ++dz)
5403 {
5404 for (int ex = 0; ex < c_dofs1D; ++ex)
5405 {
5406 for (int ey = 0; ey < o_dofs1D; ++ey)
5407 {
5408 w1[ex][ey][dz] = 0.0;
5409 for (int ez = 0; ez < c_dofs1D; ++ez)
5410 {
5411 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5412 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5413 w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
5414 }
5415 }
5416 }
5417 }
5418
5419 // contract in y
5420 for (int dz = 0; dz < c_dofs1D; ++dz)
5421 {
5422 for (int dy = 0; dy < c_dofs1D; ++dy)
5423 {
5424 for (int ex = 0; ex < c_dofs1D; ++ex)
5425 {
5426 w2[ex][dy][dz] = 0.0;
5427 for (int ey = 0; ey < o_dofs1D; ++ey)
5428 {
5429 w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
5430 }
5431 }
5432 }
5433 }
5434
5435 // contract in x
5436 for (int dz = 0; dz < c_dofs1D; ++dz)
5437 {
5438 for (int dy = 0; dy < c_dofs1D; ++dy)
5439 {
5440 for (int dx = 0; dx < c_dofs1D; ++dx)
5441 {
5442 double s = 0.0;
5443 for (int ex = 0; ex < c_dofs1D; ++ex)
5444 {
5445 s += B(ex, dx) * w2[ex][dy][dz];
5446 }
5447 y(dx, dy, dz, e) += s;
5448 }
5449 }
5450 }
5451
5452 // ---
5453 // dofs that point parallel to z-axis (open in z, closed in x, y)
5454 // ---
5455
5456 // contract in z
5457 for (int dz = 0; dz < c_dofs1D; ++dz)
5458 {
5459 for (int ex = 0; ex < c_dofs1D; ++ex)
5460 {
5461 for (int ey = 0; ey < c_dofs1D; ++ey)
5462 {
5463 w1[ex][ey][dz] = 0.0;
5464 for (int ez = 0; ez < o_dofs1D; ++ez)
5465 {
5466 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5467 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5468 w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
5469 }
5470 }
5471 }
5472 }
5473
5474 // contract in y
5475 for (int dz = 0; dz < c_dofs1D; ++dz)
5476 {
5477 for (int dy = 0; dy < c_dofs1D; ++dy)
5478 {
5479 for (int ex = 0; ex < c_dofs1D; ++ex)
5480 {
5481 w2[ex][dy][dz] = 0.0;
5482 for (int ey = 0; ey < c_dofs1D; ++ey)
5483 {
5484 w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
5485 }
5486 }
5487 }
5488 }
5489
5490 // contract in x
5491 for (int dz = 0; dz < c_dofs1D; ++dz)
5492 {
5493 for (int dy = 0; dy < c_dofs1D; ++dy)
5494 {
5495 for (int dx = 0; dx < c_dofs1D; ++dx)
5496 {
5497 double s = 0.0;
5498 for (int ex = 0; ex < c_dofs1D; ++ex)
5499 {
5500 s += B(ex, dx) * w2[ex][dy][dz];
5501 }
5502 y(dx, dy, dz, e) += s;
5503 }
5504 }
5505 }
5506 });
5507 }
5508
5509 // Specialization of PAHcurlApplyGradientTranspose3D to the case where
PAHcurlApplyGradientTranspose3DBId(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & G_,const Vector & x_,Vector & y_)5510 static void PAHcurlApplyGradientTranspose3DBId(
5511 const int c_dofs1D, const int o_dofs1D, const int NE,
5512 const Array<double> &G_,
5513 const Vector &x_, Vector &y_)
5514 {
5515 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5516
5517 auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5518 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5519
5520 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5521 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5522
5523 MFEM_FORALL(e, NE,
5524 {
5525 double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5526 double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5527 // ---
5528 // dofs that point parallel to x-axis (open in x, closed in y, z)
5529 // ---
5530
5531 // contract in z
5532 for (int dz = 0; dz < c_dofs1D; ++dz)
5533 {
5534 for (int ex = 0; ex < o_dofs1D; ++ex)
5535 {
5536 for (int ey = 0; ey < c_dofs1D; ++ey)
5537 {
5538 const int ez = dz;
5539 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5540 w1[ex][ey][dz] = x(local_index, e);
5541 }
5542 }
5543 }
5544
5545 // contract in y
5546 for (int dz = 0; dz < c_dofs1D; ++dz)
5547 {
5548 for (int dy = 0; dy < c_dofs1D; ++dy)
5549 {
5550 for (int ex = 0; ex < o_dofs1D; ++ex)
5551 {
5552 const int ey = dy;
5553 w2[ex][dy][dz] = w1[ex][ey][dz];
5554 }
5555 }
5556 }
5557
5558 // contract in x
5559 for (int dz = 0; dz < c_dofs1D; ++dz)
5560 {
5561 for (int dy = 0; dy < c_dofs1D; ++dy)
5562 {
5563 for (int dx = 0; dx < c_dofs1D; ++dx)
5564 {
5565 double s = 0.0;
5566 for (int ex = 0; ex < o_dofs1D; ++ex)
5567 {
5568 s += G(ex, dx) * w2[ex][dy][dz];
5569 }
5570 y(dx, dy, dz, e) += s;
5571 }
5572 }
5573 }
5574
5575 // ---
5576 // dofs that point parallel to y-axis (open in y, closed in x, z)
5577 // ---
5578
5579 // contract in z
5580 for (int dz = 0; dz < c_dofs1D; ++dz)
5581 {
5582 for (int ex = 0; ex < c_dofs1D; ++ex)
5583 {
5584 for (int ey = 0; ey < o_dofs1D; ++ey)
5585 {
5586 const int ez = dz;
5587 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5588 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5589 w1[ex][ey][dz] = x(local_index, e);
5590 }
5591 }
5592 }
5593
5594 // contract in y
5595 for (int dz = 0; dz < c_dofs1D; ++dz)
5596 {
5597 for (int dy = 0; dy < c_dofs1D; ++dy)
5598 {
5599 for (int ex = 0; ex < c_dofs1D; ++ex)
5600 {
5601 w2[ex][dy][dz] = 0.0;
5602 for (int ey = 0; ey < o_dofs1D; ++ey)
5603 {
5604 w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
5605 }
5606 }
5607 }
5608 }
5609
5610 // contract in x
5611 for (int dz = 0; dz < c_dofs1D; ++dz)
5612 {
5613 for (int dy = 0; dy < c_dofs1D; ++dy)
5614 {
5615 for (int dx = 0; dx < c_dofs1D; ++dx)
5616 {
5617 const int ex = dx;
5618 double s = w2[ex][dy][dz];
5619 y(dx, dy, dz, e) += s;
5620 }
5621 }
5622 }
5623
5624 // ---
5625 // dofs that point parallel to z-axis (open in z, closed in x, y)
5626 // ---
5627
5628 // contract in z
5629 for (int dz = 0; dz < c_dofs1D; ++dz)
5630 {
5631 for (int ex = 0; ex < c_dofs1D; ++ex)
5632 {
5633 for (int ey = 0; ey < c_dofs1D; ++ey)
5634 {
5635 w1[ex][ey][dz] = 0.0;
5636 for (int ez = 0; ez < o_dofs1D; ++ez)
5637 {
5638 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5639 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5640 w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
5641 }
5642 }
5643 }
5644 }
5645
5646 // contract in y
5647 for (int dz = 0; dz < c_dofs1D; ++dz)
5648 {
5649 for (int dy = 0; dy < c_dofs1D; ++dy)
5650 {
5651 for (int ex = 0; ex < c_dofs1D; ++ex)
5652 {
5653 const int ey = dy;
5654 w2[ex][dy][dz] = w1[ex][ey][dz];
5655 }
5656 }
5657 }
5658
5659 // contract in x
5660 for (int dz = 0; dz < c_dofs1D; ++dz)
5661 {
5662 for (int dy = 0; dy < c_dofs1D; ++dy)
5663 {
5664 for (int dx = 0; dx < c_dofs1D; ++dx)
5665 {
5666 const int ex = dx;
5667 double s = w2[ex][dy][dz];
5668 y(dx, dy, dz, e) += s;
5669 }
5670 }
5671 }
5672 });
5673 }
5674
AssemblePA(const FiniteElementSpace & trial_fes,const FiniteElementSpace & test_fes)5675 void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes,
5676 const FiniteElementSpace &test_fes)
5677 {
5678 // Assumes tensor-product elements, with a vector test space and H^1 trial space.
5679 Mesh *mesh = trial_fes.GetMesh();
5680 const FiniteElement *trial_fel = trial_fes.GetFE(0);
5681 const FiniteElement *test_fel = test_fes.GetFE(0);
5682
5683 const NodalTensorFiniteElement *trial_el =
5684 dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
5685 MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!");
5686
5687 const VectorTensorFiniteElement *test_el =
5688 dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
5689 MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
5690
5691 const int dims = trial_el->GetDim();
5692 MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!");
5693 dim = mesh->Dimension();
5694 MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!");
5695 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(),
5696 "Orders do not match!");
5697 ne = trial_fes.GetNE();
5698
5699 const int order = trial_el->GetOrder();
5700 dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType());
5701 mfem::QuadratureFunctions1D qf1d;
5702 mfem::IntegrationRule closed_ir;
5703 closed_ir.SetSize(order + 1);
5704 qf1d.GaussLobatto(order + 1, &closed_ir);
5705 mfem::IntegrationRule open_ir;
5706 open_ir.SetSize(order);
5707 qf1d.GaussLegendre(order, &open_ir);
5708
5709 maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR);
5710 o_dofs1D = maps_O_C->nqpt;
5711 if (trial_el->GetBasisType() == BasisType::GaussLobatto)
5712 {
5713 B_id = true;
5714 c_dofs1D = maps_O_C->ndof;
5715 }
5716 else
5717 {
5718 B_id = false;
5719 maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR);
5720 c_dofs1D = maps_C_C->nqpt;
5721 }
5722 }
5723
AddMultPA(const Vector & x,Vector & y) const5724 void GradientInterpolator::AddMultPA(const Vector &x, Vector &y) const
5725 {
5726 if (dim == 3)
5727 {
5728 if (B_id)
5729 {
5730 PAHcurlApplyGradient3DBId(c_dofs1D, o_dofs1D, ne,
5731 maps_O_C->G, x, y);
5732 }
5733 else
5734 {
5735 PAHcurlApplyGradient3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
5736 maps_O_C->G, x, y);
5737 }
5738 }
5739 else if (dim == 2)
5740 {
5741 if (B_id)
5742 {
5743 PAHcurlApplyGradient2DBId(c_dofs1D, o_dofs1D, ne,
5744 maps_O_C->G, x, y);
5745 }
5746 else
5747 {
5748 PAHcurlApplyGradient2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->G,
5749 x, y);
5750 }
5751 }
5752 else
5753 {
5754 mfem_error("Bad dimension!");
5755 }
5756 }
5757
AddMultTransposePA(const Vector & x,Vector & y) const5758 void GradientInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const
5759 {
5760 if (dim == 3)
5761 {
5762 if (B_id)
5763 {
5764 PAHcurlApplyGradientTranspose3DBId(c_dofs1D, o_dofs1D, ne,
5765 maps_O_C->G, x, y);
5766 }
5767 else
5768 {
5769 PAHcurlApplyGradientTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
5770 maps_O_C->G, x, y);
5771 }
5772 }
5773 else if (dim == 2)
5774 {
5775 if (B_id)
5776 {
5777 PAHcurlApplyGradientTranspose2DBId(c_dofs1D, o_dofs1D, ne,
5778 maps_O_C->G, x, y);
5779 }
5780 else
5781 {
5782 PAHcurlApplyGradientTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
5783 maps_O_C->G, x, y);
5784 }
5785 }
5786 else
5787 {
5788 mfem_error("Bad dimension!");
5789 }
5790 }
5791
PAHcurlVecH1IdentityApply3D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & Bclosed,const Array<double> & Bopen,const Vector & pa_data,const Vector & x_,Vector & y_)5792 static void PAHcurlVecH1IdentityApply3D(const int c_dofs1D,
5793 const int o_dofs1D,
5794 const int NE,
5795 const Array<double> &Bclosed,
5796 const Array<double> &Bopen,
5797 const Vector &pa_data,
5798 const Vector &x_,
5799 Vector &y_)
5800 {
5801 auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
5802 auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
5803
5804 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE);
5805 auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5806
5807 auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D),
5808 NE);
5809
5810 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5811 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5812
5813 MFEM_FORALL(e, NE,
5814 {
5815 double w1[3][MAX_D1D][MAX_D1D][MAX_D1D];
5816 double w2[3][MAX_D1D][MAX_D1D][MAX_D1D];
5817
5818 // dofs that point parallel to x-axis (open in x, closed in y, z)
5819
5820 // contract in z
5821 for (int ez = 0; ez < c_dofs1D; ++ez)
5822 {
5823 for (int dx = 0; dx < c_dofs1D; ++dx)
5824 {
5825 for (int dy = 0; dy < c_dofs1D; ++dy)
5826 {
5827 for (int j=0; j<3; ++j)
5828 {
5829 w1[j][dx][dy][ez] = 0.0;
5830 for (int dz = 0; dz < c_dofs1D; ++dz)
5831 {
5832 w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
5833 }
5834 }
5835 }
5836 }
5837 }
5838
5839 // contract in y
5840 for (int ez = 0; ez < c_dofs1D; ++ez)
5841 {
5842 for (int ey = 0; ey < c_dofs1D; ++ey)
5843 {
5844 for (int dx = 0; dx < c_dofs1D; ++dx)
5845 {
5846 for (int j=0; j<3; ++j)
5847 {
5848 w2[j][dx][ey][ez] = 0.0;
5849 for (int dy = 0; dy < c_dofs1D; ++dy)
5850 {
5851 w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez];
5852 }
5853 }
5854 }
5855 }
5856 }
5857
5858 // contract in x
5859 for (int ez = 0; ez < c_dofs1D; ++ez)
5860 {
5861 for (int ey = 0; ey < c_dofs1D; ++ey)
5862 {
5863 for (int ex = 0; ex < o_dofs1D; ++ex)
5864 {
5865 for (int j=0; j<3; ++j)
5866 {
5867 double s = 0.0;
5868 for (int dx = 0; dx < c_dofs1D; ++dx)
5869 {
5870 s += Bo(ex, dx) * w2[j][dx][ey][ez];
5871 }
5872 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5873 y(local_index, e) += s * vk(j, local_index, e);
5874 }
5875 }
5876 }
5877 }
5878
5879 // dofs that point parallel to y-axis (open in y, closed in x, z)
5880
5881 // contract in z
5882 for (int ez = 0; ez < c_dofs1D; ++ez)
5883 {
5884 for (int dx = 0; dx < c_dofs1D; ++dx)
5885 {
5886 for (int dy = 0; dy < c_dofs1D; ++dy)
5887 {
5888 for (int j=0; j<3; ++j)
5889 {
5890 w1[j][dx][dy][ez] = 0.0;
5891 for (int dz = 0; dz < c_dofs1D; ++dz)
5892 {
5893 w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
5894 }
5895 }
5896 }
5897 }
5898 }
5899
5900 // contract in y
5901 for (int ez = 0; ez < c_dofs1D; ++ez)
5902 {
5903 for (int ey = 0; ey < o_dofs1D; ++ey)
5904 {
5905 for (int dx = 0; dx < c_dofs1D; ++dx)
5906 {
5907 for (int j=0; j<3; ++j)
5908 {
5909 w2[j][dx][ey][ez] = 0.0;
5910 for (int dy = 0; dy < c_dofs1D; ++dy)
5911 {
5912 w2[j][dx][ey][ez] += Bo(ey, dy) * w1[j][dx][dy][ez];
5913 }
5914 }
5915 }
5916 }
5917 }
5918
5919 // contract in x
5920 for (int ez = 0; ez < c_dofs1D; ++ez)
5921 {
5922 for (int ey = 0; ey < o_dofs1D; ++ey)
5923 {
5924 for (int ex = 0; ex < c_dofs1D; ++ex)
5925 {
5926 for (int j=0; j<3; ++j)
5927 {
5928 double s = 0.0;
5929 for (int dx = 0; dx < c_dofs1D; ++dx)
5930 {
5931 s += Bc(ex, dx) * w2[j][dx][ey][ez];
5932 }
5933 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5934 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5935 y(local_index, e) += s * vk(j, local_index, e);
5936 }
5937 }
5938 }
5939 }
5940
5941 // dofs that point parallel to z-axis (open in z, closed in x, y)
5942
5943 // contract in z
5944 for (int ez = 0; ez < o_dofs1D; ++ez)
5945 {
5946 for (int dx = 0; dx < c_dofs1D; ++dx)
5947 {
5948 for (int dy = 0; dy < c_dofs1D; ++dy)
5949 {
5950 for (int j=0; j<3; ++j)
5951 {
5952 w1[j][dx][dy][ez] = 0.0;
5953 for (int dz = 0; dz < c_dofs1D; ++dz)
5954 {
5955 w1[j][dx][dy][ez] += Bo(ez, dz) * x(dx, dy, dz, j, e);
5956 }
5957 }
5958 }
5959 }
5960 }
5961
5962 // contract in y
5963 for (int ez = 0; ez < o_dofs1D; ++ez)
5964 {
5965 for (int ey = 0; ey < c_dofs1D; ++ey)
5966 {
5967 for (int dx = 0; dx < c_dofs1D; ++dx)
5968 {
5969 for (int j=0; j<3; ++j)
5970 {
5971 w2[j][dx][ey][ez] = 0.0;
5972 for (int dy = 0; dy < c_dofs1D; ++dy)
5973 {
5974 w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez];
5975 }
5976 }
5977 }
5978 }
5979 }
5980
5981 // contract in x
5982 for (int ez = 0; ez < o_dofs1D; ++ez)
5983 {
5984 for (int ey = 0; ey < c_dofs1D; ++ey)
5985 {
5986 for (int ex = 0; ex < c_dofs1D; ++ex)
5987 {
5988 for (int j=0; j<3; ++j)
5989 {
5990 double s = 0.0;
5991 for (int dx = 0; dx < c_dofs1D; ++dx)
5992 {
5993 s += Bc(ex, dx) * w2[j][dx][ey][ez];
5994 }
5995 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5996 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5997 y(local_index, e) += s * vk(j, local_index, e);
5998 }
5999 }
6000 }
6001 }
6002 });
6003 }
6004
PAHcurlVecH1IdentityApplyTranspose3D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & Bclosed,const Array<double> & Bopen,const Vector & pa_data,const Vector & x_,Vector & y_)6005 static void PAHcurlVecH1IdentityApplyTranspose3D(const int c_dofs1D,
6006 const int o_dofs1D,
6007 const int NE,
6008 const Array<double> &Bclosed,
6009 const Array<double> &Bopen,
6010 const Vector &pa_data,
6011 const Vector &x_,
6012 Vector &y_)
6013 {
6014 auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
6015 auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
6016
6017 auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6018 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE);
6019
6020 auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D),
6021 NE);
6022
6023 constexpr static int MAX_D1D = HCURL_MAX_D1D;
6024
6025 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6026
6027 MFEM_FORALL(e, NE,
6028 {
6029 double w1[3][MAX_D1D][MAX_D1D][MAX_D1D];
6030 double w2[3][MAX_D1D][MAX_D1D][MAX_D1D];
6031
6032 // dofs that point parallel to x-axis (open in x, closed in y, z)
6033
6034 // contract in x
6035 for (int ez = 0; ez < c_dofs1D; ++ez)
6036 {
6037 for (int ey = 0; ey < c_dofs1D; ++ey)
6038 {
6039 for (int j=0; j<3; ++j)
6040 {
6041 for (int dx = 0; dx < c_dofs1D; ++dx)
6042 {
6043 w2[j][dx][ey][ez] = 0.0;
6044 }
6045 for (int ex = 0; ex < o_dofs1D; ++ex)
6046 {
6047 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6048 const double xv = x(local_index, e) * vk(j, local_index, e);
6049 for (int dx = 0; dx < c_dofs1D; ++dx)
6050 {
6051 w2[j][dx][ey][ez] += xv * Bo(ex, dx);
6052 }
6053 }
6054 }
6055 }
6056 }
6057
6058 // contract in y
6059 for (int ez = 0; ez < c_dofs1D; ++ez)
6060 {
6061 for (int dx = 0; dx < c_dofs1D; ++dx)
6062 {
6063 for (int dy = 0; dy < c_dofs1D; ++dy)
6064 {
6065 for (int j=0; j<3; ++j)
6066 {
6067 w1[j][dx][dy][ez] = 0.0;
6068 for (int ey = 0; ey < c_dofs1D; ++ey)
6069 {
6070 w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy);
6071 }
6072 }
6073 }
6074 }
6075 }
6076
6077 // contract in z
6078 for (int dx = 0; dx < c_dofs1D; ++dx)
6079 {
6080 for (int dy = 0; dy < c_dofs1D; ++dy)
6081 {
6082 for (int dz = 0; dz < c_dofs1D; ++dz)
6083 {
6084 for (int j=0; j<3; ++j)
6085 {
6086 double s = 0.0;
6087 for (int ez = 0; ez < c_dofs1D; ++ez)
6088 {
6089 s += w1[j][dx][dy][ez] * Bc(ez, dz);
6090 }
6091 y(dx, dy, dz, j, e) += s;
6092 }
6093 }
6094 }
6095 }
6096
6097 // dofs that point parallel to y-axis (open in y, closed in x, z)
6098
6099 // contract in x
6100 for (int ez = 0; ez < c_dofs1D; ++ez)
6101 {
6102 for (int ey = 0; ey < o_dofs1D; ++ey)
6103 {
6104 for (int j=0; j<3; ++j)
6105 {
6106 for (int dx = 0; dx < c_dofs1D; ++dx)
6107 {
6108 w2[j][dx][ey][ez] = 0.0;
6109 }
6110 for (int ex = 0; ex < c_dofs1D; ++ex)
6111 {
6112 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6113 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6114 const double xv = x(local_index, e) * vk(j, local_index, e);
6115 for (int dx = 0; dx < c_dofs1D; ++dx)
6116 {
6117 w2[j][dx][ey][ez] += xv * Bc(ex, dx);
6118 }
6119 }
6120 }
6121 }
6122 }
6123
6124 // contract in y
6125 for (int ez = 0; ez < c_dofs1D; ++ez)
6126 {
6127 for (int dx = 0; dx < c_dofs1D; ++dx)
6128 {
6129 for (int dy = 0; dy < c_dofs1D; ++dy)
6130 {
6131 for (int j=0; j<3; ++j)
6132 {
6133 w1[j][dx][dy][ez] = 0.0;
6134 for (int ey = 0; ey < o_dofs1D; ++ey)
6135 {
6136 w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bo(ey, dy);
6137 }
6138 }
6139 }
6140 }
6141 }
6142
6143 // contract in z
6144 for (int dx = 0; dx < c_dofs1D; ++dx)
6145 {
6146 for (int dy = 0; dy < c_dofs1D; ++dy)
6147 {
6148 for (int dz = 0; dz < c_dofs1D; ++dz)
6149 {
6150 for (int j=0; j<3; ++j)
6151 {
6152 double s = 0.0;
6153 for (int ez = 0; ez < c_dofs1D; ++ez)
6154 {
6155 s += w1[j][dx][dy][ez] * Bc(ez, dz);
6156 }
6157 y(dx, dy, dz, j, e) += s;
6158 }
6159 }
6160 }
6161 }
6162
6163 // dofs that point parallel to z-axis (open in z, closed in x, y)
6164
6165 // contract in x
6166 for (int ez = 0; ez < o_dofs1D; ++ez)
6167 {
6168 for (int ey = 0; ey < c_dofs1D; ++ey)
6169 {
6170 for (int j=0; j<3; ++j)
6171 {
6172 for (int dx = 0; dx < c_dofs1D; ++dx)
6173 {
6174 w2[j][dx][ey][ez] = 0.0;
6175 }
6176 for (int ex = 0; ex < c_dofs1D; ++ex)
6177 {
6178 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6179 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6180 const double xv = x(local_index, e) * vk(j, local_index, e);
6181 for (int dx = 0; dx < c_dofs1D; ++dx)
6182 {
6183 w2[j][dx][ey][ez] += xv * Bc(ex, dx);
6184 }
6185 }
6186 }
6187 }
6188 }
6189
6190 // contract in y
6191 for (int ez = 0; ez < o_dofs1D; ++ez)
6192 {
6193 for (int dx = 0; dx < c_dofs1D; ++dx)
6194 {
6195 for (int dy = 0; dy < c_dofs1D; ++dy)
6196 {
6197 for (int j=0; j<3; ++j)
6198 {
6199 w1[j][dx][dy][ez] = 0.0;
6200 for (int ey = 0; ey < c_dofs1D; ++ey)
6201 {
6202 w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy);
6203 }
6204 }
6205 }
6206 }
6207 }
6208
6209 // contract in z
6210 for (int dx = 0; dx < c_dofs1D; ++dx)
6211 {
6212 for (int dy = 0; dy < c_dofs1D; ++dy)
6213 {
6214 for (int dz = 0; dz < c_dofs1D; ++dz)
6215 {
6216 for (int j=0; j<3; ++j)
6217 {
6218 double s = 0.0;
6219 for (int ez = 0; ez < o_dofs1D; ++ez)
6220 {
6221 s += w1[j][dx][dy][ez] * Bo(ez, dz);
6222 }
6223 y(dx, dy, dz, j, e) += s;
6224 }
6225 }
6226 }
6227 }
6228 });
6229 }
6230
PAHcurlVecH1IdentityApply2D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & Bclosed,const Array<double> & Bopen,const Vector & pa_data,const Vector & x_,Vector & y_)6231 static void PAHcurlVecH1IdentityApply2D(const int c_dofs1D,
6232 const int o_dofs1D,
6233 const int NE,
6234 const Array<double> &Bclosed,
6235 const Array<double> &Bopen,
6236 const Vector &pa_data,
6237 const Vector &x_,
6238 Vector &y_)
6239 {
6240 auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
6241 auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
6242
6243 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, 2, NE);
6244 auto y = Reshape(y_.ReadWrite(), (2 * c_dofs1D * o_dofs1D), NE);
6245
6246 auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE);
6247
6248 constexpr static int MAX_D1D = HCURL_MAX_D1D;
6249
6250 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6251
6252 MFEM_FORALL(e, NE,
6253 {
6254 double w[2][MAX_D1D][MAX_D1D];
6255
6256 // dofs that point parallel to x-axis (open in x, closed in y)
6257
6258 // contract in y
6259 for (int ey = 0; ey < c_dofs1D; ++ey)
6260 {
6261 for (int dx = 0; dx < c_dofs1D; ++dx)
6262 {
6263 for (int j=0; j<2; ++j)
6264 {
6265 w[j][dx][ey] = 0.0;
6266 for (int dy = 0; dy < c_dofs1D; ++dy)
6267 {
6268 w[j][dx][ey] += Bc(ey, dy) * x(dx, dy, j, e);
6269 }
6270 }
6271 }
6272 }
6273
6274 // contract in x
6275 for (int ey = 0; ey < c_dofs1D; ++ey)
6276 {
6277 for (int ex = 0; ex < o_dofs1D; ++ex)
6278 {
6279 for (int j=0; j<2; ++j)
6280 {
6281 double s = 0.0;
6282 for (int dx = 0; dx < c_dofs1D; ++dx)
6283 {
6284 s += Bo(ex, dx) * w[j][dx][ey];
6285 }
6286 const int local_index = ey*o_dofs1D + ex;
6287 y(local_index, e) += s * vk(j, local_index, e);
6288 }
6289 }
6290 }
6291
6292 // dofs that point parallel to y-axis (open in y, closed in x)
6293
6294 // contract in y
6295 for (int ey = 0; ey < o_dofs1D; ++ey)
6296 {
6297 for (int dx = 0; dx < c_dofs1D; ++dx)
6298 {
6299 for (int j=0; j<2; ++j)
6300 {
6301 w[j][dx][ey] = 0.0;
6302 for (int dy = 0; dy < c_dofs1D; ++dy)
6303 {
6304 w[j][dx][ey] += Bo(ey, dy) * x(dx, dy, j, e);
6305 }
6306 }
6307 }
6308 }
6309
6310 // contract in x
6311 for (int ey = 0; ey < o_dofs1D; ++ey)
6312 {
6313 for (int ex = 0; ex < c_dofs1D; ++ex)
6314 {
6315 for (int j=0; j<2; ++j)
6316 {
6317 double s = 0.0;
6318 for (int dx = 0; dx < c_dofs1D; ++dx)
6319 {
6320 s += Bc(ex, dx) * w[j][dx][ey];
6321 }
6322 const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6323 y(local_index, e) += s * vk(j, local_index, e);
6324 }
6325 }
6326 }
6327 });
6328 }
6329
PAHcurlVecH1IdentityApplyTranspose2D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & Bclosed,const Array<double> & Bopen,const Vector & pa_data,const Vector & x_,Vector & y_)6330 static void PAHcurlVecH1IdentityApplyTranspose2D(const int c_dofs1D,
6331 const int o_dofs1D,
6332 const int NE,
6333 const Array<double> &Bclosed,
6334 const Array<double> &Bopen,
6335 const Vector &pa_data,
6336 const Vector &x_,
6337 Vector &y_)
6338 {
6339 auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
6340 auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
6341
6342 auto x = Reshape(x_.Read(), (2 * c_dofs1D * o_dofs1D), NE);
6343 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, 2, NE);
6344
6345 auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE);
6346
6347 constexpr static int MAX_D1D = HCURL_MAX_D1D;
6348 //constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
6349
6350 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6351
6352 MFEM_FORALL(e, NE,
6353 {
6354 double w[2][MAX_D1D][MAX_D1D];
6355
6356 // dofs that point parallel to x-axis (open in x, closed in y)
6357
6358 // contract in x
6359 for (int ey = 0; ey < c_dofs1D; ++ey)
6360 {
6361 for (int dx = 0; dx < c_dofs1D; ++dx)
6362 {
6363 for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; }
6364 }
6365 for (int ex = 0; ex < o_dofs1D; ++ex)
6366 {
6367 const int local_index = ey*o_dofs1D + ex;
6368 const double xd = x(local_index, e);
6369
6370 for (int dx = 0; dx < c_dofs1D; ++dx)
6371 {
6372 for (int j=0; j<2; ++j)
6373 {
6374 w[j][dx][ey] += Bo(ex, dx) * xd * vk(j, local_index, e);
6375 }
6376 }
6377 }
6378 }
6379
6380 // contract in y
6381 for (int dx = 0; dx < c_dofs1D; ++dx)
6382 {
6383 for (int dy = 0; dy < c_dofs1D; ++dy)
6384 {
6385 for (int j=0; j<2; ++j)
6386 {
6387 double s = 0.0;
6388 for (int ey = 0; ey < c_dofs1D; ++ey)
6389 {
6390 s += w[j][dx][ey] * Bc(ey, dy);
6391 }
6392 y(dx, dy, j, e) += s;
6393 }
6394 }
6395 }
6396
6397 // dofs that point parallel to y-axis (open in y, closed in x)
6398
6399 // contract in x
6400 for (int ey = 0; ey < o_dofs1D; ++ey)
6401 {
6402 for (int dx = 0; dx < c_dofs1D; ++dx)
6403 {
6404 for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; }
6405 }
6406 for (int ex = 0; ex < c_dofs1D; ++ex)
6407 {
6408 const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6409 const double xd = x(local_index, e);
6410 for (int dx = 0; dx < c_dofs1D; ++dx)
6411 {
6412 for (int j=0; j<2; ++j)
6413 {
6414 w[j][dx][ey] += Bc(ex, dx) * xd * vk(j, local_index, e);
6415 }
6416 }
6417 }
6418 }
6419
6420 // contract in y
6421 for (int dx = 0; dx < c_dofs1D; ++dx)
6422 {
6423 for (int dy = 0; dy < c_dofs1D; ++dy)
6424 {
6425 for (int j=0; j<2; ++j)
6426 {
6427 double s = 0.0;
6428 for (int ey = 0; ey < o_dofs1D; ++ey)
6429 {
6430 s += w[j][dx][ey] * Bo(ey, dy);
6431 }
6432 y(dx, dy, j, e) += s;
6433 }
6434 }
6435 }
6436 });
6437 }
6438
AssemblePA(const FiniteElementSpace & trial_fes,const FiniteElementSpace & test_fes)6439 void IdentityInterpolator::AssemblePA(const FiniteElementSpace &trial_fes,
6440 const FiniteElementSpace &test_fes)
6441 {
6442 // Assumes tensor-product elements, with a vector test space and H^1 trial space.
6443 Mesh *mesh = trial_fes.GetMesh();
6444 const FiniteElement *trial_fel = trial_fes.GetFE(0);
6445 const FiniteElement *test_fel = test_fes.GetFE(0);
6446
6447 const NodalTensorFiniteElement *trial_el =
6448 dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
6449 MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!");
6450
6451 const VectorTensorFiniteElement *test_el =
6452 dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
6453 MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
6454
6455 const int dims = trial_el->GetDim();
6456 MFEM_VERIFY(dims == 2 || dims == 3, "");
6457
6458 dim = mesh->Dimension();
6459 MFEM_VERIFY(dim == 2 || dim == 3, "");
6460
6461 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
6462
6463 ne = trial_fes.GetNE();
6464
6465 const int order = trial_el->GetOrder();
6466 dofquad_fe = new H1_SegmentElement(order);
6467 mfem::QuadratureFunctions1D qf1d;
6468 mfem::IntegrationRule closed_ir;
6469 closed_ir.SetSize(order + 1);
6470 qf1d.GaussLobatto(order + 1, &closed_ir);
6471 mfem::IntegrationRule open_ir;
6472 open_ir.SetSize(order);
6473 qf1d.GaussLegendre(order, &open_ir);
6474
6475 maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR);
6476 maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR);
6477
6478 o_dofs1D = maps_O_C->nqpt;
6479 c_dofs1D = maps_C_C->nqpt;
6480 MFEM_VERIFY(maps_O_C->ndof == c_dofs1D &&
6481 maps_C_C->ndof == c_dofs1D, "Discrepancy in the number of DOFs");
6482
6483 const int ndof_test = (dim == 3) ? 3 * c_dofs1D * c_dofs1D * o_dofs1D
6484 : 2 * c_dofs1D * o_dofs1D;
6485
6486 const IntegrationRule & Nodes = test_el->GetNodes();
6487
6488 pa_data.SetSize(dim * ndof_test * ne, Device::GetMemoryType());
6489 auto op = Reshape(pa_data.HostWrite(), dim, ndof_test, ne);
6490
6491 const Array<int> &dofmap = test_el->GetDofMap();
6492
6493 if (dim == 3)
6494 {
6495 // Note that ND_HexahedronElement uses 6 vectors in tk rather than 3, with
6496 // the last 3 having negative signs. Here the signs are all positive, as
6497 // signs are applied in ElementRestriction.
6498
6499 const double tk[9] = { 1.,0.,0., 0.,1.,0., 0.,0.,1. };
6500
6501 for (int c=0; c<3; ++c)
6502 {
6503 for (int i=0; i<ndof_test/3; ++i)
6504 {
6505 const int d = (c*ndof_test/3) + i;
6506 // ND_HexahedronElement sets dof2tk = (dofmap < 0) ? 3+c : c, but here
6507 // no signs should be applied due to ElementRestriction.
6508 const int dof2tk = c;
6509 const int id = (dofmap[d] >= 0) ? dofmap[d] : -1 - dofmap[d];
6510
6511 for (int e=0; e<ne; ++e)
6512 {
6513 double v[3];
6514 ElementTransformation *tr = mesh->GetElementTransformation(e);
6515 tr->SetIntPoint(&Nodes.IntPoint(id));
6516 tr->Jacobian().Mult(tk + dof2tk*dim, v);
6517
6518 for (int j=0; j<3; ++j)
6519 {
6520 op(j,d,e) = v[j];
6521 }
6522 }
6523 }
6524 }
6525 }
6526 else // 2D case
6527 {
6528 const double tk[4] = { 1.,0., 0.,1. };
6529 for (int c=0; c<2; ++c)
6530 {
6531 for (int i=0; i<ndof_test/2; ++i)
6532 {
6533 const int d = (c*ndof_test/2) + i;
6534 // ND_QuadrilateralElement sets dof2tk = (dofmap < 0) ? 2+c : c, but here
6535 // no signs should be applied due to ElementRestriction.
6536 const int dof2tk = c;
6537 const int id = (dofmap[d] >= 0) ? dofmap[d] : -1 - dofmap[d];
6538
6539 for (int e=0; e<ne; ++e)
6540 {
6541 double v[2];
6542 ElementTransformation *tr = mesh->GetElementTransformation(e);
6543 tr->SetIntPoint(&Nodes.IntPoint(id));
6544 tr->Jacobian().Mult(tk + dof2tk*dim, v);
6545
6546 for (int j=0; j<2; ++j)
6547 {
6548 op(j,d,e) = v[j];
6549 }
6550 }
6551 }
6552 }
6553 }
6554 }
6555
AddMultPA(const Vector & x,Vector & y) const6556 void IdentityInterpolator::AddMultPA(const Vector &x, Vector &y) const
6557 {
6558 if (dim == 3)
6559 {
6560 PAHcurlVecH1IdentityApply3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B,
6561 pa_data, x, y);
6562 }
6563 else if (dim == 2)
6564 {
6565 PAHcurlVecH1IdentityApply2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B,
6566 pa_data, x, y);
6567 }
6568 else
6569 {
6570 mfem_error("Bad dimension!");
6571 }
6572 }
6573
AddMultTransposePA(const Vector & x,Vector & y) const6574 void IdentityInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const
6575 {
6576 if (dim == 3)
6577 {
6578 PAHcurlVecH1IdentityApplyTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6579 maps_O_C->B, pa_data, x, y);
6580 }
6581 else if (dim == 2)
6582 {
6583 PAHcurlVecH1IdentityApplyTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6584 maps_O_C->B, pa_data, x, y);
6585 }
6586 else
6587 {
6588 mfem_error("Bad dimension!");
6589 }
6590 }
6591
6592 template void SmemPAHcurlMassAssembleDiagonal3D<0,0>(const int D1D,
6593 const int Q1D,
6594 const int NE,
6595 const bool symmetric,
6596 const Array<double> &bo,
6597 const Array<double> &bc,
6598 const Vector &pa_data,
6599 Vector &diag);
6600
6601 template void SmemPAHcurlMassAssembleDiagonal3D<2,3>(const int D1D,
6602 const int Q1D,
6603 const int NE,
6604 const bool symmetric,
6605 const Array<double> &bo,
6606 const Array<double> &bc,
6607 const Vector &pa_data,
6608 Vector &diag);
6609
6610 template void SmemPAHcurlMassAssembleDiagonal3D<3,4>(const int D1D,
6611 const int Q1D,
6612 const int NE,
6613 const bool symmetric,
6614 const Array<double> &bo,
6615 const Array<double> &bc,
6616 const Vector &pa_data,
6617 Vector &diag);
6618
6619 template void SmemPAHcurlMassAssembleDiagonal3D<4,5>(const int D1D,
6620 const int Q1D,
6621 const int NE,
6622 const bool symmetric,
6623 const Array<double> &bo,
6624 const Array<double> &bc,
6625 const Vector &pa_data,
6626 Vector &diag);
6627
6628 template void SmemPAHcurlMassAssembleDiagonal3D<5,6>(const int D1D,
6629 const int Q1D,
6630 const int NE,
6631 const bool symmetric,
6632 const Array<double> &bo,
6633 const Array<double> &bc,
6634 const Vector &pa_data,
6635 Vector &diag);
6636
6637 template void SmemPAHcurlMassApply3D<0,0>(const int D1D,
6638 const int Q1D,
6639 const int NE,
6640 const bool symmetric,
6641 const Array<double> &bo,
6642 const Array<double> &bc,
6643 const Array<double> &bot,
6644 const Array<double> &bct,
6645 const Vector &pa_data,
6646 const Vector &x,
6647 Vector &y);
6648
6649 template void SmemPAHcurlMassApply3D<2,3>(const int D1D,
6650 const int Q1D,
6651 const int NE,
6652 const bool symmetric,
6653 const Array<double> &bo,
6654 const Array<double> &bc,
6655 const Array<double> &bot,
6656 const Array<double> &bct,
6657 const Vector &pa_data,
6658 const Vector &x,
6659 Vector &y);
6660
6661 template void SmemPAHcurlMassApply3D<3,4>(const int D1D,
6662 const int Q1D,
6663 const int NE,
6664 const bool symmetric,
6665 const Array<double> &bo,
6666 const Array<double> &bc,
6667 const Array<double> &bot,
6668 const Array<double> &bct,
6669 const Vector &pa_data,
6670 const Vector &x,
6671 Vector &y);
6672
6673 template void SmemPAHcurlMassApply3D<4,5>(const int D1D,
6674 const int Q1D,
6675 const int NE,
6676 const bool symmetric,
6677 const Array<double> &bo,
6678 const Array<double> &bc,
6679 const Array<double> &bot,
6680 const Array<double> &bct,
6681 const Vector &pa_data,
6682 const Vector &x,
6683 Vector &y);
6684
6685 template void SmemPAHcurlMassApply3D<5,6>(const int D1D,
6686 const int Q1D,
6687 const int NE,
6688 const bool symmetric,
6689 const Array<double> &bo,
6690 const Array<double> &bc,
6691 const Array<double> &bot,
6692 const Array<double> &bct,
6693 const Vector &pa_data,
6694 const Vector &x,
6695 Vector &y);
6696
6697 } // namespace mfem
6698