1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 
PAHcurlMassApply2D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)21 void PAHcurlMassApply2D(const int D1D,
22                         const int Q1D,
23                         const int NE,
24                         const bool symmetric,
25                         const Array<double> &bo,
26                         const Array<double> &bc,
27                         const Array<double> &bot,
28                         const Array<double> &bct,
29                         const Vector &pa_data,
30                         const Vector &x,
31                         Vector &y)
32 {
33    constexpr static int VDIM = 2;
34    constexpr static int MAX_D1D = HCURL_MAX_D1D;
35    constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
36 
37    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
38    auto Bc = Reshape(bc.Read(), Q1D, D1D);
39    auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
40    auto Bct = Reshape(bct.Read(), D1D, Q1D);
41    auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
42    auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
43    auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
44 
45    MFEM_FORALL(e, NE,
46    {
47       double mass[MAX_Q1D][MAX_Q1D][VDIM];
48 
49       for (int qy = 0; qy < Q1D; ++qy)
50       {
51          for (int qx = 0; qx < Q1D; ++qx)
52          {
53             for (int c = 0; c < VDIM; ++c)
54             {
55                mass[qy][qx][c] = 0.0;
56             }
57          }
58       }
59 
60       int osc = 0;
61 
62       for (int c = 0; c < VDIM; ++c)  // loop over x, y components
63       {
64          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
65          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
66 
67          for (int dy = 0; dy < D1Dy; ++dy)
68          {
69             double massX[MAX_Q1D];
70             for (int qx = 0; qx < Q1D; ++qx)
71             {
72                massX[qx] = 0.0;
73             }
74 
75             for (int dx = 0; dx < D1Dx; ++dx)
76             {
77                const double t = X(dx + (dy * D1Dx) + osc, e);
78                for (int qx = 0; qx < Q1D; ++qx)
79                {
80                   massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
81                }
82             }
83 
84             for (int qy = 0; qy < Q1D; ++qy)
85             {
86                const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
87                for (int qx = 0; qx < Q1D; ++qx)
88                {
89                   mass[qy][qx][c] += massX[qx] * wy;
90                }
91             }
92          }
93 
94          osc += D1Dx * D1Dy;
95       }  // loop (c) over components
96 
97       // Apply D operator.
98       for (int qy = 0; qy < Q1D; ++qy)
99       {
100          for (int qx = 0; qx < Q1D; ++qx)
101          {
102             const double O11 = op(qx,qy,0,e);
103             const double O21 = op(qx,qy,1,e);
104             const double O12 = symmetric ? O21 : op(qx,qy,2,e);
105             const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
106             const double massX = mass[qy][qx][0];
107             const double massY = mass[qy][qx][1];
108             mass[qy][qx][0] = (O11*massX)+(O12*massY);
109             mass[qy][qx][1] = (O21*massX)+(O22*massY);
110          }
111       }
112 
113       for (int qy = 0; qy < Q1D; ++qy)
114       {
115          osc = 0;
116 
117          for (int c = 0; c < VDIM; ++c)  // loop over x, y components
118          {
119             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
120             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
121 
122             double massX[MAX_D1D];
123             for (int dx = 0; dx < D1Dx; ++dx)
124             {
125                massX[dx] = 0.0;
126             }
127             for (int qx = 0; qx < Q1D; ++qx)
128             {
129                for (int dx = 0; dx < D1Dx; ++dx)
130                {
131                   massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
132                }
133             }
134 
135             for (int dy = 0; dy < D1Dy; ++dy)
136             {
137                const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
138 
139                for (int dx = 0; dx < D1Dx; ++dx)
140                {
141                   Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
142                }
143             }
144 
145             osc += D1Dx * D1Dy;
146          }  // loop c
147       }  // loop qy
148    }); // end of element loop
149 }
150 
PAHcurlMassAssembleDiagonal2D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Vector & pa_data,Vector & diag)151 void PAHcurlMassAssembleDiagonal2D(const int D1D,
152                                    const int Q1D,
153                                    const int NE,
154                                    const bool symmetric,
155                                    const Array<double> &bo,
156                                    const Array<double> &bc,
157                                    const Vector &pa_data,
158                                    Vector &diag)
159 {
160    constexpr static int VDIM = 2;
161    constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
162 
163    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
164    auto Bc = Reshape(bc.Read(), Q1D, D1D);
165    auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
166    auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
167 
168    MFEM_FORALL(e, NE,
169    {
170       int osc = 0;
171 
172       for (int c = 0; c < VDIM; ++c)  // loop over x, y components
173       {
174          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
175          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
176 
177          double mass[MAX_Q1D];
178 
179          for (int dy = 0; dy < D1Dy; ++dy)
180          {
181             for (int qx = 0; qx < Q1D; ++qx)
182             {
183                mass[qx] = 0.0;
184                for (int qy = 0; qy < Q1D; ++qy)
185                {
186                   const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
187 
188                   mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) :
189                   op(qx,qy,symmetric ? 2 : 3, e));
190                }
191             }
192 
193             for (int dx = 0; dx < D1Dx; ++dx)
194             {
195                for (int qx = 0; qx < Q1D; ++qx)
196                {
197                   const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
198                   D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx;
199                }
200             }
201          }
202 
203          osc += D1Dx * D1Dy;
204       }  // loop c
205    }); // end of element loop
206 }
207 
PAHcurlMassAssembleDiagonal3D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Vector & pa_data,Vector & diag)208 void PAHcurlMassAssembleDiagonal3D(const int D1D,
209                                    const int Q1D,
210                                    const int NE,
211                                    const bool symmetric,
212                                    const Array<double> &bo,
213                                    const Array<double> &bc,
214                                    const Vector &pa_data,
215                                    Vector &diag)
216 {
217    constexpr static int MAX_D1D = HCURL_MAX_D1D;
218    constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
219 
220    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
221    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
222    constexpr static int VDIM = 3;
223 
224    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
225    auto Bc = Reshape(bc.Read(), Q1D, D1D);
226    auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
227    auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
228 
229    MFEM_FORALL(e, NE,
230    {
231       int osc = 0;
232 
233       for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
234       {
235          const int D1Dz = (c == 2) ? D1D - 1 : D1D;
236          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
237          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
238 
239          const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
240          (symmetric ? 5 : 8));
241 
242          double mass[MAX_Q1D];
243 
244          for (int dz = 0; dz < D1Dz; ++dz)
245          {
246             for (int dy = 0; dy < D1Dy; ++dy)
247             {
248                for (int qx = 0; qx < Q1D; ++qx)
249                {
250                   mass[qx] = 0.0;
251                   for (int qy = 0; qy < Q1D; ++qy)
252                   {
253                      const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
254 
255                      for (int qz = 0; qz < Q1D; ++qz)
256                      {
257                         const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
258 
259                         mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
260                      }
261                   }
262                }
263 
264                for (int dx = 0; dx < D1Dx; ++dx)
265                {
266                   for (int qx = 0; qx < Q1D; ++qx)
267                   {
268                      const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
269                      D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx;
270                   }
271                }
272             }
273          }
274 
275          osc += D1Dx * D1Dy * D1Dz;
276       }  // loop c
277    }); // end of element loop
278 }
279 
280 template<int T_D1D, int T_Q1D>
SmemPAHcurlMassAssembleDiagonal3D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Vector & pa_data,Vector & diag)281 void SmemPAHcurlMassAssembleDiagonal3D(const int D1D,
282                                        const int Q1D,
283                                        const int NE,
284                                        const bool symmetric,
285                                        const Array<double> &bo,
286                                        const Array<double> &bc,
287                                        const Vector &pa_data,
288                                        Vector &diag)
289 {
290    MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
291    MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
292 
293    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
294    auto Bc = Reshape(bc.Read(), Q1D, D1D);
295    auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
296    auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
297 
298    MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
299    {
300       constexpr int VDIM = 3;
301       constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
302       constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
303 
304       MFEM_SHARED double sBo[tQ1D][tD1D];
305       MFEM_SHARED double sBc[tQ1D][tD1D];
306 
307       double op3[3];
308       MFEM_SHARED double sop[3][tQ1D][tQ1D];
309 
310       MFEM_FOREACH_THREAD(qx,x,Q1D)
311       {
312          MFEM_FOREACH_THREAD(qy,y,Q1D)
313          {
314             MFEM_FOREACH_THREAD(qz,z,Q1D)
315             {
316                op3[0] = op(qx,qy,qz,0,e);
317                op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e);
318                op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e);
319             }
320          }
321       }
322 
323       const int tidx = MFEM_THREAD_ID(x);
324       const int tidy = MFEM_THREAD_ID(y);
325       const int tidz = MFEM_THREAD_ID(z);
326 
327       if (tidz == 0)
328       {
329          MFEM_FOREACH_THREAD(d,y,D1D)
330          {
331             MFEM_FOREACH_THREAD(q,x,Q1D)
332             {
333                sBc[q][d] = Bc(q,d);
334                if (d < D1D-1)
335                {
336                   sBo[q][d] = Bo(q,d);
337                }
338             }
339          }
340       }
341       MFEM_SYNC_THREAD;
342 
343       int osc = 0;
344       for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
345       {
346          const int D1Dz = (c == 2) ? D1D - 1 : D1D;
347          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
348          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
349 
350          double dxyz = 0.0;
351 
352          for (int qz=0; qz < Q1D; ++qz)
353          {
354             if (tidz == qz)
355             {
356                for (int i=0; i<3; ++i)
357                {
358                   sop[i][tidx][tidy] = op3[i];
359                }
360             }
361 
362             MFEM_SYNC_THREAD;
363 
364             MFEM_FOREACH_THREAD(dz,z,D1Dz)
365             {
366                const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
367 
368                MFEM_FOREACH_THREAD(dy,y,D1Dy)
369                {
370                   MFEM_FOREACH_THREAD(dx,x,D1Dx)
371                   {
372                      for (int qy = 0; qy < Q1D; ++qy)
373                      {
374                         const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
375 
376                         for (int qx = 0; qx < Q1D; ++qx)
377                         {
378                            const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
379                            dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz;
380                         }
381                      }
382                   }
383                }
384             }
385 
386             MFEM_SYNC_THREAD;
387          }  // qz loop
388 
389          MFEM_FOREACH_THREAD(dz,z,D1Dz)
390          {
391             MFEM_FOREACH_THREAD(dy,y,D1Dy)
392             {
393                MFEM_FOREACH_THREAD(dx,x,D1Dx)
394                {
395                   D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
396                }
397             }
398          }
399 
400          osc += D1Dx * D1Dy * D1Dz;
401       }  // c loop
402    }); // end of element loop
403 }
404 
PAHcurlMassApply3D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)405 void PAHcurlMassApply3D(const int D1D,
406                         const int Q1D,
407                         const int NE,
408                         const bool symmetric,
409                         const Array<double> &bo,
410                         const Array<double> &bc,
411                         const Array<double> &bot,
412                         const Array<double> &bct,
413                         const Vector &pa_data,
414                         const Vector &x,
415                         Vector &y)
416 {
417    constexpr static int MAX_D1D = HCURL_MAX_D1D;
418    constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
419 
420    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
421    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
422    constexpr static int VDIM = 3;
423 
424    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
425    auto Bc = Reshape(bc.Read(), Q1D, D1D);
426    auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
427    auto Bct = Reshape(bct.Read(), D1D, Q1D);
428    auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
429    auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
430    auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
431 
432    MFEM_FORALL(e, NE,
433    {
434       double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
435 
436       for (int qz = 0; qz < Q1D; ++qz)
437       {
438          for (int qy = 0; qy < Q1D; ++qy)
439          {
440             for (int qx = 0; qx < Q1D; ++qx)
441             {
442                for (int c = 0; c < VDIM; ++c)
443                {
444                   mass[qz][qy][qx][c] = 0.0;
445                }
446             }
447          }
448       }
449 
450       int osc = 0;
451 
452       for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
453       {
454          const int D1Dz = (c == 2) ? D1D - 1 : D1D;
455          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
456          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
457 
458          for (int dz = 0; dz < D1Dz; ++dz)
459          {
460             double massXY[MAX_Q1D][MAX_Q1D];
461             for (int qy = 0; qy < Q1D; ++qy)
462             {
463                for (int qx = 0; qx < Q1D; ++qx)
464                {
465                   massXY[qy][qx] = 0.0;
466                }
467             }
468 
469             for (int dy = 0; dy < D1Dy; ++dy)
470             {
471                double massX[MAX_Q1D];
472                for (int qx = 0; qx < Q1D; ++qx)
473                {
474                   massX[qx] = 0.0;
475                }
476 
477                for (int dx = 0; dx < D1Dx; ++dx)
478                {
479                   const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
480                   for (int qx = 0; qx < Q1D; ++qx)
481                   {
482                      massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
483                   }
484                }
485 
486                for (int qy = 0; qy < Q1D; ++qy)
487                {
488                   const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
489                   for (int qx = 0; qx < Q1D; ++qx)
490                   {
491                      const double wx = massX[qx];
492                      massXY[qy][qx] += wx * wy;
493                   }
494                }
495             }
496 
497             for (int qz = 0; qz < Q1D; ++qz)
498             {
499                const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
500                for (int qy = 0; qy < Q1D; ++qy)
501                {
502                   for (int qx = 0; qx < Q1D; ++qx)
503                   {
504                      mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
505                   }
506                }
507             }
508          }
509 
510          osc += D1Dx * D1Dy * D1Dz;
511       }  // loop (c) over components
512 
513       // Apply D operator.
514       for (int qz = 0; qz < Q1D; ++qz)
515       {
516          for (int qy = 0; qy < Q1D; ++qy)
517          {
518             for (int qx = 0; qx < Q1D; ++qx)
519             {
520                const double O11 = op(qx,qy,qz,0,e);
521                const double O12 = op(qx,qy,qz,1,e);
522                const double O13 = op(qx,qy,qz,2,e);
523                const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
524                const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
525                const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
526                const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
527                const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
528                const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
529                const double massX = mass[qz][qy][qx][0];
530                const double massY = mass[qz][qy][qx][1];
531                const double massZ = mass[qz][qy][qx][2];
532                mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
533                mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
534                mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
535             }
536          }
537       }
538 
539       for (int qz = 0; qz < Q1D; ++qz)
540       {
541          double massXY[MAX_D1D][MAX_D1D];
542 
543          osc = 0;
544 
545          for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
546          {
547             const int D1Dz = (c == 2) ? D1D - 1 : D1D;
548             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
549             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
550 
551             for (int dy = 0; dy < D1Dy; ++dy)
552             {
553                for (int dx = 0; dx < D1Dx; ++dx)
554                {
555                   massXY[dy][dx] = 0.0;
556                }
557             }
558             for (int qy = 0; qy < Q1D; ++qy)
559             {
560                double massX[MAX_D1D];
561                for (int dx = 0; dx < D1Dx; ++dx)
562                {
563                   massX[dx] = 0;
564                }
565                for (int qx = 0; qx < Q1D; ++qx)
566                {
567                   for (int dx = 0; dx < D1Dx; ++dx)
568                   {
569                      massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
570                   }
571                }
572                for (int dy = 0; dy < D1Dy; ++dy)
573                {
574                   const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
575                   for (int dx = 0; dx < D1Dx; ++dx)
576                   {
577                      massXY[dy][dx] += massX[dx] * wy;
578                   }
579                }
580             }
581 
582             for (int dz = 0; dz < D1Dz; ++dz)
583             {
584                const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
585                for (int dy = 0; dy < D1Dy; ++dy)
586                {
587                   for (int dx = 0; dx < D1Dx; ++dx)
588                   {
589                      Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
590                   }
591                }
592             }
593 
594             osc += D1Dx * D1Dy * D1Dz;
595          }  // loop c
596       }  // loop qz
597    }); // end of element loop
598 }
599 
600 template<int T_D1D, int T_Q1D>
SmemPAHcurlMassApply3D(const int D1D,const int Q1D,const int NE,const bool symmetric,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)601 void SmemPAHcurlMassApply3D(const int D1D,
602                             const int Q1D,
603                             const int NE,
604                             const bool symmetric,
605                             const Array<double> &bo,
606                             const Array<double> &bc,
607                             const Array<double> &bot,
608                             const Array<double> &bct,
609                             const Vector &pa_data,
610                             const Vector &x,
611                             Vector &y)
612 {
613    MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
614    MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
615 
616    const int dataSize = symmetric ? 6 : 9;
617 
618    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
619    auto Bc = Reshape(bc.Read(), Q1D, D1D);
620    auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE);
621    auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
622    auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
623 
624    MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
625    {
626       constexpr int VDIM = 3;
627       constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
628       constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
629 
630       MFEM_SHARED double sBo[tQ1D][tD1D];
631       MFEM_SHARED double sBc[tQ1D][tD1D];
632 
633       double op9[9];
634       MFEM_SHARED double sop[9*tQ1D*tQ1D];
635       MFEM_SHARED double mass[tQ1D][tQ1D][3];
636 
637       MFEM_SHARED double sX[tD1D][tD1D][tD1D];
638 
639       MFEM_FOREACH_THREAD(qx,x,Q1D)
640       {
641          MFEM_FOREACH_THREAD(qy,y,Q1D)
642          {
643             MFEM_FOREACH_THREAD(qz,z,Q1D)
644             {
645                for (int i=0; i<dataSize; ++i)
646                {
647                   op9[i] = op(qx,qy,qz,i,e);
648                }
649             }
650          }
651       }
652 
653       const int tidx = MFEM_THREAD_ID(x);
654       const int tidy = MFEM_THREAD_ID(y);
655       const int tidz = MFEM_THREAD_ID(z);
656 
657       if (tidz == 0)
658       {
659          MFEM_FOREACH_THREAD(d,y,D1D)
660          {
661             MFEM_FOREACH_THREAD(q,x,Q1D)
662             {
663                sBc[q][d] = Bc(q,d);
664                if (d < D1D-1)
665                {
666                   sBo[q][d] = Bo(q,d);
667                }
668             }
669          }
670       }
671       MFEM_SYNC_THREAD;
672 
673       for (int qz=0; qz < Q1D; ++qz)
674       {
675          int osc = 0;
676          for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
677          {
678             const int D1Dz = (c == 2) ? D1D - 1 : D1D;
679             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
680             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
681 
682             MFEM_FOREACH_THREAD(dz,z,D1Dz)
683             {
684                MFEM_FOREACH_THREAD(dy,y,D1Dy)
685                {
686                   MFEM_FOREACH_THREAD(dx,x,D1Dx)
687                   {
688                      sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
689                   }
690                }
691             }
692             MFEM_SYNC_THREAD;
693 
694             if (tidz == qz)
695             {
696                for (int i=0; i<dataSize; ++i)
697                {
698                   sop[i + (dataSize*tidx) + (dataSize*Q1D*tidy)] = op9[i];
699                }
700 
701                MFEM_FOREACH_THREAD(qy,y,Q1D)
702                {
703                   MFEM_FOREACH_THREAD(qx,x,Q1D)
704                   {
705                      double u = 0.0;
706 
707                      for (int dz = 0; dz < D1Dz; ++dz)
708                      {
709                         const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
710                         for (int dy = 0; dy < D1Dy; ++dy)
711                         {
712                            const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
713                            for (int dx = 0; dx < D1Dx; ++dx)
714                            {
715                               const double t = sX[dz][dy][dx];
716                               const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
717                               u += t * wx * wy * wz;
718                            }
719                         }
720                      }
721 
722                      mass[qy][qx][c] = u;
723                   } // qx
724                } // qy
725             } // tidz == qz
726 
727             osc += D1Dx * D1Dy * D1Dz;
728             MFEM_SYNC_THREAD;
729          } // c
730 
731          MFEM_SYNC_THREAD;  // Sync mass[qy][qx][d] and sop
732 
733          osc = 0;
734          for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
735          {
736             const int D1Dz = (c == 2) ? D1D - 1 : D1D;
737             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
738             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
739 
740             double dxyz = 0.0;
741 
742             MFEM_FOREACH_THREAD(dz,z,D1Dz)
743             {
744                const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
745 
746                MFEM_FOREACH_THREAD(dy,y,D1Dy)
747                {
748                   MFEM_FOREACH_THREAD(dx,x,D1Dx)
749                   {
750                      for (int qy = 0; qy < Q1D; ++qy)
751                      {
752                         const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
753                         for (int qx = 0; qx < Q1D; ++qx)
754                         {
755                            const int os = (dataSize*qx) + (dataSize*Q1D*qy);
756                            const int id1 = os + ((c == 0) ? 0 : ((c == 1) ? (symmetric ? 1 : 3) :
757                                                                  (symmetric ? 2 : 6))); // O11, O21, O31
758                            const int id2 = os + ((c == 0) ? 1 : ((c == 1) ? (symmetric ? 3 : 4) :
759                                                                  (symmetric ? 4 : 7))); // O12, O22, O32
760                            const int id3 = os + ((c == 0) ? 2 : ((c == 1) ? (symmetric ? 4 : 5) :
761                                                                  (symmetric ? 5 : 8))); // O13, O23, O33
762 
763                            const double m_c = (sop[id1] * mass[qy][qx][0]) + (sop[id2] * mass[qy][qx][1]) +
764                                               (sop[id3] * mass[qy][qx][2]);
765 
766                            const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
767                            dxyz += m_c * wx * wy * wz;
768                         }
769                      }
770                   }
771                }
772             }
773 
774             MFEM_SYNC_THREAD;
775 
776             MFEM_FOREACH_THREAD(dz,z,D1Dz)
777             {
778                MFEM_FOREACH_THREAD(dy,y,D1Dy)
779                {
780                   MFEM_FOREACH_THREAD(dx,x,D1Dx)
781                   {
782                      Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
783                   }
784                }
785             }
786 
787             osc += D1Dx * D1Dy * D1Dz;
788          } // c loop
789       } // qz
790    }); // end of element loop
791 }
792 
793 // PA H(curl) curl-curl assemble 2D kernel
PACurlCurlSetup2D(const int Q1D,const int NE,const Array<double> & w,const Vector & j,Vector & coeff,Vector & op)794 static void PACurlCurlSetup2D(const int Q1D,
795                               const int NE,
796                               const Array<double> &w,
797                               const Vector &j,
798                               Vector &coeff,
799                               Vector &op)
800 {
801    const int NQ = Q1D*Q1D;
802    auto W = w.Read();
803    auto J = Reshape(j.Read(), NQ, 2, 2, NE);
804    auto C = Reshape(coeff.Read(), NQ, NE);
805    auto y = Reshape(op.Write(), NQ, NE);
806    MFEM_FORALL(e, NE,
807    {
808       for (int q = 0; q < NQ; ++q)
809       {
810          const double J11 = J(q,0,0,e);
811          const double J21 = J(q,1,0,e);
812          const double J12 = J(q,0,1,e);
813          const double J22 = J(q,1,1,e);
814          const double detJ = (J11*J22)-(J21*J12);
815          y(q,e) = W[q] * C(q,e) / detJ;
816       }
817    });
818 }
819 
820 // PA H(curl) curl-curl assemble 3D kernel
PACurlCurlSetup3D(const int Q1D,const int coeffDim,const int NE,const Array<double> & w,const Vector & j,Vector & coeff,Vector & op)821 static void PACurlCurlSetup3D(const int Q1D,
822                               const int coeffDim,
823                               const int NE,
824                               const Array<double> &w,
825                               const Vector &j,
826                               Vector &coeff,
827                               Vector &op)
828 {
829    const int NQ = Q1D*Q1D*Q1D;
830    const bool symmetric = (coeffDim != 9);
831    auto W = w.Read();
832    auto J = Reshape(j.Read(), NQ, 3, 3, NE);
833    auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
834    auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
835 
836    MFEM_FORALL(e, NE,
837    {
838       for (int q = 0; q < NQ; ++q)
839       {
840          const double J11 = J(q,0,0,e);
841          const double J21 = J(q,1,0,e);
842          const double J31 = J(q,2,0,e);
843          const double J12 = J(q,0,1,e);
844          const double J22 = J(q,1,1,e);
845          const double J32 = J(q,2,1,e);
846          const double J13 = J(q,0,2,e);
847          const double J23 = J(q,1,2,e);
848          const double J33 = J(q,2,2,e);
849          const double detJ = J11 * (J22 * J33 - J32 * J23) -
850          /* */               J21 * (J12 * J33 - J32 * J13) +
851          /* */               J31 * (J12 * J23 - J22 * J13);
852 
853          const double c_detJ = W[q] / detJ;
854 
855          if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
856          {
857             // Set y to the 6 or 9 entries of J^T M J / det
858             const double M11 = C(0, q, e);
859             const double M12 = C(1, q, e);
860             const double M13 = C(2, q, e);
861             const double M21 = (!symmetric) ? C(3, q, e) : M12;
862             const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e);
863             const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e);
864             const double M31 = (!symmetric) ? C(6, q, e) : M13;
865             const double M32 = (!symmetric) ? C(7, q, e) : M23;
866             const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e);
867 
868             // First compute R = MJ
869             const double R11 = M11*J11 + M12*J21 + M13*J31;
870             const double R12 = M11*J12 + M12*J22 + M13*J32;
871             const double R13 = M11*J13 + M12*J23 + M13*J33;
872             const double R21 = M21*J11 + M22*J21 + M23*J31;
873             const double R22 = M21*J12 + M22*J22 + M23*J32;
874             const double R23 = M21*J13 + M22*J23 + M23*J33;
875             const double R31 = M31*J11 + M32*J21 + M33*J31;
876             const double R32 = M31*J12 + M32*J22 + M33*J32;
877             const double R33 = M31*J13 + M32*J23 + M33*J33;
878 
879             // Now set y to J^T R / det
880             y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1
881             const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32);
882             y(q,1,e) = Y12; // 1,2
883             y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3
884 
885             const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31);
886             const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32);
887             const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33);
888 
889             const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33);
890 
891             y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1
892             y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2
893             y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3
894 
895             if (!symmetric)
896             {
897                y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1
898                y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2
899                y(q,8,e) = Y33; // 3,3
900             }
901          }
902          else  // Vector or scalar coefficient version
903          {
904             // Set y to the 6 entries of J^T D J / det^2
905             const double D1 = C(0, q, e);
906             const double D2 = coeffDim == 3 ? C(1, q, e) : D1;
907             const double D3 = coeffDim == 3 ? C(2, q, e) : D1;
908 
909             y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1
910             y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2
911             y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3
912             y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2
913             y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3
914             y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3
915          }
916       }
917    });
918 }
919 
AssemblePA(const FiniteElementSpace & fes)920 void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes)
921 {
922    // Assumes tensor-product elements
923    Mesh *mesh = fes.GetMesh();
924    const FiniteElement *fel = fes.GetFE(0);
925 
926    const VectorTensorFiniteElement *el =
927       dynamic_cast<const VectorTensorFiniteElement*>(fel);
928    MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
929 
930    const IntegrationRule *ir
931       = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el,
932                                                      *mesh->GetElementTransformation(0));
933 
934    const int dims = el->GetDim();
935    MFEM_VERIFY(dims == 2 || dims == 3, "");
936 
937    const int nq = ir->GetNPoints();
938    dim = mesh->Dimension();
939    MFEM_VERIFY(dim == 2 || dim == 3, "");
940 
941    const int dimc = (dim == 3) ? 3 : 1;
942 
943    ne = fes.GetNE();
944    geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
945    mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
946    mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
947    dofs1D = mapsC->ndof;
948    quad1D = mapsC->nqpt;
949 
950    MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
951 
952    const int MQsymmDim = SMQ ? (SMQ->GetSize() * (SMQ->GetSize() + 1)) / 2 : 0;
953    const int MQfullDim = MQ ? (MQ->GetHeight() * MQ->GetWidth()) : 0;
954    const int MQdim = MQ ? MQfullDim : MQsymmDim;
955    const int coeffDim = (MQ || SMQ) ? MQdim : (DQ ? DQ->GetVDim() : 1);
956 
957    symmetric = (MQ == NULL);
958 
959    const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
960    const int ndata = (dim == 2) ? 1 : (symmetric ? symmDims : MQfullDim);
961    pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
962 
963    Vector coeff(coeffDim * ne * nq);
964    coeff = 1.0;
965    auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
966    if (Q || DQ || MQ || SMQ)
967    {
968       Vector D(DQ ? coeffDim : 0);
969       DenseMatrix M;
970       DenseSymmetricMatrix SM;
971 
972       if (DQ)
973       {
974          MFEM_VERIFY(coeffDim == dimc, "");
975       }
976       if (MQ)
977       {
978          M.SetSize(dimc);
979          MFEM_VERIFY(coeffDim == MQdim, "");
980          MFEM_VERIFY(MQ->GetHeight() == dimc && MQ->GetWidth() == dimc, "");
981       }
982       if (SMQ)
983       {
984          SM.SetSize(dimc);
985          MFEM_VERIFY(SMQ->GetSize() == dimc, "");
986       }
987 
988       for (int e=0; e<ne; ++e)
989       {
990          ElementTransformation *tr = mesh->GetElementTransformation(e);
991          for (int p=0; p<nq; ++p)
992          {
993             if (MQ)
994             {
995                MQ->Eval(M, *tr, ir->IntPoint(p));
996 
997                for (int i=0; i<dimc; ++i)
998                   for (int j=0; j<dimc; ++j)
999                   {
1000                      coeffh(j+(i*dimc), p, e) = M(i,j);
1001                   }
1002 
1003             }
1004             else if (SMQ)
1005             {
1006                SMQ->Eval(SM, *tr, ir->IntPoint(p));
1007 
1008                int cnt = 0;
1009                for (int i=0; i<dimc; ++i)
1010                   for (int j=i; j<dimc; ++j, ++cnt)
1011                   {
1012                      coeffh(cnt, p, e) = SM(i,j);
1013                   }
1014 
1015             }
1016             else if (DQ)
1017             {
1018                DQ->Eval(D, *tr, ir->IntPoint(p));
1019                for (int i=0; i<coeffDim; ++i)
1020                {
1021                   coeffh(i, p, e) = D[i];
1022                }
1023             }
1024             else
1025             {
1026                coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
1027             }
1028          }
1029       }
1030    }
1031 
1032    if (el->GetDerivType() != mfem::FiniteElement::CURL)
1033    {
1034       MFEM_ABORT("Unknown kernel.");
1035    }
1036 
1037    if (dim == 3)
1038    {
1039       PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
1040                         pa_data);
1041    }
1042    else
1043    {
1044       PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1045    }
1046 }
1047 
PACurlCurlApply2D(const int D1D,const int Q1D,const int NE,const Array<double> & bo,const Array<double> & bot,const Array<double> & gc,const Array<double> & gct,const Vector & pa_data,const Vector & x,Vector & y)1048 static void PACurlCurlApply2D(const int D1D,
1049                               const int Q1D,
1050                               const int NE,
1051                               const Array<double> &bo,
1052                               const Array<double> &bot,
1053                               const Array<double> &gc,
1054                               const Array<double> &gct,
1055                               const Vector &pa_data,
1056                               const Vector &x,
1057                               Vector &y)
1058 {
1059    constexpr static int VDIM = 2;
1060    constexpr static int MAX_D1D = HCURL_MAX_D1D;
1061    constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1062 
1063    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1064    auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1065    auto Gc = Reshape(gc.Read(), Q1D, D1D);
1066    auto Gct = Reshape(gct.Read(), D1D, Q1D);
1067    auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1068    auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1069    auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
1070 
1071    MFEM_FORALL(e, NE,
1072    {
1073       double curl[MAX_Q1D][MAX_Q1D];
1074 
1075       // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1076 
1077       for (int qy = 0; qy < Q1D; ++qy)
1078       {
1079          for (int qx = 0; qx < Q1D; ++qx)
1080          {
1081             curl[qy][qx] = 0.0;
1082          }
1083       }
1084 
1085       int osc = 0;
1086 
1087       for (int c = 0; c < VDIM; ++c)  // loop over x, y components
1088       {
1089          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1090          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1091 
1092          for (int dy = 0; dy < D1Dy; ++dy)
1093          {
1094             double gradX[MAX_Q1D];
1095             for (int qx = 0; qx < Q1D; ++qx)
1096             {
1097                gradX[qx] = 0;
1098             }
1099 
1100             for (int dx = 0; dx < D1Dx; ++dx)
1101             {
1102                const double t = X(dx + (dy * D1Dx) + osc, e);
1103                for (int qx = 0; qx < Q1D; ++qx)
1104                {
1105                   gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
1106                }
1107             }
1108 
1109             for (int qy = 0; qy < Q1D; ++qy)
1110             {
1111                const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
1112                for (int qx = 0; qx < Q1D; ++qx)
1113                {
1114                   curl[qy][qx] += gradX[qx] * wy;
1115                }
1116             }
1117          }
1118 
1119          osc += D1Dx * D1Dy;
1120       }  // loop (c) over components
1121 
1122       // Apply D operator.
1123       for (int qy = 0; qy < Q1D; ++qy)
1124       {
1125          for (int qx = 0; qx < Q1D; ++qx)
1126          {
1127             curl[qy][qx] *= op(qx,qy,e);
1128          }
1129       }
1130 
1131       for (int qy = 0; qy < Q1D; ++qy)
1132       {
1133          osc = 0;
1134 
1135          for (int c = 0; c < VDIM; ++c)  // loop over x, y components
1136          {
1137             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1138             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1139 
1140             double gradX[MAX_D1D];
1141             for (int dx = 0; dx < D1Dx; ++dx)
1142             {
1143                gradX[dx] = 0.0;
1144             }
1145             for (int qx = 0; qx < Q1D; ++qx)
1146             {
1147                for (int dx = 0; dx < D1Dx; ++dx)
1148                {
1149                   gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
1150                }
1151             }
1152             for (int dy = 0; dy < D1Dy; ++dy)
1153             {
1154                const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
1155 
1156                for (int dx = 0; dx < D1Dx; ++dx)
1157                {
1158                   Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1159                }
1160             }
1161 
1162             osc += D1Dx * D1Dy;
1163          }  // loop c
1164       }  // loop qy
1165    }); // end of element loop
1166 }
1167 
// Applies the partially assembled 3D curl-curl operator, element by element,
// and adds the result to y.
//
// x and y are accessed with layout (3*(D1D-1)*D1D*D1D, NE): the dofs of the
// x, y and z components are stored one after the other, each component having
// D1D-1 dofs in its own direction and D1D dofs in the other two. pa_data is
// accessed with layout (Q1D, Q1D, Q1D, 6 or 9, NE), with 6 entries (upper
// triangle) per quadrature point when symmetric is true and 9 entries (row by
// row) otherwise.
//
// For each element the kernel
//   1. evaluates the reference curl of x at all quadrature points by sum
//      factorization, one vector component at a time;
//   2. multiplies the curl at each point by the 3x3 matrix stored in pa_data;
//   3. applies the transposed evaluation, one component at a time, and
//      accumulates the result into y.
//
// The template parameters MAX_D1D and MAX_Q1D size the local work arrays;
// D1D <= MAX_D1D and Q1D <= MAX_Q1D are verified on entry.
template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
static void PACurlCurlApply3D(const int D1D,
                              const int Q1D,
                              const bool symmetric,
                              const int NE,
                              const Array<double> &bo,
                              const Array<double> &bc,
                              const Array<double> &bot,
                              const Array<double> &bct,
                              const Array<double> &gc,
                              const Array<double> &gct,
                              const Vector &pa_data,
                              const Vector &x,
                              Vector &y)
{
   MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
   MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
   // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
   // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
   // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
   // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
   // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]

   constexpr static int VDIM = 3;

   // 1D basis tables: Bo is Q1D x (D1D-1); Bc and Gc are Q1D x D1D. The
   // tables ending in "t" are indexed with the two indices swapped.
   auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
   auto Bc = Reshape(bc.Read(), Q1D, D1D);
   auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
   auto Bct = Reshape(bct.Read(), D1D, Q1D);
   auto Gc = Reshape(gc.Read(), Q1D, D1D);
   auto Gct = Reshape(gct.Read(), D1D, Q1D);
   auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
   auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
   auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);

   MFEM_FORALL(e, NE,
   {
      double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
      // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.

      for (int qz = 0; qz < Q1D; ++qz)
      {
         for (int qy = 0; qy < Q1D; ++qy)
         {
            for (int qx = 0; qx < Q1D; ++qx)
            {
               for (int c = 0; c < VDIM; ++c)
               {
                  curl[qz][qy][qx][c] = 0.0;
               }
            }
         }
      }

      // We treat x, y, z components separately for optimization specific to each.

      // Offset of the current component's dofs within the element.
      int osc = 0;

      {
         // x component
         const int D1Dz = D1D;
         const int D1Dy = D1D;
         const int D1Dx = D1D - 1;

         for (int dz = 0; dz < D1Dz; ++dz)
         {
            // gradXY[qy][qx][0]: y-derivative, gradXY[qy][qx][1]: value, both
            // for the current dz slice.
            double gradXY[MAX_Q1D][MAX_Q1D][2];
            for (int qy = 0; qy < Q1D; ++qy)
            {
               for (int qx = 0; qx < Q1D; ++qx)
               {
                  for (int d = 0; d < 2; ++d)
                  {
                     gradXY[qy][qx][d] = 0.0;
                  }
               }
            }

            for (int dy = 0; dy < D1Dy; ++dy)
            {
               double massX[MAX_Q1D];
               for (int qx = 0; qx < Q1D; ++qx)
               {
                  massX[qx] = 0.0;
               }

               for (int dx = 0; dx < D1Dx; ++dx)
               {
                  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
                  for (int qx = 0; qx < Q1D; ++qx)
                  {
                     massX[qx] += t * Bo(qx,dx);
                  }
               }

               for (int qy = 0; qy < Q1D; ++qy)
               {
                  const double wy = Bc(qy,dy);
                  const double wDy = Gc(qy,dy);
                  for (int qx = 0; qx < Q1D; ++qx)
                  {
                     const double wx = massX[qx];
                     gradXY[qy][qx][0] += wx * wDy;
                     gradXY[qy][qx][1] += wx * wy;
                  }
               }
            }

            for (int qz = 0; qz < Q1D; ++qz)
            {
               const double wz = Bc(qz,dz);
               const double wDz = Gc(qz,dz);
               for (int qy = 0; qy < Q1D; ++qy)
               {
                  for (int qx = 0; qx < Q1D; ++qx)
                  {
                     // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
                     curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
                     curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz;  // -(u_0)_{x_1}
                  }
               }
            }
         }

         osc += D1Dx * D1Dy * D1Dz;
      }

      {
         // y component
         const int D1Dz = D1D;
         const int D1Dy = D1D - 1;
         const int D1Dx = D1D;

         for (int dz = 0; dz < D1Dz; ++dz)
         {
            // gradXY[qy][qx][0]: x-derivative, gradXY[qy][qx][1]: value, both
            // for the current dz slice.
            double gradXY[MAX_Q1D][MAX_Q1D][2];
            for (int qy = 0; qy < Q1D; ++qy)
            {
               for (int qx = 0; qx < Q1D; ++qx)
               {
                  for (int d = 0; d < 2; ++d)
                  {
                     gradXY[qy][qx][d] = 0.0;
                  }
               }
            }

            for (int dx = 0; dx < D1Dx; ++dx)
            {
               double massY[MAX_Q1D];
               for (int qy = 0; qy < Q1D; ++qy)
               {
                  massY[qy] = 0.0;
               }

               for (int dy = 0; dy < D1Dy; ++dy)
               {
                  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
                  for (int qy = 0; qy < Q1D; ++qy)
                  {
                     massY[qy] += t * Bo(qy,dy);
                  }
               }

               for (int qx = 0; qx < Q1D; ++qx)
               {
                  const double wx = Bc(qx,dx);
                  const double wDx = Gc(qx,dx);
                  for (int qy = 0; qy < Q1D; ++qy)
                  {
                     const double wy = massY[qy];
                     gradXY[qy][qx][0] += wDx * wy;
                     gradXY[qy][qx][1] += wx * wy;
                  }
               }
            }

            for (int qz = 0; qz < Q1D; ++qz)
            {
               const double wz = Bc(qz,dz);
               const double wDz = Gc(qz,dz);
               for (int qy = 0; qy < Q1D; ++qy)
               {
                  for (int qx = 0; qx < Q1D; ++qx)
                  {
                     // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
                     curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
                     curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz;  // (u_1)_{x_0}
                  }
               }
            }
         }

         osc += D1Dx * D1Dy * D1Dz;
      }

      {
         // z component
         const int D1Dz = D1D - 1;
         const int D1Dy = D1D;
         const int D1Dx = D1D;

         for (int dx = 0; dx < D1Dx; ++dx)
         {
            // gradYZ[qz][qy][0]: value, gradYZ[qz][qy][1]: y-derivative, both
            // for the current dx slice.
            double gradYZ[MAX_Q1D][MAX_Q1D][2];
            for (int qz = 0; qz < Q1D; ++qz)
            {
               for (int qy = 0; qy < Q1D; ++qy)
               {
                  for (int d = 0; d < 2; ++d)
                  {
                     gradYZ[qz][qy][d] = 0.0;
                  }
               }
            }

            for (int dy = 0; dy < D1Dy; ++dy)
            {
               double massZ[MAX_Q1D];
               for (int qz = 0; qz < Q1D; ++qz)
               {
                  massZ[qz] = 0.0;
               }

               for (int dz = 0; dz < D1Dz; ++dz)
               {
                  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
                  for (int qz = 0; qz < Q1D; ++qz)
                  {
                     massZ[qz] += t * Bo(qz,dz);
                  }
               }

               for (int qy = 0; qy < Q1D; ++qy)
               {
                  const double wy = Bc(qy,dy);
                  const double wDy = Gc(qy,dy);
                  for (int qz = 0; qz < Q1D; ++qz)
                  {
                     const double wz = massZ[qz];
                     gradYZ[qz][qy][0] += wz * wy;
                     gradYZ[qz][qy][1] += wz * wDy;
                  }
               }
            }

            for (int qx = 0; qx < Q1D; ++qx)
            {
               const double wx = Bc(qx,dx);
               const double wDx = Gc(qx,dx);

               for (int qy = 0; qy < Q1D; ++qy)
               {
                  for (int qz = 0; qz < Q1D; ++qz)
                  {
                     // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
                     curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx;  // (u_2)_{x_1}
                     curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
                  }
               }
            }
         }
      }

      // Apply D operator.
      // The 3x3 matrix O is unpacked from 6 (symmetric, upper triangle) or 9
      // (row by row) stored entries, and curl is overwritten by O * curl.
      for (int qz = 0; qz < Q1D; ++qz)
      {
         for (int qy = 0; qy < Q1D; ++qy)
         {
            for (int qx = 0; qx < Q1D; ++qx)
            {
               const double O11 = op(qx,qy,qz,0,e);
               const double O12 = op(qx,qy,qz,1,e);
               const double O13 = op(qx,qy,qz,2,e);
               const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
               const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
               const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
               const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
               const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
               const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);

               const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
                                 (O13 * curl[qz][qy][qx][2]);
               const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
                                 (O23 * curl[qz][qy][qx][2]);
               const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) +
                                 (O33 * curl[qz][qy][qx][2]);

               curl[qz][qy][qx][0] = c1;
               curl[qz][qy][qx][1] = c2;
               curl[qz][qy][qx][2] = c3;
            }
         }
      }

      // Transposed evaluation: accumulate (op * curl) into Y, one component
      // at a time.

      // x component
      osc = 0;
      {
         const int D1Dz = D1D;
         const int D1Dy = D1D;
         const int D1Dx = D1D - 1;

         for (int qz = 0; qz < Q1D; ++qz)
         {
            // gradXY12 is built from component 2 of (op * curl) with the
            // y-derivative table, gradXY21 from component 1 with the y-value
            // table.
            double gradXY12[MAX_D1D][MAX_D1D];
            double gradXY21[MAX_D1D][MAX_D1D];

            for (int dy = 0; dy < D1Dy; ++dy)
            {
               for (int dx = 0; dx < D1Dx; ++dx)
               {
                  gradXY12[dy][dx] = 0.0;
                  gradXY21[dy][dx] = 0.0;
               }
            }
            for (int qy = 0; qy < Q1D; ++qy)
            {
               double massX[MAX_D1D][2];
               for (int dx = 0; dx < D1Dx; ++dx)
               {
                  for (int n = 0; n < 2; ++n)
                  {
                     massX[dx][n] = 0.0;
                  }
               }
               for (int qx = 0; qx < Q1D; ++qx)
               {
                  for (int dx = 0; dx < D1Dx; ++dx)
                  {
                     const double wx = Bot(dx,qx);

                     massX[dx][0] += wx * curl[qz][qy][qx][1];
                     massX[dx][1] += wx * curl[qz][qy][qx][2];
                  }
               }
               for (int dy = 0; dy < D1Dy; ++dy)
               {
                  const double wy = Bct(dy,qy);
                  const double wDy = Gct(dy,qy);

                  for (int dx = 0; dx < D1Dx; ++dx)
                  {
                     gradXY21[dy][dx] += massX[dx][0] * wy;
                     gradXY12[dy][dx] += massX[dx][1] * wDy;
                  }
               }
            }

            for (int dz = 0; dz < D1Dz; ++dz)
            {
               const double wz = Bct(dz,qz);
               const double wDz = Gct(dz,qz);
               for (int dy = 0; dy < D1Dy; ++dy)
               {
                  for (int dx = 0; dx < D1Dx; ++dx)
                  {
                     // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
                     // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
                     Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
                       e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
                  }
               }
            }
         }  // loop qz

         osc += D1Dx * D1Dy * D1Dz;
      }

      // y component
      {
         const int D1Dz = D1D;
         const int D1Dy = D1D - 1;
         const int D1Dx = D1D;

         for (int qz = 0; qz < Q1D; ++qz)
         {
            // gradXY02 is built from component 2 of (op * curl) with the
            // x-derivative table, gradXY20 from component 0 with the x-value
            // table.
            double gradXY02[MAX_D1D][MAX_D1D];
            double gradXY20[MAX_D1D][MAX_D1D];

            for (int dy = 0; dy < D1Dy; ++dy)
            {
               for (int dx = 0; dx < D1Dx; ++dx)
               {
                  gradXY02[dy][dx] = 0.0;
                  gradXY20[dy][dx] = 0.0;
               }
            }
            for (int qx = 0; qx < Q1D; ++qx)
            {
               double massY[MAX_D1D][2];
               for (int dy = 0; dy < D1Dy; ++dy)
               {
                  massY[dy][0] = 0.0;
                  massY[dy][1] = 0.0;
               }
               for (int qy = 0; qy < Q1D; ++qy)
               {
                  for (int dy = 0; dy < D1Dy; ++dy)
                  {
                     const double wy = Bot(dy,qy);

                     massY[dy][0] += wy * curl[qz][qy][qx][2];
                     massY[dy][1] += wy * curl[qz][qy][qx][0];
                  }
               }
               for (int dx = 0; dx < D1Dx; ++dx)
               {
                  const double wx = Bct(dx,qx);
                  const double wDx = Gct(dx,qx);

                  for (int dy = 0; dy < D1Dy; ++dy)
                  {
                     gradXY02[dy][dx] += massY[dy][0] * wDx;
                     gradXY20[dy][dx] += massY[dy][1] * wx;
                  }
               }
            }

            for (int dz = 0; dz < D1Dz; ++dz)
            {
               const double wz = Bct(dz,qz);
               const double wDz = Gct(dz,qz);
               for (int dy = 0; dy < D1Dy; ++dy)
               {
                  for (int dx = 0; dx < D1Dx; ++dx)
                  {
                     // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
                     // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
                     Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
                       e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
                  }
               }
            }
         }  // loop qz

         osc += D1Dx * D1Dy * D1Dz;
      }

      // z component
      {
         const int D1Dz = D1D - 1;
         const int D1Dy = D1D;
         const int D1Dx = D1D;

         for (int qx = 0; qx < Q1D; ++qx)
         {
            // gradYZ01 is built from component 1 of (op * curl) with the
            // y-value table, gradYZ10 from component 0 with the y-derivative
            // table.
            double gradYZ01[MAX_D1D][MAX_D1D];
            double gradYZ10[MAX_D1D][MAX_D1D];

            for (int dy = 0; dy < D1Dy; ++dy)
            {
               for (int dz = 0; dz < D1Dz; ++dz)
               {
                  gradYZ01[dz][dy] = 0.0;
                  gradYZ10[dz][dy] = 0.0;
               }
            }
            for (int qy = 0; qy < Q1D; ++qy)
            {
               double massZ[MAX_D1D][2];
               for (int dz = 0; dz < D1Dz; ++dz)
               {
                  for (int n = 0; n < 2; ++n)
                  {
                     massZ[dz][n] = 0.0;
                  }
               }
               for (int qz = 0; qz < Q1D; ++qz)
               {
                  for (int dz = 0; dz < D1Dz; ++dz)
                  {
                     const double wz = Bot(dz,qz);

                     massZ[dz][0] += wz * curl[qz][qy][qx][0];
                     massZ[dz][1] += wz * curl[qz][qy][qx][1];
                  }
               }
               for (int dy = 0; dy < D1Dy; ++dy)
               {
                  const double wy = Bct(dy,qy);
                  const double wDy = Gct(dy,qy);

                  for (int dz = 0; dz < D1Dz; ++dz)
                  {
                     gradYZ01[dz][dy] += wy * massZ[dz][1];
                     gradYZ10[dz][dy] += wDy * massZ[dz][0];
                  }
               }
            }

            for (int dx = 0; dx < D1Dx; ++dx)
            {
               const double wx = Bct(dx,qx);
               const double wDx = Gct(dx,qx);

               for (int dy = 0; dy < D1Dy; ++dy)
               {
                  for (int dz = 0; dz < D1Dz; ++dz)
                  {
                     // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
                     // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
                     Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
                       e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
                  }
               }
            }
         }  // loop qx
      }
   }); // end of element loop
}
1678 
1679 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
SmemPACurlCurlApply3D(const int D1D,const int Q1D,const bool symmetric,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Array<double> & gc,const Array<double> & gct,const Vector & pa_data,const Vector & x,Vector & y)1680 static void SmemPACurlCurlApply3D(const int D1D,
1681                                   const int Q1D,
1682                                   const bool symmetric,
1683                                   const int NE,
1684                                   const Array<double> &bo,
1685                                   const Array<double> &bc,
1686                                   const Array<double> &bot,
1687                                   const Array<double> &bct,
1688                                   const Array<double> &gc,
1689                                   const Array<double> &gct,
1690                                   const Vector &pa_data,
1691                                   const Vector &x,
1692                                   Vector &y)
1693 {
1694    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1695    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1696    // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1697    // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1698    // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1699    // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1700    // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1701 
1702    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1703    auto Bc = Reshape(bc.Read(), Q1D, D1D);
1704    auto Gc = Reshape(gc.Read(), Q1D, D1D);
1705    auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1706    auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1707    auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1708 
1709    const int s = symmetric ? 6 : 9;
1710 
1711    MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1712    {
1713       constexpr int VDIM = 3;
1714 
1715       MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
1716       MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
1717       MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
1718 
1719       double ope[9];
1720       MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
1721       MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
1722 
1723       MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
1724 
1725       MFEM_FOREACH_THREAD(qx,x,Q1D)
1726       {
1727          MFEM_FOREACH_THREAD(qy,y,Q1D)
1728          {
1729             MFEM_FOREACH_THREAD(qz,z,Q1D)
1730             {
1731                for (int i=0; i<s; ++i)
1732                {
1733                   ope[i] = op(qx,qy,qz,i,e);
1734                }
1735             }
1736          }
1737       }
1738 
1739       const int tidx = MFEM_THREAD_ID(x);
1740       const int tidy = MFEM_THREAD_ID(y);
1741       const int tidz = MFEM_THREAD_ID(z);
1742 
1743       if (tidz == 0)
1744       {
1745          MFEM_FOREACH_THREAD(d,y,D1D)
1746          {
1747             MFEM_FOREACH_THREAD(q,x,Q1D)
1748             {
1749                sBc[d][q] = Bc(q,d);
1750                sGc[d][q] = Gc(q,d);
1751                if (d < D1D-1)
1752                {
1753                   sBo[d][q] = Bo(q,d);
1754                }
1755             }
1756          }
1757       }
1758       MFEM_SYNC_THREAD;
1759 
1760       for (int qz=0; qz < Q1D; ++qz)
1761       {
1762          if (tidz == qz)
1763          {
1764             MFEM_FOREACH_THREAD(qy,y,Q1D)
1765             {
1766                MFEM_FOREACH_THREAD(qx,x,Q1D)
1767                {
1768                   for (int i=0; i<3; ++i)
1769                   {
1770                      curl[qy][qx][i] = 0.0;
1771                   }
1772                }
1773             }
1774          }
1775 
1776          int osc = 0;
1777          for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
1778          {
1779             const int D1Dz = (c == 2) ? D1D - 1 : D1D;
1780             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1781             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1782 
1783             MFEM_FOREACH_THREAD(dz,z,D1Dz)
1784             {
1785                MFEM_FOREACH_THREAD(dy,y,D1Dy)
1786                {
1787                   MFEM_FOREACH_THREAD(dx,x,D1Dx)
1788                   {
1789                      sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1790                   }
1791                }
1792             }
1793             MFEM_SYNC_THREAD;
1794 
1795             if (tidz == qz)
1796             {
1797                if (c == 0)
1798                {
1799                   for (int i=0; i<s; ++i)
1800                   {
1801                      sop[i][tidx][tidy] = ope[i];
1802                   }
1803                }
1804 
1805                MFEM_FOREACH_THREAD(qy,y,Q1D)
1806                {
1807                   MFEM_FOREACH_THREAD(qx,x,Q1D)
1808                   {
1809                      double u = 0.0;
1810                      double v = 0.0;
1811 
1812                      // We treat x, y, z components separately for optimization specific to each.
1813                      if (c == 0) // x component
1814                      {
1815                         // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1816 
1817                         for (int dz = 0; dz < D1Dz; ++dz)
1818                         {
1819                            const double wz = sBc[dz][qz];
1820                            const double wDz = sGc[dz][qz];
1821 
1822                            for (int dy = 0; dy < D1Dy; ++dy)
1823                            {
1824                               const double wy = sBc[dy][qy];
1825                               const double wDy = sGc[dy][qy];
1826 
1827                               for (int dx = 0; dx < D1Dx; ++dx)
1828                               {
1829                                  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
1830                                  u += wx * wDy * wz;
1831                                  v += wx * wy * wDz;
1832                               }
1833                            }
1834                         }
1835 
1836                         curl[qy][qx][1] += v; // (u_0)_{x_2}
1837                         curl[qy][qx][2] -= u;  // -(u_0)_{x_1}
1838                      }
1839                      else if (c == 1)  // y component
1840                      {
1841                         // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1842 
1843                         for (int dz = 0; dz < D1Dz; ++dz)
1844                         {
1845                            const double wz = sBc[dz][qz];
1846                            const double wDz = sGc[dz][qz];
1847 
1848                            for (int dy = 0; dy < D1Dy; ++dy)
1849                            {
1850                               const double wy = sBo[dy][qy];
1851 
1852                               for (int dx = 0; dx < D1Dx; ++dx)
1853                               {
1854                                  const double t = sX[dz][dy][dx];
1855                                  const double wx = t * sBc[dx][qx];
1856                                  const double wDx = t * sGc[dx][qx];
1857 
1858                                  u += wDx * wy * wz;
1859                                  v += wx * wy * wDz;
1860                               }
1861                            }
1862                         }
1863 
1864                         curl[qy][qx][0] -= v; // -(u_1)_{x_2}
1865                         curl[qy][qx][2] += u; // (u_1)_{x_0}
1866                      }
1867                      else // z component
1868                      {
1869                         // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1870 
1871                         for (int dz = 0; dz < D1Dz; ++dz)
1872                         {
1873                            const double wz = sBo[dz][qz];
1874 
1875                            for (int dy = 0; dy < D1Dy; ++dy)
1876                            {
1877                               const double wy = sBc[dy][qy];
1878                               const double wDy = sGc[dy][qy];
1879 
1880                               for (int dx = 0; dx < D1Dx; ++dx)
1881                               {
1882                                  const double t = sX[dz][dy][dx];
1883                                  const double wx = t * sBc[dx][qx];
1884                                  const double wDx = t * sGc[dx][qx];
1885 
1886                                  u += wDx * wy * wz;
1887                                  v += wx * wDy * wz;
1888                               }
1889                            }
1890                         }
1891 
1892                         curl[qy][qx][0] += v; // (u_2)_{x_1}
1893                         curl[qy][qx][1] -= u; // -(u_2)_{x_0}
1894                      }
1895                   } // qx
1896                } // qy
1897             } // tidz == qz
1898 
1899             osc += D1Dx * D1Dy * D1Dz;
1900             MFEM_SYNC_THREAD;
1901          } // c
1902 
1903          double dxyz1 = 0.0;
1904          double dxyz2 = 0.0;
1905          double dxyz3 = 0.0;
1906 
1907          MFEM_FOREACH_THREAD(dz,z,D1D)
1908          {
1909             const double wcz = sBc[dz][qz];
1910             const double wcDz = sGc[dz][qz];
1911             const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
1912 
1913             MFEM_FOREACH_THREAD(dy,y,D1D)
1914             {
1915                MFEM_FOREACH_THREAD(dx,x,D1D)
1916                {
1917                   for (int qy = 0; qy < Q1D; ++qy)
1918                   {
1919                      const double wcy = sBc[dy][qy];
1920                      const double wcDy = sGc[dy][qy];
1921                      const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
1922 
1923                      for (int qx = 0; qx < Q1D; ++qx)
1924                      {
1925                         const double O11 = sop[0][qx][qy];
1926                         const double O12 = sop[1][qx][qy];
1927                         const double O13 = sop[2][qx][qy];
1928                         const double O21 = symmetric ? O12 : sop[3][qx][qy];
1929                         const double O22 = symmetric ? sop[3][qx][qy] : sop[4][qx][qy];
1930                         const double O23 = symmetric ? sop[4][qx][qy] : sop[5][qx][qy];
1931                         const double O31 = symmetric ? O13 : sop[6][qx][qy];
1932                         const double O32 = symmetric ? O23 : sop[7][qx][qy];
1933                         const double O33 = symmetric ? sop[5][qx][qy] : sop[8][qx][qy];
1934 
1935                         const double c1 = (O11 * curl[qy][qx][0]) + (O12 * curl[qy][qx][1]) +
1936                                           (O13 * curl[qy][qx][2]);
1937                         const double c2 = (O21 * curl[qy][qx][0]) + (O22 * curl[qy][qx][1]) +
1938                                           (O23 * curl[qy][qx][2]);
1939                         const double c3 = (O31 * curl[qy][qx][0]) + (O32 * curl[qy][qx][1]) +
1940                                           (O33 * curl[qy][qx][2]);
1941 
1942                         const double wcx = sBc[dx][qx];
1943                         const double wDx = sGc[dx][qx];
1944 
1945                         if (dx < D1D-1)
1946                         {
1947                            // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1948                            // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1949                            const double wx = sBo[dx][qx];
1950                            dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
1951                         }
1952 
1953                         // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1954                         // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1955                         dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
1956 
1957                         // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1958                         // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1959                         dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
1960                      } // qx
1961                   } // qy
1962                } // dx
1963             } // dy
1964          } // dz
1965 
1966          MFEM_SYNC_THREAD;
1967 
1968          MFEM_FOREACH_THREAD(dz,z,D1D)
1969          {
1970             MFEM_FOREACH_THREAD(dy,y,D1D)
1971             {
1972                MFEM_FOREACH_THREAD(dx,x,D1D)
1973                {
1974                   if (dx < D1D-1)
1975                   {
1976                      Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
1977                   }
1978                   if (dy < D1D-1)
1979                   {
1980                      Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
1981                   }
1982                   if (dz < D1D-1)
1983                   {
1984                      Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
1985                   }
1986                }
1987             }
1988          }
1989       } // qz
1990    }); // end of element loop
1991 }
1992 
AddMultPA(const Vector & x,Vector & y) const1993 void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
1994 {
1995    if (dim == 3)
1996    {
1997       if (Device::Allows(Backend::DEVICE_MASK))
1998       {
1999          const int ID = (dofs1D << 4) | quad1D;
2000          switch (ID)
2001          {
2002             case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne,
2003                                                             mapsO->B, mapsC->B, mapsO->Bt,
2004                                                             mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2005             case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne,
2006                                                             mapsO->B, mapsC->B, mapsO->Bt,
2007                                                             mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2008             case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne,
2009                                                             mapsO->B,
2010                                                             mapsC->B, mapsO->Bt,
2011                                                             mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2012             case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne,
2013                                                             mapsO->B, mapsC->B, mapsO->Bt,
2014                                                             mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2015             default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B,
2016                                                      mapsC->B, mapsO->Bt, mapsC->Bt,
2017                                                      mapsC->G, mapsC->Gt, pa_data, x, y);
2018          }
2019       }
2020       else
2021          PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt,
2022                            mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2023    }
2024    else if (dim == 2)
2025    {
2026       PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt,
2027                         mapsC->G, mapsC->Gt, pa_data, x, y);
2028    }
2029    else
2030    {
2031       MFEM_ABORT("Unsupported dimension!");
2032    }
2033 }
2034 
PACurlCurlAssembleDiagonal2D(const int D1D,const int Q1D,const int NE,const Array<double> & bo,const Array<double> & gc,const Vector & pa_data,Vector & diag)2035 static void PACurlCurlAssembleDiagonal2D(const int D1D,
2036                                          const int Q1D,
2037                                          const int NE,
2038                                          const Array<double> &bo,
2039                                          const Array<double> &gc,
2040                                          const Vector &pa_data,
2041                                          Vector &diag)
2042 {
2043    constexpr static int VDIM = 2;
2044    constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2045 
2046    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2047    auto Gc = Reshape(gc.Read(), Q1D, D1D);
2048    auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2049    auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
2050 
2051    MFEM_FORALL(e, NE,
2052    {
2053       int osc = 0;
2054 
2055       for (int c = 0; c < VDIM; ++c)  // loop over x, y components
2056       {
2057          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2058          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2059 
2060          double t[MAX_Q1D];
2061 
2062          for (int dy = 0; dy < D1Dy; ++dy)
2063          {
2064             for (int qx = 0; qx < Q1D; ++qx)
2065             {
2066                t[qx] = 0.0;
2067                for (int qy = 0; qy < Q1D; ++qy)
2068                {
2069                   const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy);
2070                   t[qx] += wy * wy * op(qx,qy,e);
2071                }
2072             }
2073 
2074             for (int dx = 0; dx < D1Dx; ++dx)
2075             {
2076                for (int qx = 0; qx < Q1D; ++qx)
2077                {
2078                   const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2079                   D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx;
2080                }
2081             }
2082          }
2083 
2084          osc += D1Dx * D1Dy;
2085       }  // loop c
2086    }); // end of element loop
2087 }
2088 
2089 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
PACurlCurlAssembleDiagonal3D(const int D1D,const int Q1D,const bool symmetric,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & go,const Array<double> & gc,const Vector & pa_data,Vector & diag)2090 static void PACurlCurlAssembleDiagonal3D(const int D1D,
2091                                          const int Q1D,
2092                                          const bool symmetric,
2093                                          const int NE,
2094                                          const Array<double> &bo,
2095                                          const Array<double> &bc,
2096                                          const Array<double> &go,
2097                                          const Array<double> &gc,
2098                                          const Vector &pa_data,
2099                                          Vector &diag)
2100 {
2101    constexpr static int VDIM = 3;
2102    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2103    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2104 
2105    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2106    auto Bc = Reshape(bc.Read(), Q1D, D1D);
2107    auto Go = Reshape(go.Read(), Q1D, D1D-1);
2108    auto Gc = Reshape(gc.Read(), Q1D, D1D);
2109    auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2110    auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2111 
2112    const int s = symmetric ? 6 : 9;
2113    const int i11 = 0;
2114    const int i12 = 1;
2115    const int i13 = 2;
2116    const int i21 = symmetric ? i12 : 3;
2117    const int i22 = symmetric ? 3 : 4;
2118    const int i23 = symmetric ? 4 : 5;
2119    const int i31 = symmetric ? i13 : 6;
2120    const int i32 = symmetric ? i23 : 7;
2121    const int i33 = symmetric ? 5 : 8;
2122 
2123    MFEM_FORALL(e, NE,
2124    {
2125       // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2126       // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2127       // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2128       // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2129       // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2130 
2131       // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF),
2132       // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient.
2133 
2134       int osc = 0;
2135 
2136       for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
2137       {
2138          const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2139          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2140          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2141 
2142          double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3];
2143 
2144          // z contraction
2145          for (int qx = 0; qx < Q1D; ++qx)
2146          {
2147             for (int qy = 0; qy < Q1D; ++qy)
2148             {
2149                for (int dz = 0; dz < D1Dz; ++dz)
2150                {
2151                   for (int i=0; i<s; ++i)
2152                   {
2153                      for (int d=0; d<3; ++d)
2154                      {
2155                         zt[qx][qy][dz][i][d] = 0.0;
2156                      }
2157                   }
2158 
2159                   for (int qz = 0; qz < Q1D; ++qz)
2160                   {
2161                      const double wz = ((c == 2) ? Bo(qz,dz) : Bc(qz,dz));
2162                      const double wDz = ((c == 2) ? Go(qz,dz) : Gc(qz,dz));
2163 
2164                      for (int i=0; i<s; ++i)
2165                      {
2166                         zt[qx][qy][dz][i][0] += wz * wz * op(qx,qy,qz,i,e);
2167                         zt[qx][qy][dz][i][1] += wDz * wz * op(qx,qy,qz,i,e);
2168                         zt[qx][qy][dz][i][2] += wDz * wDz * op(qx,qy,qz,i,e);
2169                      }
2170                   }
2171                }
2172             }
2173          }  // end of z contraction
2174 
2175          double yt[MAX_Q1D][MAX_D1D][MAX_D1D][9][3][3];
2176 
2177          // y contraction
2178          for (int qx = 0; qx < Q1D; ++qx)
2179          {
2180             for (int dz = 0; dz < D1Dz; ++dz)
2181             {
2182                for (int dy = 0; dy < D1Dy; ++dy)
2183                {
2184                   for (int i=0; i<s; ++i)
2185                   {
2186                      for (int d=0; d<3; ++d)
2187                         for (int j=0; j<3; ++j)
2188                         {
2189                            yt[qx][dy][dz][i][d][j] = 0.0;
2190                         }
2191                   }
2192 
2193                   for (int qy = 0; qy < Q1D; ++qy)
2194                   {
2195                      const double wy = ((c == 1) ? Bo(qy,dy) : Bc(qy,dy));
2196                      const double wDy = ((c == 1) ? Go(qy,dy) : Gc(qy,dy));
2197 
2198                      for (int i=0; i<s; ++i)
2199                      {
2200                         for (int d=0; d<3; ++d)
2201                         {
2202                            yt[qx][dy][dz][i][d][0] += wy * wy * zt[qx][qy][dz][i][d];
2203                            yt[qx][dy][dz][i][d][1] += wDy * wy * zt[qx][qy][dz][i][d];
2204                            yt[qx][dy][dz][i][d][2] += wDy * wDy * zt[qx][qy][dz][i][d];
2205                         }
2206                      }
2207                   }
2208                }
2209             }
2210          }  // end of y contraction
2211 
2212          // x contraction
2213          for (int dz = 0; dz < D1Dz; ++dz)
2214          {
2215             for (int dy = 0; dy < D1Dy; ++dy)
2216             {
2217                for (int dx = 0; dx < D1Dx; ++dx)
2218                {
2219                   for (int qx = 0; qx < Q1D; ++qx)
2220                   {
2221                      const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2222                      const double wDx = ((c == 0) ? Go(qx,dx) : Gc(qx,dx));
2223 
2224                      // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2225                      // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2226                      // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2227                      // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2228                      // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2229 
2230                      /*
2231                        const double O11 = op(q,0,e);
2232                        const double O12 = op(q,1,e);
2233                        const double O13 = op(q,2,e);
2234                        const double O22 = op(q,3,e);
2235                        const double O23 = op(q,4,e);
2236                        const double O33 = op(q,5,e);
2237                      */
2238 
2239                      if (c == 0)
2240                      {
2241                         // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2242                         const double sumy = yt[qx][dy][dz][i22][2][0] - yt[qx][dy][dz][i23][1][1]
2243                                             - yt[qx][dy][dz][i32][1][1] + yt[qx][dy][dz][i33][0][2];
2244 
2245                         D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += sumy * wx * wx;
2246                      }
2247                      else if (c == 1)
2248                      {
2249                         // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2250                         const double d = (yt[qx][dy][dz][i11][2][0] * wx * wx)
2251                                          - ((yt[qx][dy][dz][i13][1][0] + yt[qx][dy][dz][i31][1][0]) * wDx * wx)
2252                                          + (yt[qx][dy][dz][i33][0][0] * wDx * wDx);
2253 
2254                         D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2255                      }
2256                      else
2257                      {
2258                         // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2259                         const double d = (yt[qx][dy][dz][i11][0][2] * wx * wx)
2260                                          - ((yt[qx][dy][dz][i12][0][1] + yt[qx][dy][dz][i21][0][1]) * wDx * wx)
2261                                          + (yt[qx][dy][dz][i22][0][0] * wDx * wDx);
2262 
2263                         D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2264                      }
2265                   }
2266                }
2267             }
2268          }  // end of x contraction
2269 
2270          osc += D1Dx * D1Dy * D1Dz;
2271       }  // loop c
2272    }); // end of element loop
2273 }
2274 
2275 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
SmemPACurlCurlAssembleDiagonal3D(const int D1D,const int Q1D,const bool symmetric,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & go,const Array<double> & gc,const Vector & pa_data,Vector & diag)2276 static void SmemPACurlCurlAssembleDiagonal3D(const int D1D,
2277                                              const int Q1D,
2278                                              const bool symmetric,
2279                                              const int NE,
2280                                              const Array<double> &bo,
2281                                              const Array<double> &bc,
2282                                              const Array<double> &go,
2283                                              const Array<double> &gc,
2284                                              const Vector &pa_data,
2285                                              Vector &diag)
2286 {
2287    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2288    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2289 
2290    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2291    auto Bc = Reshape(bc.Read(), Q1D, D1D);
2292    auto Go = Reshape(go.Read(), Q1D, D1D-1);
2293    auto Gc = Reshape(gc.Read(), Q1D, D1D);
2294    auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2295    auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2296 
2297    const int s = symmetric ? 6 : 9;
2298    const int i11 = 0;
2299    const int i12 = 1;
2300    const int i13 = 2;
2301    const int i21 = symmetric ? i12 : 3;
2302    const int i22 = symmetric ? 3 : 4;
2303    const int i23 = symmetric ? 4 : 5;
2304    const int i31 = symmetric ? i13 : 6;
2305    const int i32 = symmetric ? i23 : 7;
2306    const int i33 = symmetric ? 5 : 8;
2307 
2308    MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
2309    {
2310       // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2311       // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2312       // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2313       // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2314       // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2315 
2316       constexpr int VDIM = 3;
2317 
2318       MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D];
2319       MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D];
2320       MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D];
2321       MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D];
2322 
2323       double ope[9];
2324       MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
2325 
2326       MFEM_FOREACH_THREAD(qx,x,Q1D)
2327       {
2328          MFEM_FOREACH_THREAD(qy,y,Q1D)
2329          {
2330             MFEM_FOREACH_THREAD(qz,z,Q1D)
2331             {
2332                for (int i=0; i<s; ++i)
2333                {
2334                   ope[i] = op(qx,qy,qz,i,e);
2335                }
2336             }
2337          }
2338       }
2339 
2340       const int tidx = MFEM_THREAD_ID(x);
2341       const int tidy = MFEM_THREAD_ID(y);
2342       const int tidz = MFEM_THREAD_ID(z);
2343 
2344       if (tidz == 0)
2345       {
2346          MFEM_FOREACH_THREAD(d,y,D1D)
2347          {
2348             MFEM_FOREACH_THREAD(q,x,Q1D)
2349             {
2350                sBc[q][d] = Bc(q,d);
2351                sGc[q][d] = Gc(q,d);
2352                if (d < D1D-1)
2353                {
2354                   sBo[q][d] = Bo(q,d);
2355                   sGo[q][d] = Go(q,d);
2356                }
2357             }
2358          }
2359       }
2360       MFEM_SYNC_THREAD;
2361 
2362       int osc = 0;
2363       for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
2364       {
2365          const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2366          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2367          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2368 
2369          double dxyz = 0.0;
2370 
2371          for (int qz=0; qz < Q1D; ++qz)
2372          {
2373             if (tidz == qz)
2374             {
2375                for (int i=0; i<s; ++i)
2376                {
2377                   sop[i][tidx][tidy] = ope[i];
2378                }
2379             }
2380 
2381             MFEM_SYNC_THREAD;
2382 
2383             MFEM_FOREACH_THREAD(dz,z,D1Dz)
2384             {
2385                const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
2386                const double wDz = ((c == 2) ? sGo[qz][dz] : sGc[qz][dz]);
2387 
2388                MFEM_FOREACH_THREAD(dy,y,D1Dy)
2389                {
2390                   MFEM_FOREACH_THREAD(dx,x,D1Dx)
2391                   {
2392                      for (int qy = 0; qy < Q1D; ++qy)
2393                      {
2394                         const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
2395                         const double wDy = ((c == 1) ? sGo[qy][dy] : sGc[qy][dy]);
2396 
2397                         for (int qx = 0; qx < Q1D; ++qx)
2398                         {
2399                            const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
2400                            const double wDx = ((c == 0) ? sGo[qx][dx] : sGc[qx][dx]);
2401 
2402                            if (c == 0)
2403                            {
2404                               // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2405 
2406                               // (u_0)_{x_2} O22 (u_0)_{x_2}
2407                               dxyz += sop[i22][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2408 
2409                               // -(u_0)_{x_2} O23 (u_0)_{x_1} - (u_0)_{x_1} O32 (u_0)_{x_2}
2410                               dxyz += -(sop[i23][qx][qy] + sop[i32][qx][qy]) * wx * wx * wDy * wy * wDz * wz;
2411 
2412                               // (u_0)_{x_1} O33 (u_0)_{x_1}
2413                               dxyz += sop[i33][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2414                            }
2415                            else if (c == 1)
2416                            {
2417                               // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2418 
2419                               // (u_1)_{x_2} O11 (u_1)_{x_2}
2420                               dxyz += sop[i11][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2421 
2422                               // -(u_1)_{x_2} O13 (u_1)_{x_0} - (u_1)_{x_0} O31 (u_1)_{x_2}
2423                               dxyz += -(sop[i13][qx][qy] + sop[i31][qx][qy]) * wDx * wx * wy * wy * wDz * wz;
2424 
2425                               // (u_1)_{x_0} O33 (u_1)_{x_0})
2426                               dxyz += sop[i33][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2427                            }
2428                            else
2429                            {
2430                               // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2431 
2432                               // (u_2)_{x_1} O11 (u_2)_{x_1}
2433                               dxyz += sop[i11][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2434 
2435                               // -(u_2)_{x_1} O12 (u_2)_{x_0} - (u_2)_{x_0} O21 (u_2)_{x_1}
2436                               dxyz += -(sop[i12][qx][qy] + sop[i21][qx][qy]) * wDx * wx * wDy * wy * wz * wz;
2437 
2438                               // (u_2)_{x_0} O22 (u_2)_{x_0}
2439                               dxyz += sop[i22][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2440                            }
2441                         }
2442                      }
2443                   }
2444                }
2445             }
2446 
2447             MFEM_SYNC_THREAD;
2448          }  // qz loop
2449 
2450          MFEM_FOREACH_THREAD(dz,z,D1Dz)
2451          {
2452             MFEM_FOREACH_THREAD(dy,y,D1Dy)
2453             {
2454                MFEM_FOREACH_THREAD(dx,x,D1Dx)
2455                {
2456                   D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
2457                }
2458             }
2459          }
2460 
2461          osc += D1Dx * D1Dy * D1Dz;
2462       }  // c loop
2463    }); // end of element loop
2464 }
2465 
AssembleDiagonalPA(Vector & diag)2466 void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag)
2467 {
2468    if (dim == 3)
2469    {
2470       if (Device::Allows(Backend::DEVICE_MASK))
2471       {
2472          const int ID = (dofs1D << 4) | quad1D;
2473          switch (ID)
2474          {
2475             case 0x23: return SmemPACurlCurlAssembleDiagonal3D<2,3>(dofs1D, quad1D,
2476                                                                        symmetric, ne,
2477                                                                        mapsO->B, mapsC->B,
2478                                                                        mapsO->G, mapsC->G,
2479                                                                        pa_data, diag);
2480             case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D,
2481                                                                        symmetric, ne,
2482                                                                        mapsO->B, mapsC->B,
2483                                                                        mapsO->G, mapsC->G,
2484                                                                        pa_data, diag);
2485             case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D,
2486                                                                        symmetric, ne,
2487                                                                        mapsO->B, mapsC->B,
2488                                                                        mapsO->G, mapsC->G,
2489                                                                        pa_data, diag);
2490             case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D,
2491                                                                        symmetric, ne,
2492                                                                        mapsO->B, mapsC->B,
2493                                                                        mapsO->G, mapsC->G,
2494                                                                        pa_data, diag);
2495             default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2496                                                                 mapsO->B, mapsC->B,
2497                                                                 mapsO->G, mapsC->G,
2498                                                                 pa_data, diag);
2499          }
2500       }
2501       else
2502          PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2503                                       mapsO->B, mapsC->B,
2504                                       mapsO->G, mapsC->G,
2505                                       pa_data, diag);
2506    }
2507    else if (dim == 2)
2508    {
2509       PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne,
2510                                    mapsO->B, mapsC->G, pa_data, diag);
2511    }
2512    else
2513    {
2514       MFEM_ABORT("Unsupported dimension!");
2515    }
2516 }
2517 
2518 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2519 // integrated against H(curl) test functions corresponding to y.
PAHcurlH1Apply3D(const int D1D,const int Q1D,const int NE,const Array<double> & bc,const Array<double> & gc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)2520 void PAHcurlH1Apply3D(const int D1D,
2521                       const int Q1D,
2522                       const int NE,
2523                       const Array<double> &bc,
2524                       const Array<double> &gc,
2525                       const Array<double> &bot,
2526                       const Array<double> &bct,
2527                       const Vector &pa_data,
2528                       const Vector &x,
2529                       Vector &y)
2530 {
2531    constexpr static int MAX_D1D = HCURL_MAX_D1D;
2532    constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2533 
2534    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2535    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2536 
2537    constexpr static int VDIM = 3;
2538 
2539    auto Bc = Reshape(bc.Read(), Q1D, D1D);
2540    auto Gc = Reshape(gc.Read(), Q1D, D1D);
2541    auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2542    auto Bct = Reshape(bct.Read(), D1D, Q1D);
2543    auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2544    auto X = Reshape(x.Read(), D1D, D1D, D1D, NE);
2545    auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2546 
2547    MFEM_FORALL(e, NE,
2548    {
2549       double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2550 
2551       for (int qz = 0; qz < Q1D; ++qz)
2552       {
2553          for (int qy = 0; qy < Q1D; ++qy)
2554          {
2555             for (int qx = 0; qx < Q1D; ++qx)
2556             {
2557                for (int c = 0; c < VDIM; ++c)
2558                {
2559                   mass[qz][qy][qx][c] = 0.0;
2560                }
2561             }
2562          }
2563       }
2564 
2565       for (int dz = 0; dz < D1D; ++dz)
2566       {
2567          double gradXY[MAX_Q1D][MAX_Q1D][3];
2568          for (int qy = 0; qy < Q1D; ++qy)
2569          {
2570             for (int qx = 0; qx < Q1D; ++qx)
2571             {
2572                gradXY[qy][qx][0] = 0.0;
2573                gradXY[qy][qx][1] = 0.0;
2574                gradXY[qy][qx][2] = 0.0;
2575             }
2576          }
2577          for (int dy = 0; dy < D1D; ++dy)
2578          {
2579             double gradX[MAX_Q1D][2];
2580             for (int qx = 0; qx < Q1D; ++qx)
2581             {
2582                gradX[qx][0] = 0.0;
2583                gradX[qx][1] = 0.0;
2584             }
2585             for (int dx = 0; dx < D1D; ++dx)
2586             {
2587                const double s = X(dx,dy,dz,e);
2588                for (int qx = 0; qx < Q1D; ++qx)
2589                {
2590                   gradX[qx][0] += s * Bc(qx,dx);
2591                   gradX[qx][1] += s * Gc(qx,dx);
2592                }
2593             }
2594             for (int qy = 0; qy < Q1D; ++qy)
2595             {
2596                const double wy  = Bc(qy,dy);
2597                const double wDy = Gc(qy,dy);
2598                for (int qx = 0; qx < Q1D; ++qx)
2599                {
2600                   const double wx  = gradX[qx][0];
2601                   const double wDx = gradX[qx][1];
2602                   gradXY[qy][qx][0] += wDx * wy;
2603                   gradXY[qy][qx][1] += wx * wDy;
2604                   gradXY[qy][qx][2] += wx * wy;
2605                }
2606             }
2607          }
2608          for (int qz = 0; qz < Q1D; ++qz)
2609          {
2610             const double wz  = Bc(qz,dz);
2611             const double wDz = Gc(qz,dz);
2612             for (int qy = 0; qy < Q1D; ++qy)
2613             {
2614                for (int qx = 0; qx < Q1D; ++qx)
2615                {
2616                   mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
2617                   mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
2618                   mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
2619                }
2620             }
2621          }
2622       }
2623 
2624       // Apply D operator.
2625       for (int qz = 0; qz < Q1D; ++qz)
2626       {
2627          for (int qy = 0; qy < Q1D; ++qy)
2628          {
2629             for (int qx = 0; qx < Q1D; ++qx)
2630             {
2631                const double O11 = op(qx,qy,qz,0,e);
2632                const double O12 = op(qx,qy,qz,1,e);
2633                const double O13 = op(qx,qy,qz,2,e);
2634                const double O22 = op(qx,qy,qz,3,e);
2635                const double O23 = op(qx,qy,qz,4,e);
2636                const double O33 = op(qx,qy,qz,5,e);
2637                const double massX = mass[qz][qy][qx][0];
2638                const double massY = mass[qz][qy][qx][1];
2639                const double massZ = mass[qz][qy][qx][2];
2640                mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
2641                mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
2642                mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
2643             }
2644          }
2645       }
2646 
2647       for (int qz = 0; qz < Q1D; ++qz)
2648       {
2649          double massXY[MAX_D1D][MAX_D1D];
2650 
2651          int osc = 0;
2652 
2653          for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
2654          {
2655             const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2656             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2657             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2658 
2659             for (int dy = 0; dy < D1Dy; ++dy)
2660             {
2661                for (int dx = 0; dx < D1Dx; ++dx)
2662                {
2663                   massXY[dy][dx] = 0.0;
2664                }
2665             }
2666             for (int qy = 0; qy < Q1D; ++qy)
2667             {
2668                double massX[MAX_D1D];
2669                for (int dx = 0; dx < D1Dx; ++dx)
2670                {
2671                   massX[dx] = 0;
2672                }
2673                for (int qx = 0; qx < Q1D; ++qx)
2674                {
2675                   for (int dx = 0; dx < D1Dx; ++dx)
2676                   {
2677                      massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2678                   }
2679                }
2680                for (int dy = 0; dy < D1Dy; ++dy)
2681                {
2682                   const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2683                   for (int dx = 0; dx < D1Dx; ++dx)
2684                   {
2685                      massXY[dy][dx] += massX[dx] * wy;
2686                   }
2687                }
2688             }
2689 
2690             for (int dz = 0; dz < D1Dz; ++dz)
2691             {
2692                const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
2693                for (int dy = 0; dy < D1Dy; ++dy)
2694                {
2695                   for (int dx = 0; dx < D1Dx; ++dx)
2696                   {
2697                      Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
2698                   }
2699                }
2700             }
2701 
2702             osc += D1Dx * D1Dy * D1Dz;
2703          }  // loop c
2704       }  // loop qz
2705    }); // end of element loop
2706 }
2707 
2708 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2709 // integrated against H(curl) test functions corresponding to y.
PAHcurlH1Apply2D(const int D1D,const int Q1D,const int NE,const Array<double> & bc,const Array<double> & gc,const Array<double> & bot,const Array<double> & bct,const Vector & pa_data,const Vector & x,Vector & y)2710 void PAHcurlH1Apply2D(const int D1D,
2711                       const int Q1D,
2712                       const int NE,
2713                       const Array<double> &bc,
2714                       const Array<double> &gc,
2715                       const Array<double> &bot,
2716                       const Array<double> &bct,
2717                       const Vector &pa_data,
2718                       const Vector &x,
2719                       Vector &y)
2720 {
2721    constexpr static int VDIM = 2;
2722    constexpr static int MAX_D1D = HCURL_MAX_D1D;
2723    constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2724 
2725    auto Bc = Reshape(bc.Read(), Q1D, D1D);
2726    auto Gc = Reshape(gc.Read(), Q1D, D1D);
2727    auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2728    auto Bct = Reshape(bct.Read(), D1D, Q1D);
2729    auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
2730    auto X = Reshape(x.Read(), D1D, D1D, NE);
2731    auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
2732 
2733    MFEM_FORALL(e, NE,
2734    {
2735       double mass[MAX_Q1D][MAX_Q1D][VDIM];
2736 
2737       for (int qy = 0; qy < Q1D; ++qy)
2738       {
2739          for (int qx = 0; qx < Q1D; ++qx)
2740          {
2741             for (int c = 0; c < VDIM; ++c)
2742             {
2743                mass[qy][qx][c] = 0.0;
2744             }
2745          }
2746       }
2747 
2748       for (int dy = 0; dy < D1D; ++dy)
2749       {
2750          double gradX[MAX_Q1D][2];
2751          for (int qx = 0; qx < Q1D; ++qx)
2752          {
2753             gradX[qx][0] = 0.0;
2754             gradX[qx][1] = 0.0;
2755          }
2756          for (int dx = 0; dx < D1D; ++dx)
2757          {
2758             const double s = X(dx,dy,e);
2759             for (int qx = 0; qx < Q1D; ++qx)
2760             {
2761                gradX[qx][0] += s * Bc(qx,dx);
2762                gradX[qx][1] += s * Gc(qx,dx);
2763             }
2764          }
2765          for (int qy = 0; qy < Q1D; ++qy)
2766          {
2767             const double wy  = Bc(qy,dy);
2768             const double wDy = Gc(qy,dy);
2769             for (int qx = 0; qx < Q1D; ++qx)
2770             {
2771                const double wx  = gradX[qx][0];
2772                const double wDx = gradX[qx][1];
2773                mass[qy][qx][0] += wDx * wy;
2774                mass[qy][qx][1] += wx * wDy;
2775             }
2776          }
2777       }
2778 
2779       // Apply D operator.
2780       for (int qy = 0; qy < Q1D; ++qy)
2781       {
2782          for (int qx = 0; qx < Q1D; ++qx)
2783          {
2784             const double O11 = op(qx,qy,0,e);
2785             const double O12 = op(qx,qy,1,e);
2786             const double O22 = op(qx,qy,2,e);
2787             const double massX = mass[qy][qx][0];
2788             const double massY = mass[qy][qx][1];
2789             mass[qy][qx][0] = (O11*massX)+(O12*massY);
2790             mass[qy][qx][1] = (O12*massX)+(O22*massY);
2791          }
2792       }
2793 
2794       for (int qy = 0; qy < Q1D; ++qy)
2795       {
2796          int osc = 0;
2797 
2798          for (int c = 0; c < VDIM; ++c)  // loop over x, y components
2799          {
2800             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2801             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2802 
2803             double massX[MAX_D1D];
2804             for (int dx = 0; dx < D1Dx; ++dx)
2805             {
2806                massX[dx] = 0;
2807             }
2808             for (int qx = 0; qx < Q1D; ++qx)
2809             {
2810                for (int dx = 0; dx < D1Dx; ++dx)
2811                {
2812                   massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2813                }
2814             }
2815 
2816             for (int dy = 0; dy < D1Dy; ++dy)
2817             {
2818                const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2819 
2820                for (int dx = 0; dx < D1Dx; ++dx)
2821                {
2822                   Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
2823                }
2824             }
2825 
2826             osc += D1Dx * D1Dy;
2827          }  // loop c
2828       }
2829    }); // end of element loop
2830 }
2831 
2832 // PA H(curl) Mass Assemble 3D kernel
PAHcurlL2Setup(const int NQ,const int coeffDim,const int NE,const Array<double> & w,Vector & coeff,Vector & op)2833 void PAHcurlL2Setup(const int NQ,
2834                     const int coeffDim,
2835                     const int NE,
2836                     const Array<double> &w,
2837                     Vector &coeff,
2838                     Vector &op)
2839 {
2840    auto W = w.Read();
2841    auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
2842    auto y = Reshape(op.Write(), coeffDim, NQ, NE);
2843 
2844    MFEM_FORALL(e, NE,
2845    {
2846       for (int q = 0; q < NQ; ++q)
2847       {
2848          for (int c=0; c<coeffDim; ++c)
2849          {
2850             y(c,q,e) = W[q] * C(c,q,e);
2851          }
2852       }
2853    });
2854 }
2855 
AssemblePA(const FiniteElementSpace & trial_fes,const FiniteElementSpace & test_fes)2856 void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
2857                                            const FiniteElementSpace &test_fes)
2858 {
2859    // Assumes tensor-product elements, with vector test and trial spaces.
2860    Mesh *mesh = trial_fes.GetMesh();
2861    const FiniteElement *trial_fel = trial_fes.GetFE(0);
2862    const FiniteElement *test_fel = test_fes.GetFE(0);
2863 
2864    const VectorTensorFiniteElement *trial_el =
2865       dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
2866    MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
2867 
2868    const VectorTensorFiniteElement *test_el =
2869       dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
2870    MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
2871 
2872    const IntegrationRule *ir
2873       = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
2874                                                      *mesh->GetElementTransformation(0));
2875    const int dims = trial_el->GetDim();
2876    MFEM_VERIFY(dims == 3, "");
2877 
2878    const int nq = ir->GetNPoints();
2879    dim = mesh->Dimension();
2880    MFEM_VERIFY(dim == 3, "");
2881 
2882    MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
2883 
2884    ne = trial_fes.GetNE();
2885    geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
2886    mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2887    mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2888    mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2889    mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2890    dofs1D = mapsC->ndof;
2891    quad1D = mapsC->nqpt;
2892    dofs1Dtest = mapsCtest->ndof;
2893 
2894    MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
2895 
2896    testType = test_el->GetDerivType();
2897    trialType = trial_el->GetDerivType();
2898 
2899    const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
2900    coeffDim = (DQ ? 3 : 1);
2901 
2902    pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType());
2903 
2904    Vector coeff(coeffDim * nq * ne);
2905    coeff = 1.0;
2906    auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
2907    if (Q || DQ)
2908    {
2909       Vector V(coeffDim);
2910       if (DQ)
2911       {
2912          MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
2913       }
2914 
2915       for (int e=0; e<ne; ++e)
2916       {
2917          ElementTransformation *tr = mesh->GetElementTransformation(e);
2918 
2919          for (int p=0; p<nq; ++p)
2920          {
2921             if (DQ)
2922             {
2923                DQ->Eval(V, *tr, ir->IntPoint(p));
2924                for (int i=0; i<coeffDim; ++i)
2925                {
2926                   coeffh(i, p, e) = V[i];
2927                }
2928             }
2929             else
2930             {
2931                coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
2932             }
2933          }
2934       }
2935    }
2936 
2937    if (testType == mfem::FiniteElement::CURL &&
2938        trialType == mfem::FiniteElement::CURL && dim == 3)
2939    {
2940       PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
2941    }
2942    else if (testType == mfem::FiniteElement::DIV &&
2943             trialType == mfem::FiniteElement::CURL && dim == 3 &&
2944             test_fel->GetOrder() == trial_fel->GetOrder())
2945    {
2946       PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
2947                         pa_data);
2948    }
2949    else
2950    {
2951       MFEM_ABORT("Unknown kernel.");
2952    }
2953 }
2954 
2955 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
2956 // integrated against H(curl) test functions corresponding to y.
2957 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
PAHcurlL2Apply3D(const int D1D,const int Q1D,const int coeffDim,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Array<double> & gc,const Vector & pa_data,const Vector & x,Vector & y)2958 static void PAHcurlL2Apply3D(const int D1D,
2959                              const int Q1D,
2960                              const int coeffDim,
2961                              const int NE,
2962                              const Array<double> &bo,
2963                              const Array<double> &bc,
2964                              const Array<double> &bot,
2965                              const Array<double> &bct,
2966                              const Array<double> &gc,
2967                              const Vector &pa_data,
2968                              const Vector &x,
2969                              Vector &y)
2970 {
2971    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2972    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2973    // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2974    // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v}
2975    // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v}
2976    // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2977    // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2978    // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2979 
2980    constexpr static int VDIM = 3;
2981 
2982    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2983    auto Bc = Reshape(bc.Read(), Q1D, D1D);
2984    auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2985    auto Bct = Reshape(bct.Read(), D1D, Q1D);
2986    auto Gc = Reshape(gc.Read(), Q1D, D1D);
2987    auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
2988    auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
2989    auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2990 
2991    MFEM_FORALL(e, NE,
2992    {
2993       double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2994       // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
2995 
2996       for (int qz = 0; qz < Q1D; ++qz)
2997       {
2998          for (int qy = 0; qy < Q1D; ++qy)
2999          {
3000             for (int qx = 0; qx < Q1D; ++qx)
3001             {
3002                for (int c = 0; c < VDIM; ++c)
3003                {
3004                   curl[qz][qy][qx][c] = 0.0;
3005                }
3006             }
3007          }
3008       }
3009 
3010       // We treat x, y, z components separately for optimization specific to each.
3011 
3012       int osc = 0;
3013 
3014       {
3015          // x component
3016          const int D1Dz = D1D;
3017          const int D1Dy = D1D;
3018          const int D1Dx = D1D - 1;
3019 
3020          for (int dz = 0; dz < D1Dz; ++dz)
3021          {
3022             double gradXY[MAX_Q1D][MAX_Q1D][2];
3023             for (int qy = 0; qy < Q1D; ++qy)
3024             {
3025                for (int qx = 0; qx < Q1D; ++qx)
3026                {
3027                   for (int d = 0; d < 2; ++d)
3028                   {
3029                      gradXY[qy][qx][d] = 0.0;
3030                   }
3031                }
3032             }
3033 
3034             for (int dy = 0; dy < D1Dy; ++dy)
3035             {
3036                double massX[MAX_Q1D];
3037                for (int qx = 0; qx < Q1D; ++qx)
3038                {
3039                   massX[qx] = 0.0;
3040                }
3041 
3042                for (int dx = 0; dx < D1Dx; ++dx)
3043                {
3044                   const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3045                   for (int qx = 0; qx < Q1D; ++qx)
3046                   {
3047                      massX[qx] += t * Bo(qx,dx);
3048                   }
3049                }
3050 
3051                for (int qy = 0; qy < Q1D; ++qy)
3052                {
3053                   const double wy = Bc(qy,dy);
3054                   const double wDy = Gc(qy,dy);
3055                   for (int qx = 0; qx < Q1D; ++qx)
3056                   {
3057                      const double wx = massX[qx];
3058                      gradXY[qy][qx][0] += wx * wDy;
3059                      gradXY[qy][qx][1] += wx * wy;
3060                   }
3061                }
3062             }
3063 
3064             for (int qz = 0; qz < Q1D; ++qz)
3065             {
3066                const double wz = Bc(qz,dz);
3067                const double wDz = Gc(qz,dz);
3068                for (int qy = 0; qy < Q1D; ++qy)
3069                {
3070                   for (int qx = 0; qx < Q1D; ++qx)
3071                   {
3072                      // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3073                      curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3074                      curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz;  // -(u_0)_{x_1}
3075                   }
3076                }
3077             }
3078          }
3079 
3080          osc += D1Dx * D1Dy * D1Dz;
3081       }
3082 
3083       {
3084          // y component
3085          const int D1Dz = D1D;
3086          const int D1Dy = D1D - 1;
3087          const int D1Dx = D1D;
3088 
3089          for (int dz = 0; dz < D1Dz; ++dz)
3090          {
3091             double gradXY[MAX_Q1D][MAX_Q1D][2];
3092             for (int qy = 0; qy < Q1D; ++qy)
3093             {
3094                for (int qx = 0; qx < Q1D; ++qx)
3095                {
3096                   for (int d = 0; d < 2; ++d)
3097                   {
3098                      gradXY[qy][qx][d] = 0.0;
3099                   }
3100                }
3101             }
3102 
3103             for (int dx = 0; dx < D1Dx; ++dx)
3104             {
3105                double massY[MAX_Q1D];
3106                for (int qy = 0; qy < Q1D; ++qy)
3107                {
3108                   massY[qy] = 0.0;
3109                }
3110 
3111                for (int dy = 0; dy < D1Dy; ++dy)
3112                {
3113                   const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3114                   for (int qy = 0; qy < Q1D; ++qy)
3115                   {
3116                      massY[qy] += t * Bo(qy,dy);
3117                   }
3118                }
3119 
3120                for (int qx = 0; qx < Q1D; ++qx)
3121                {
3122                   const double wx = Bc(qx,dx);
3123                   const double wDx = Gc(qx,dx);
3124                   for (int qy = 0; qy < Q1D; ++qy)
3125                   {
3126                      const double wy = massY[qy];
3127                      gradXY[qy][qx][0] += wDx * wy;
3128                      gradXY[qy][qx][1] += wx * wy;
3129                   }
3130                }
3131             }
3132 
3133             for (int qz = 0; qz < Q1D; ++qz)
3134             {
3135                const double wz = Bc(qz,dz);
3136                const double wDz = Gc(qz,dz);
3137                for (int qy = 0; qy < Q1D; ++qy)
3138                {
3139                   for (int qx = 0; qx < Q1D; ++qx)
3140                   {
3141                      // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3142                      curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3143                      curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz;  // (u_1)_{x_0}
3144                   }
3145                }
3146             }
3147          }
3148 
3149          osc += D1Dx * D1Dy * D1Dz;
3150       }
3151 
3152       {
3153          // z component
3154          const int D1Dz = D1D - 1;
3155          const int D1Dy = D1D;
3156          const int D1Dx = D1D;
3157 
3158          for (int dx = 0; dx < D1Dx; ++dx)
3159          {
3160             double gradYZ[MAX_Q1D][MAX_Q1D][2];
3161             for (int qz = 0; qz < Q1D; ++qz)
3162             {
3163                for (int qy = 0; qy < Q1D; ++qy)
3164                {
3165                   for (int d = 0; d < 2; ++d)
3166                   {
3167                      gradYZ[qz][qy][d] = 0.0;
3168                   }
3169                }
3170             }
3171 
3172             for (int dy = 0; dy < D1Dy; ++dy)
3173             {
3174                double massZ[MAX_Q1D];
3175                for (int qz = 0; qz < Q1D; ++qz)
3176                {
3177                   massZ[qz] = 0.0;
3178                }
3179 
3180                for (int dz = 0; dz < D1Dz; ++dz)
3181                {
3182                   const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3183                   for (int qz = 0; qz < Q1D; ++qz)
3184                   {
3185                      massZ[qz] += t * Bo(qz,dz);
3186                   }
3187                }
3188 
3189                for (int qy = 0; qy < Q1D; ++qy)
3190                {
3191                   const double wy = Bc(qy,dy);
3192                   const double wDy = Gc(qy,dy);
3193                   for (int qz = 0; qz < Q1D; ++qz)
3194                   {
3195                      const double wz = massZ[qz];
3196                      gradYZ[qz][qy][0] += wz * wy;
3197                      gradYZ[qz][qy][1] += wz * wDy;
3198                   }
3199                }
3200             }
3201 
3202             for (int qx = 0; qx < Q1D; ++qx)
3203             {
3204                const double wx = Bc(qx,dx);
3205                const double wDx = Gc(qx,dx);
3206 
3207                for (int qy = 0; qy < Q1D; ++qy)
3208                {
3209                   for (int qz = 0; qz < Q1D; ++qz)
3210                   {
3211                      // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3212                      curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx;  // (u_2)_{x_1}
3213                      curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3214                   }
3215                }
3216             }
3217          }
3218       }
3219 
3220       // Apply D operator.
3221       for (int qz = 0; qz < Q1D; ++qz)
3222       {
3223          for (int qy = 0; qy < Q1D; ++qy)
3224          {
3225             for (int qx = 0; qx < Q1D; ++qx)
3226             {
3227                for (int c = 0; c < VDIM; ++c)
3228                {
3229                   curl[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
3230                }
3231             }
3232          }
3233       }
3234 
3235       for (int qz = 0; qz < Q1D; ++qz)
3236       {
3237          double massXY[MAX_D1D][MAX_D1D];
3238 
3239          osc = 0;
3240 
3241          for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
3242          {
3243             const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3244             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3245             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3246 
3247             for (int dy = 0; dy < D1Dy; ++dy)
3248             {
3249                for (int dx = 0; dx < D1Dx; ++dx)
3250                {
3251                   massXY[dy][dx] = 0;
3252                }
3253             }
3254             for (int qy = 0; qy < Q1D; ++qy)
3255             {
3256                double massX[MAX_D1D];
3257                for (int dx = 0; dx < D1Dx; ++dx)
3258                {
3259                   massX[dx] = 0.0;
3260                }
3261                for (int qx = 0; qx < Q1D; ++qx)
3262                {
3263                   for (int dx = 0; dx < D1Dx; ++dx)
3264                   {
3265                      massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3266                   }
3267                }
3268 
3269                for (int dy = 0; dy < D1Dy; ++dy)
3270                {
3271                   const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3272                   for (int dx = 0; dx < D1Dx; ++dx)
3273                   {
3274                      massXY[dy][dx] += massX[dx] * wy;
3275                   }
3276                }
3277             }
3278 
3279             for (int dz = 0; dz < D1Dz; ++dz)
3280             {
3281                const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
3282                for (int dy = 0; dy < D1Dy; ++dy)
3283                {
3284                   for (int dx = 0; dx < D1Dx; ++dx)
3285                   {
3286                      Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
3287                   }
3288                }
3289             }
3290 
3291             osc += D1Dx * D1Dy * D1Dz;
3292          }  // loop c
3293       }  // loop qz
3294    }); // end of element loop
3295 }
3296 
3297 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3298 // integrated against H(curl) test functions corresponding to y.
3299 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
SmemPAHcurlL2Apply3D(const int D1D,const int Q1D,const int coeffDim,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & gc,const Vector & pa_data,const Vector & x,Vector & y)3300 static void SmemPAHcurlL2Apply3D(const int D1D,
3301                                  const int Q1D,
3302                                  const int coeffDim,
3303                                  const int NE,
3304                                  const Array<double> &bo,
3305                                  const Array<double> &bc,
3306                                  const Array<double> &gc,
3307                                  const Vector &pa_data,
3308                                  const Vector &x,
3309                                  Vector &y)
3310 {
3311    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3312    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3313 
3314    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3315    auto Bc = Reshape(bc.Read(), Q1D, D1D);
3316    auto Gc = Reshape(gc.Read(), Q1D, D1D);
3317    auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3318    auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3319    auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3320 
3321    MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
3322    {
3323       constexpr int VDIM = 3;
3324       constexpr int maxCoeffDim = 3;
3325 
3326       MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
3327       MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
3328       MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
3329 
3330       double opc[maxCoeffDim];
3331       MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
3332       MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
3333 
3334       MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
3335 
3336       MFEM_FOREACH_THREAD(qx,x,Q1D)
3337       {
3338          MFEM_FOREACH_THREAD(qy,y,Q1D)
3339          {
3340             MFEM_FOREACH_THREAD(qz,z,Q1D)
3341             {
3342                for (int i=0; i<coeffDim; ++i)
3343                {
3344                   opc[i] = op(i,qx,qy,qz,e);
3345                }
3346             }
3347          }
3348       }
3349 
3350       const int tidx = MFEM_THREAD_ID(x);
3351       const int tidy = MFEM_THREAD_ID(y);
3352       const int tidz = MFEM_THREAD_ID(z);
3353 
3354       if (tidz == 0)
3355       {
3356          MFEM_FOREACH_THREAD(d,y,D1D)
3357          {
3358             MFEM_FOREACH_THREAD(q,x,Q1D)
3359             {
3360                sBc[d][q] = Bc(q,d);
3361                sGc[d][q] = Gc(q,d);
3362                if (d < D1D-1)
3363                {
3364                   sBo[d][q] = Bo(q,d);
3365                }
3366             }
3367          }
3368       }
3369       MFEM_SYNC_THREAD;
3370 
3371       for (int qz=0; qz < Q1D; ++qz)
3372       {
3373          if (tidz == qz)
3374          {
3375             MFEM_FOREACH_THREAD(qy,y,Q1D)
3376             {
3377                MFEM_FOREACH_THREAD(qx,x,Q1D)
3378                {
3379                   for (int i=0; i<3; ++i)
3380                   {
3381                      curl[qy][qx][i] = 0.0;
3382                   }
3383                }
3384             }
3385          }
3386 
3387          int osc = 0;
3388          for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
3389          {
3390             const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3391             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3392             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3393 
3394             MFEM_FOREACH_THREAD(dz,z,D1Dz)
3395             {
3396                MFEM_FOREACH_THREAD(dy,y,D1Dy)
3397                {
3398                   MFEM_FOREACH_THREAD(dx,x,D1Dx)
3399                   {
3400                      sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3401                   }
3402                }
3403             }
3404             MFEM_SYNC_THREAD;
3405 
3406             if (tidz == qz)
3407             {
3408                if (c == 0)
3409                {
3410                   for (int i=0; i<coeffDim; ++i)
3411                   {
3412                      sop[i][tidx][tidy] = opc[i];
3413                   }
3414                }
3415 
3416                MFEM_FOREACH_THREAD(qy,y,Q1D)
3417                {
3418                   MFEM_FOREACH_THREAD(qx,x,Q1D)
3419                   {
3420                      double u = 0.0;
3421                      double v = 0.0;
3422 
3423                      // We treat x, y, z components separately for optimization specific to each.
3424                      if (c == 0) // x component
3425                      {
3426                         // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3427 
3428                         for (int dz = 0; dz < D1Dz; ++dz)
3429                         {
3430                            const double wz = sBc[dz][qz];
3431                            const double wDz = sGc[dz][qz];
3432 
3433                            for (int dy = 0; dy < D1Dy; ++dy)
3434                            {
3435                               const double wy = sBc[dy][qy];
3436                               const double wDy = sGc[dy][qy];
3437 
3438                               for (int dx = 0; dx < D1Dx; ++dx)
3439                               {
3440                                  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
3441                                  u += wx * wDy * wz;
3442                                  v += wx * wy * wDz;
3443                               }
3444                            }
3445                         }
3446 
3447                         curl[qy][qx][1] += v; // (u_0)_{x_2}
3448                         curl[qy][qx][2] -= u;  // -(u_0)_{x_1}
3449                      }
3450                      else if (c == 1)  // y component
3451                      {
3452                         // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3453 
3454                         for (int dz = 0; dz < D1Dz; ++dz)
3455                         {
3456                            const double wz = sBc[dz][qz];
3457                            const double wDz = sGc[dz][qz];
3458 
3459                            for (int dy = 0; dy < D1Dy; ++dy)
3460                            {
3461                               const double wy = sBo[dy][qy];
3462 
3463                               for (int dx = 0; dx < D1Dx; ++dx)
3464                               {
3465                                  const double t = sX[dz][dy][dx];
3466                                  const double wx = t * sBc[dx][qx];
3467                                  const double wDx = t * sGc[dx][qx];
3468 
3469                                  u += wDx * wy * wz;
3470                                  v += wx * wy * wDz;
3471                               }
3472                            }
3473                         }
3474 
3475                         curl[qy][qx][0] -= v; // -(u_1)_{x_2}
3476                         curl[qy][qx][2] += u; // (u_1)_{x_0}
3477                      }
3478                      else // z component
3479                      {
3480                         // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3481 
3482                         for (int dz = 0; dz < D1Dz; ++dz)
3483                         {
3484                            const double wz = sBo[dz][qz];
3485 
3486                            for (int dy = 0; dy < D1Dy; ++dy)
3487                            {
3488                               const double wy = sBc[dy][qy];
3489                               const double wDy = sGc[dy][qy];
3490 
3491                               for (int dx = 0; dx < D1Dx; ++dx)
3492                               {
3493                                  const double t = sX[dz][dy][dx];
3494                                  const double wx = t * sBc[dx][qx];
3495                                  const double wDx = t * sGc[dx][qx];
3496 
3497                                  u += wDx * wy * wz;
3498                                  v += wx * wDy * wz;
3499                               }
3500                            }
3501                         }
3502 
3503                         curl[qy][qx][0] += v; // (u_2)_{x_1}
3504                         curl[qy][qx][1] -= u; // -(u_2)_{x_0}
3505                      }
3506                   } // qx
3507                } // qy
3508             } // tidz == qz
3509 
3510             osc += D1Dx * D1Dy * D1Dz;
3511             MFEM_SYNC_THREAD;
3512          } // c
3513 
3514          double dxyz1 = 0.0;
3515          double dxyz2 = 0.0;
3516          double dxyz3 = 0.0;
3517 
3518          MFEM_FOREACH_THREAD(dz,z,D1D)
3519          {
3520             const double wcz = sBc[dz][qz];
3521             const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
3522 
3523             MFEM_FOREACH_THREAD(dy,y,D1D)
3524             {
3525                MFEM_FOREACH_THREAD(dx,x,D1D)
3526                {
3527                   for (int qy = 0; qy < Q1D; ++qy)
3528                   {
3529                      const double wcy = sBc[dy][qy];
3530                      const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
3531 
3532                      for (int qx = 0; qx < Q1D; ++qx)
3533                      {
3534                         const double O1 = sop[0][qx][qy];
3535                         const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
3536                         const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
3537 
3538                         const double c1 = O1 * curl[qy][qx][0];
3539                         const double c2 = O2 * curl[qy][qx][1];
3540                         const double c3 = O3 * curl[qy][qx][2];
3541 
3542                         const double wcx = sBc[dx][qx];
3543 
3544                         if (dx < D1D-1)
3545                         {
3546                            const double wx = sBo[dx][qx];
3547                            dxyz1 += c1 * wx * wcy * wcz;
3548                         }
3549 
3550                         dxyz2 += c2 * wcx * wy * wcz;
3551                         dxyz3 += c3 * wcx * wcy * wz;
3552                      } // qx
3553                   } // qy
3554                } // dx
3555             } // dy
3556          } // dz
3557 
3558          MFEM_SYNC_THREAD;
3559 
3560          MFEM_FOREACH_THREAD(dz,z,D1D)
3561          {
3562             MFEM_FOREACH_THREAD(dy,y,D1D)
3563             {
3564                MFEM_FOREACH_THREAD(dx,x,D1D)
3565                {
3566                   if (dx < D1D-1)
3567                   {
3568                      Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
3569                   }
3570                   if (dy < D1D-1)
3571                   {
3572                      Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
3573                   }
3574                   if (dz < D1D-1)
3575                   {
3576                      Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
3577                   }
3578                }
3579             }
3580          }
3581       } // qz
3582    }); // end of element loop
3583 }
3584 
3585 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3586 // integrated against H(div) test functions corresponding to y.
3587 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
PAHcurlHdivApply3D(const int D1D,const int D1Dtest,const int Q1D,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Array<double> & gc,const Vector & pa_data,const Vector & x,Vector & y)3588 static void PAHcurlHdivApply3D(const int D1D,
3589                                const int D1Dtest,
3590                                const int Q1D,
3591                                const int NE,
3592                                const Array<double> &bo,
3593                                const Array<double> &bc,
3594                                const Array<double> &bot,
3595                                const Array<double> &bct,
3596                                const Array<double> &gc,
3597                                const Vector &pa_data,
3598                                const Vector &x,
3599                                Vector &y)
3600 {
3601    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3602    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3603    // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
3604    // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
3605    // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
3606    // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3607    // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3608    // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3609 
3610    constexpr static int VDIM = 3;
3611 
3612    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3613    auto Bc = Reshape(bc.Read(), Q1D, D1D);
3614    auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
3615    auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
3616    auto Gc = Reshape(gc.Read(), Q1D, D1D);
3617    auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
3618    auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3619    auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
3620 
3621    MFEM_FORALL(e, NE,
3622    {
3623       double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3624       // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3625 
3626       for (int qz = 0; qz < Q1D; ++qz)
3627       {
3628          for (int qy = 0; qy < Q1D; ++qy)
3629          {
3630             for (int qx = 0; qx < Q1D; ++qx)
3631             {
3632                for (int c = 0; c < VDIM; ++c)
3633                {
3634                   curl[qz][qy][qx][c] = 0.0;
3635                }
3636             }
3637          }
3638       }
3639 
3640       // We treat x, y, z components separately for optimization specific to each.
3641 
3642       int osc = 0;
3643 
3644       {
3645          // x component
3646          const int D1Dz = D1D;
3647          const int D1Dy = D1D;
3648          const int D1Dx = D1D - 1;
3649 
3650          for (int dz = 0; dz < D1Dz; ++dz)
3651          {
3652             double gradXY[MAX_Q1D][MAX_Q1D][2];
3653             for (int qy = 0; qy < Q1D; ++qy)
3654             {
3655                for (int qx = 0; qx < Q1D; ++qx)
3656                {
3657                   for (int d = 0; d < 2; ++d)
3658                   {
3659                      gradXY[qy][qx][d] = 0.0;
3660                   }
3661                }
3662             }
3663 
3664             for (int dy = 0; dy < D1Dy; ++dy)
3665             {
3666                double massX[MAX_Q1D];
3667                for (int qx = 0; qx < Q1D; ++qx)
3668                {
3669                   massX[qx] = 0.0;
3670                }
3671 
3672                for (int dx = 0; dx < D1Dx; ++dx)
3673                {
3674                   const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3675                   for (int qx = 0; qx < Q1D; ++qx)
3676                   {
3677                      massX[qx] += t * Bo(qx,dx);
3678                   }
3679                }
3680 
3681                for (int qy = 0; qy < Q1D; ++qy)
3682                {
3683                   const double wy = Bc(qy,dy);
3684                   const double wDy = Gc(qy,dy);
3685                   for (int qx = 0; qx < Q1D; ++qx)
3686                   {
3687                      const double wx = massX[qx];
3688                      gradXY[qy][qx][0] += wx * wDy;
3689                      gradXY[qy][qx][1] += wx * wy;
3690                   }
3691                }
3692             }
3693 
3694             for (int qz = 0; qz < Q1D; ++qz)
3695             {
3696                const double wz = Bc(qz,dz);
3697                const double wDz = Gc(qz,dz);
3698                for (int qy = 0; qy < Q1D; ++qy)
3699                {
3700                   for (int qx = 0; qx < Q1D; ++qx)
3701                   {
3702                      // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3703                      curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3704                      curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz;  // -(u_0)_{x_1}
3705                   }
3706                }
3707             }
3708          }
3709 
3710          osc += D1Dx * D1Dy * D1Dz;
3711       }
3712 
3713       {
3714          // y component
3715          const int D1Dz = D1D;
3716          const int D1Dy = D1D - 1;
3717          const int D1Dx = D1D;
3718 
3719          for (int dz = 0; dz < D1Dz; ++dz)
3720          {
3721             double gradXY[MAX_Q1D][MAX_Q1D][2];
3722             for (int qy = 0; qy < Q1D; ++qy)
3723             {
3724                for (int qx = 0; qx < Q1D; ++qx)
3725                {
3726                   for (int d = 0; d < 2; ++d)
3727                   {
3728                      gradXY[qy][qx][d] = 0.0;
3729                   }
3730                }
3731             }
3732 
3733             for (int dx = 0; dx < D1Dx; ++dx)
3734             {
3735                double massY[MAX_Q1D];
3736                for (int qy = 0; qy < Q1D; ++qy)
3737                {
3738                   massY[qy] = 0.0;
3739                }
3740 
3741                for (int dy = 0; dy < D1Dy; ++dy)
3742                {
3743                   const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3744                   for (int qy = 0; qy < Q1D; ++qy)
3745                   {
3746                      massY[qy] += t * Bo(qy,dy);
3747                   }
3748                }
3749 
3750                for (int qx = 0; qx < Q1D; ++qx)
3751                {
3752                   const double wx = Bc(qx,dx);
3753                   const double wDx = Gc(qx,dx);
3754                   for (int qy = 0; qy < Q1D; ++qy)
3755                   {
3756                      const double wy = massY[qy];
3757                      gradXY[qy][qx][0] += wDx * wy;
3758                      gradXY[qy][qx][1] += wx * wy;
3759                   }
3760                }
3761             }
3762 
3763             for (int qz = 0; qz < Q1D; ++qz)
3764             {
3765                const double wz = Bc(qz,dz);
3766                const double wDz = Gc(qz,dz);
3767                for (int qy = 0; qy < Q1D; ++qy)
3768                {
3769                   for (int qx = 0; qx < Q1D; ++qx)
3770                   {
3771                      // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3772                      curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3773                      curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz;  // (u_1)_{x_0}
3774                   }
3775                }
3776             }
3777          }
3778 
3779          osc += D1Dx * D1Dy * D1Dz;
3780       }
3781 
3782       {
3783          // z component
3784          const int D1Dz = D1D - 1;
3785          const int D1Dy = D1D;
3786          const int D1Dx = D1D;
3787 
3788          for (int dx = 0; dx < D1Dx; ++dx)
3789          {
3790             double gradYZ[MAX_Q1D][MAX_Q1D][2];
3791             for (int qz = 0; qz < Q1D; ++qz)
3792             {
3793                for (int qy = 0; qy < Q1D; ++qy)
3794                {
3795                   for (int d = 0; d < 2; ++d)
3796                   {
3797                      gradYZ[qz][qy][d] = 0.0;
3798                   }
3799                }
3800             }
3801 
3802             for (int dy = 0; dy < D1Dy; ++dy)
3803             {
3804                double massZ[MAX_Q1D];
3805                for (int qz = 0; qz < Q1D; ++qz)
3806                {
3807                   massZ[qz] = 0.0;
3808                }
3809 
3810                for (int dz = 0; dz < D1Dz; ++dz)
3811                {
3812                   const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3813                   for (int qz = 0; qz < Q1D; ++qz)
3814                   {
3815                      massZ[qz] += t * Bo(qz,dz);
3816                   }
3817                }
3818 
3819                for (int qy = 0; qy < Q1D; ++qy)
3820                {
3821                   const double wy = Bc(qy,dy);
3822                   const double wDy = Gc(qy,dy);
3823                   for (int qz = 0; qz < Q1D; ++qz)
3824                   {
3825                      const double wz = massZ[qz];
3826                      gradYZ[qz][qy][0] += wz * wy;
3827                      gradYZ[qz][qy][1] += wz * wDy;
3828                   }
3829                }
3830             }
3831 
3832             for (int qx = 0; qx < Q1D; ++qx)
3833             {
3834                const double wx = Bc(qx,dx);
3835                const double wDx = Gc(qx,dx);
3836 
3837                for (int qy = 0; qy < Q1D; ++qy)
3838                {
3839                   for (int qz = 0; qz < Q1D; ++qz)
3840                   {
3841                      // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3842                      curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx;  // (u_2)_{x_1}
3843                      curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3844                   }
3845                }
3846             }
3847          }
3848       }
3849 
3850       // Apply D operator.
3851       for (int qz = 0; qz < Q1D; ++qz)
3852       {
3853          for (int qy = 0; qy < Q1D; ++qy)
3854          {
3855             for (int qx = 0; qx < Q1D; ++qx)
3856             {
3857                const double O11 = op(qx,qy,qz,0,e);
3858                const double O12 = op(qx,qy,qz,1,e);
3859                const double O13 = op(qx,qy,qz,2,e);
3860                const double O22 = op(qx,qy,qz,3,e);
3861                const double O23 = op(qx,qy,qz,4,e);
3862                const double O33 = op(qx,qy,qz,5,e);
3863 
3864                const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
3865                                  (O13 * curl[qz][qy][qx][2]);
3866                const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
3867                                  (O23 * curl[qz][qy][qx][2]);
3868                const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
3869                                  (O33 * curl[qz][qy][qx][2]);
3870 
3871                curl[qz][qy][qx][0] = c1;
3872                curl[qz][qy][qx][1] = c2;
3873                curl[qz][qy][qx][2] = c3;
3874             }
3875          }
3876       }
3877 
3878       for (int qz = 0; qz < Q1D; ++qz)
3879       {
3880          double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D];  // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
3881 
3882          osc = 0;
3883 
3884          for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
3885          {
3886             const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
3887             const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
3888             const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
3889 
3890             for (int dy = 0; dy < D1Dy; ++dy)
3891             {
3892                for (int dx = 0; dx < D1Dx; ++dx)
3893                {
3894                   massXY[dy][dx] = 0;
3895                }
3896             }
3897             for (int qy = 0; qy < Q1D; ++qy)
3898             {
3899                double massX[HCURL_MAX_D1D];
3900                for (int dx = 0; dx < D1Dx; ++dx)
3901                {
3902                   massX[dx] = 0;
3903                }
3904                for (int qx = 0; qx < Q1D; ++qx)
3905                {
3906                   for (int dx = 0; dx < D1Dx; ++dx)
3907                   {
3908                      massX[dx] += curl[qz][qy][qx][c] *
3909                                   ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
3910                   }
3911                }
3912                for (int dy = 0; dy < D1Dy; ++dy)
3913                {
3914                   const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
3915                   for (int dx = 0; dx < D1Dx; ++dx)
3916                   {
3917                      massXY[dy][dx] += massX[dx] * wy;
3918                   }
3919                }
3920             }
3921 
3922             for (int dz = 0; dz < D1Dz; ++dz)
3923             {
3924                const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
3925                for (int dy = 0; dy < D1Dy; ++dy)
3926                {
3927                   for (int dx = 0; dx < D1Dx; ++dx)
3928                   {
3929                      Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
3930                         massXY[dy][dx] * wz;
3931                   }
3932                }
3933             }
3934 
3935             osc += D1Dx * D1Dy * D1Dz;
3936          }  // loop c
3937       }  // loop qz
3938    }); // end of element loop
3939 }
3940 
AddMultPA(const Vector & x,Vector & y) const3941 void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
3942 {
3943    if (testType == mfem::FiniteElement::CURL &&
3944        trialType == mfem::FiniteElement::CURL && dim == 3)
3945    {
3946       if (Device::Allows(Backend::DEVICE_MASK))
3947       {
3948          const int ID = (dofs1D << 4) | quad1D;
3949          switch (ID)
3950          {
3951             case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, coeffDim, ne,
3952                                                            mapsO->B, mapsC->B,
3953                                                            mapsC->G, pa_data, x, y);
3954             case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, coeffDim, ne,
3955                                                            mapsO->B, mapsC->B,
3956                                                            mapsC->G, pa_data, x, y);
3957             case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, coeffDim, ne,
3958                                                            mapsO->B, mapsC->B,
3959                                                            mapsC->G, pa_data, x, y);
3960             case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, coeffDim, ne,
3961                                                            mapsO->B, mapsC->B,
3962                                                            mapsC->G, pa_data, x, y);
3963             default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B,
3964                                                     mapsC->B,
3965                                                     mapsC->G, pa_data, x, y);
3966          }
3967       }
3968       else
3969          PAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
3970                           mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y);
3971    }
3972    else if (testType == mfem::FiniteElement::DIV &&
3973             trialType == mfem::FiniteElement::CURL && dim == 3)
3974       PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
3975                          mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G,
3976                          pa_data, x, y);
3977    else
3978    {
3979       MFEM_ABORT("Unsupported dimension or space!");
3980    }
3981 }
3982 
AssemblePA(const FiniteElementSpace & trial_fes,const FiniteElementSpace & test_fes)3983 void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace
3984                                                &trial_fes,
3985                                                const FiniteElementSpace &test_fes)
3986 {
3987    // Assumes tensor-product elements, with vector test and trial spaces.
3988    Mesh *mesh = trial_fes.GetMesh();
3989    const FiniteElement *trial_fel = trial_fes.GetFE(0);
3990    const FiniteElement *test_fel = test_fes.GetFE(0);
3991 
3992    const VectorTensorFiniteElement *trial_el =
3993       dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
3994    MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
3995 
3996    const VectorTensorFiniteElement *test_el =
3997       dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
3998    MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
3999 
4000    const IntegrationRule *ir
4001       = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
4002                                                      *mesh->GetElementTransformation(0));
4003    const int dims = trial_el->GetDim();
4004    MFEM_VERIFY(dims == 3, "");
4005 
4006    const int nq = ir->GetNPoints();
4007    dim = mesh->Dimension();
4008    MFEM_VERIFY(dim == 3, "");
4009 
4010    MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
4011 
4012    ne = trial_fes.GetNE();
4013    geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
4014    mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
4015    mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
4016    dofs1D = mapsC->ndof;
4017    quad1D = mapsC->nqpt;
4018 
4019    MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
4020 
4021    coeffDim = DQ ? 3 : 1;
4022 
4023    pa_data.SetSize(coeffDim * nq * ne, Device::GetMemoryType());
4024 
4025    Vector coeff(coeffDim * nq * ne);
4026    coeff = 1.0;
4027    auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
4028    if (Q || DQ)
4029    {
4030       Vector V(coeffDim);
4031       if (DQ)
4032       {
4033          MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
4034       }
4035 
4036       for (int e=0; e<ne; ++e)
4037       {
4038          ElementTransformation *tr = mesh->GetElementTransformation(e);
4039 
4040          for (int p=0; p<nq; ++p)
4041          {
4042             if (DQ)
4043             {
4044                DQ->Eval(V, *tr, ir->IntPoint(p));
4045                for (int i=0; i<coeffDim; ++i)
4046                {
4047                   coeffh(i, p, e) = V[i];
4048                }
4049             }
4050             else
4051             {
4052                coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
4053             }
4054          }
4055       }
4056    }
4057 
4058    testType = test_el->GetDerivType();
4059    trialType = trial_el->GetDerivType();
4060 
4061    if (trialType == mfem::FiniteElement::CURL && dim == 3)
4062    {
4063       PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
4064    }
4065    else
4066    {
4067       MFEM_ABORT("Unknown kernel.");
4068    }
4069 }
4070 
4071 // Apply to x corresponding to DOF's in H(curl) (trial), integrated against curl
4072 // of H(curl) test functions corresponding to y.
4073 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
PAHcurlL2Apply3DTranspose(const int D1D,const int Q1D,const int coeffDim,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & bot,const Array<double> & bct,const Array<double> & gct,const Vector & pa_data,const Vector & x,Vector & y)4074 static void PAHcurlL2Apply3DTranspose(const int D1D,
4075                                       const int Q1D,
4076                                       const int coeffDim,
4077                                       const int NE,
4078                                       const Array<double> &bo,
4079                                       const Array<double> &bc,
4080                                       const Array<double> &bot,
4081                                       const Array<double> &bct,
4082                                       const Array<double> &gct,
4083                                       const Vector &pa_data,
4084                                       const Vector &x,
4085                                       Vector &y)
4086 {
4087    // See PAHcurlL2Apply3D for comments.
4088 
4089    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4090    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4091 
4092    constexpr static int VDIM = 3;
4093 
4094    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4095    auto Bc = Reshape(bc.Read(), Q1D, D1D);
4096    auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
4097    auto Bct = Reshape(bct.Read(), D1D, Q1D);
4098    auto Gct = Reshape(gct.Read(), D1D, Q1D);
4099    auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4100    auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4101    auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4102 
4103    MFEM_FORALL(e, NE,
4104    {
4105       double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4106 
4107       for (int qz = 0; qz < Q1D; ++qz)
4108       {
4109          for (int qy = 0; qy < Q1D; ++qy)
4110          {
4111             for (int qx = 0; qx < Q1D; ++qx)
4112             {
4113                for (int c = 0; c < VDIM; ++c)
4114                {
4115                   mass[qz][qy][qx][c] = 0.0;
4116                }
4117             }
4118          }
4119       }
4120 
4121       int osc = 0;
4122 
4123       for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
4124       {
4125          const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4126          const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4127          const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4128 
4129          for (int dz = 0; dz < D1Dz; ++dz)
4130          {
4131             double massXY[MAX_Q1D][MAX_Q1D];
4132             for (int qy = 0; qy < Q1D; ++qy)
4133             {
4134                for (int qx = 0; qx < Q1D; ++qx)
4135                {
4136                   massXY[qy][qx] = 0.0;
4137                }
4138             }
4139 
4140             for (int dy = 0; dy < D1Dy; ++dy)
4141             {
4142                double massX[MAX_Q1D];
4143                for (int qx = 0; qx < Q1D; ++qx)
4144                {
4145                   massX[qx] = 0.0;
4146                }
4147 
4148                for (int dx = 0; dx < D1Dx; ++dx)
4149                {
4150                   const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4151                   for (int qx = 0; qx < Q1D; ++qx)
4152                   {
4153                      massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
4154                   }
4155                }
4156 
4157                for (int qy = 0; qy < Q1D; ++qy)
4158                {
4159                   const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
4160                   for (int qx = 0; qx < Q1D; ++qx)
4161                   {
4162                      const double wx = massX[qx];
4163                      massXY[qy][qx] += wx * wy;
4164                   }
4165                }
4166             }
4167 
4168             for (int qz = 0; qz < Q1D; ++qz)
4169             {
4170                const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
4171                for (int qy = 0; qy < Q1D; ++qy)
4172                {
4173                   for (int qx = 0; qx < Q1D; ++qx)
4174                   {
4175                      mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
4176                   }
4177                }
4178             }
4179          }
4180 
4181          osc += D1Dx * D1Dy * D1Dz;
4182       }  // loop (c) over components
4183 
4184       // Apply D operator.
4185       for (int qz = 0; qz < Q1D; ++qz)
4186       {
4187          for (int qy = 0; qy < Q1D; ++qy)
4188          {
4189             for (int qx = 0; qx < Q1D; ++qx)
4190             {
4191                for (int c=0; c<VDIM; ++c)
4192                {
4193                   mass[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
4194                }
4195             }
4196          }
4197       }
4198 
4199       // x component
4200       osc = 0;
4201       {
4202          const int D1Dz = D1D;
4203          const int D1Dy = D1D;
4204          const int D1Dx = D1D - 1;
4205 
4206          for (int qz = 0; qz < Q1D; ++qz)
4207          {
4208             double gradXY12[MAX_D1D][MAX_D1D];
4209             double gradXY21[MAX_D1D][MAX_D1D];
4210 
4211             for (int dy = 0; dy < D1Dy; ++dy)
4212             {
4213                for (int dx = 0; dx < D1Dx; ++dx)
4214                {
4215                   gradXY12[dy][dx] = 0.0;
4216                   gradXY21[dy][dx] = 0.0;
4217                }
4218             }
4219             for (int qy = 0; qy < Q1D; ++qy)
4220             {
4221                double massX[MAX_D1D][2];
4222                for (int dx = 0; dx < D1Dx; ++dx)
4223                {
4224                   for (int n = 0; n < 2; ++n)
4225                   {
4226                      massX[dx][n] = 0.0;
4227                   }
4228                }
4229                for (int qx = 0; qx < Q1D; ++qx)
4230                {
4231                   for (int dx = 0; dx < D1Dx; ++dx)
4232                   {
4233                      const double wx = Bot(dx,qx);
4234 
4235                      massX[dx][0] += wx * mass[qz][qy][qx][1];
4236                      massX[dx][1] += wx * mass[qz][qy][qx][2];
4237                   }
4238                }
4239                for (int dy = 0; dy < D1Dy; ++dy)
4240                {
4241                   const double wy = Bct(dy,qy);
4242                   const double wDy = Gct(dy,qy);
4243 
4244                   for (int dx = 0; dx < D1Dx; ++dx)
4245                   {
4246                      gradXY21[dy][dx] += massX[dx][0] * wy;
4247                      gradXY12[dy][dx] += massX[dx][1] * wDy;
4248                   }
4249                }
4250             }
4251 
4252             for (int dz = 0; dz < D1Dz; ++dz)
4253             {
4254                const double wz = Bct(dz,qz);
4255                const double wDz = Gct(dz,qz);
4256                for (int dy = 0; dy < D1Dy; ++dy)
4257                {
4258                   for (int dx = 0; dx < D1Dx; ++dx)
4259                   {
4260                      // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4261                      // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
4262                      Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4263                        e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
4264                   }
4265                }
4266             }
4267          }  // loop qz
4268 
4269          osc += D1Dx * D1Dy * D1Dz;
4270       }
4271 
4272       // y component
4273       {
4274          const int D1Dz = D1D;
4275          const int D1Dy = D1D - 1;
4276          const int D1Dx = D1D;
4277 
4278          for (int qz = 0; qz < Q1D; ++qz)
4279          {
4280             double gradXY02[MAX_D1D][MAX_D1D];
4281             double gradXY20[MAX_D1D][MAX_D1D];
4282 
4283             for (int dy = 0; dy < D1Dy; ++dy)
4284             {
4285                for (int dx = 0; dx < D1Dx; ++dx)
4286                {
4287                   gradXY02[dy][dx] = 0.0;
4288                   gradXY20[dy][dx] = 0.0;
4289                }
4290             }
4291             for (int qx = 0; qx < Q1D; ++qx)
4292             {
4293                double massY[MAX_D1D][2];
4294                for (int dy = 0; dy < D1Dy; ++dy)
4295                {
4296                   massY[dy][0] = 0.0;
4297                   massY[dy][1] = 0.0;
4298                }
4299                for (int qy = 0; qy < Q1D; ++qy)
4300                {
4301                   for (int dy = 0; dy < D1Dy; ++dy)
4302                   {
4303                      const double wy = Bot(dy,qy);
4304 
4305                      massY[dy][0] += wy * mass[qz][qy][qx][2];
4306                      massY[dy][1] += wy * mass[qz][qy][qx][0];
4307                   }
4308                }
4309                for (int dx = 0; dx < D1Dx; ++dx)
4310                {
4311                   const double wx = Bct(dx,qx);
4312                   const double wDx = Gct(dx,qx);
4313 
4314                   for (int dy = 0; dy < D1Dy; ++dy)
4315                   {
4316                      gradXY02[dy][dx] += massY[dy][0] * wDx;
4317                      gradXY20[dy][dx] += massY[dy][1] * wx;
4318                   }
4319                }
4320             }
4321 
4322             for (int dz = 0; dz < D1Dz; ++dz)
4323             {
4324                const double wz = Bct(dz,qz);
4325                const double wDz = Gct(dz,qz);
4326                for (int dy = 0; dy < D1Dy; ++dy)
4327                {
4328                   for (int dx = 0; dx < D1Dx; ++dx)
4329                   {
4330                      // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4331                      // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
4332                      Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4333                        e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
4334                   }
4335                }
4336             }
4337          }  // loop qz
4338 
4339          osc += D1Dx * D1Dy * D1Dz;
4340       }
4341 
4342       // z component
4343       {
4344          const int D1Dz = D1D - 1;
4345          const int D1Dy = D1D;
4346          const int D1Dx = D1D;
4347 
4348          for (int qx = 0; qx < Q1D; ++qx)
4349          {
4350             double gradYZ01[MAX_D1D][MAX_D1D];
4351             double gradYZ10[MAX_D1D][MAX_D1D];
4352 
4353             for (int dy = 0; dy < D1Dy; ++dy)
4354             {
4355                for (int dz = 0; dz < D1Dz; ++dz)
4356                {
4357                   gradYZ01[dz][dy] = 0.0;
4358                   gradYZ10[dz][dy] = 0.0;
4359                }
4360             }
4361             for (int qy = 0; qy < Q1D; ++qy)
4362             {
4363                double massZ[MAX_D1D][2];
4364                for (int dz = 0; dz < D1Dz; ++dz)
4365                {
4366                   for (int n = 0; n < 2; ++n)
4367                   {
4368                      massZ[dz][n] = 0.0;
4369                   }
4370                }
4371                for (int qz = 0; qz < Q1D; ++qz)
4372                {
4373                   for (int dz = 0; dz < D1Dz; ++dz)
4374                   {
4375                      const double wz = Bot(dz,qz);
4376 
4377                      massZ[dz][0] += wz * mass[qz][qy][qx][0];
4378                      massZ[dz][1] += wz * mass[qz][qy][qx][1];
4379                   }
4380                }
4381                for (int dy = 0; dy < D1Dy; ++dy)
4382                {
4383                   const double wy = Bct(dy,qy);
4384                   const double wDy = Gct(dy,qy);
4385 
4386                   for (int dz = 0; dz < D1Dz; ++dz)
4387                   {
4388                      gradYZ01[dz][dy] += wy * massZ[dz][1];
4389                      gradYZ10[dz][dy] += wDy * massZ[dz][0];
4390                   }
4391                }
4392             }
4393 
4394             for (int dx = 0; dx < D1Dx; ++dx)
4395             {
4396                const double wx = Bct(dx,qx);
4397                const double wDx = Gct(dx,qx);
4398 
4399                for (int dy = 0; dy < D1Dy; ++dy)
4400                {
4401                   for (int dz = 0; dz < D1Dz; ++dz)
4402                   {
4403                      // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4404                      // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
4405                      Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4406                        e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
4407                   }
4408                }
4409             }
4410          }  // loop qx
4411       }
4412    });
4413 }
4414 
4415 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
SmemPAHcurlL2Apply3DTranspose(const int D1D,const int Q1D,const int coeffDim,const int NE,const Array<double> & bo,const Array<double> & bc,const Array<double> & gc,const Vector & pa_data,const Vector & x,Vector & y)4416 static void SmemPAHcurlL2Apply3DTranspose(const int D1D,
4417                                           const int Q1D,
4418                                           const int coeffDim,
4419                                           const int NE,
4420                                           const Array<double> &bo,
4421                                           const Array<double> &bc,
4422                                           const Array<double> &gc,
4423                                           const Vector &pa_data,
4424                                           const Vector &x,
4425                                           Vector &y)
4426 {
4427    MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4428    MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4429 
4430    auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4431    auto Bc = Reshape(bc.Read(), Q1D, D1D);
4432    auto Gc = Reshape(gc.Read(), Q1D, D1D);
4433    auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4434    auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4435    auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4436 
4437    MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
4438    {
4439       constexpr int VDIM = 3;
4440       constexpr int maxCoeffDim = 3;
4441 
4442       MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
4443       MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
4444       MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
4445 
4446       double opc[maxCoeffDim];
4447       MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
4448       MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3];
4449 
4450       MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
4451 
4452       MFEM_FOREACH_THREAD(qx,x,Q1D)
4453       {
4454          MFEM_FOREACH_THREAD(qy,y,Q1D)
4455          {
4456             MFEM_FOREACH_THREAD(qz,z,Q1D)
4457             {
4458                for (int i=0; i<coeffDim; ++i)
4459                {
4460                   opc[i] = op(i,qx,qy,qz,e);
4461                }
4462             }
4463          }
4464       }
4465 
4466       const int tidx = MFEM_THREAD_ID(x);
4467       const int tidy = MFEM_THREAD_ID(y);
4468       const int tidz = MFEM_THREAD_ID(z);
4469 
4470       if (tidz == 0)
4471       {
4472          MFEM_FOREACH_THREAD(d,y,D1D)
4473          {
4474             MFEM_FOREACH_THREAD(q,x,Q1D)
4475             {
4476                sBc[d][q] = Bc(q,d);
4477                sGc[d][q] = Gc(q,d);
4478                if (d < D1D-1)
4479                {
4480                   sBo[d][q] = Bo(q,d);
4481                }
4482             }
4483          }
4484       }
4485       MFEM_SYNC_THREAD;
4486 
4487       for (int qz=0; qz < Q1D; ++qz)
4488       {
4489          if (tidz == qz)
4490          {
4491             MFEM_FOREACH_THREAD(qy,y,Q1D)
4492             {
4493                MFEM_FOREACH_THREAD(qx,x,Q1D)
4494                {
4495                   for (int i=0; i<3; ++i)
4496                   {
4497                      mass[qy][qx][i] = 0.0;
4498                   }
4499                }
4500             }
4501          }
4502 
4503          int osc = 0;
4504          for (int c = 0; c < VDIM; ++c)  // loop over x, y, z components
4505          {
4506             const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4507             const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4508             const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4509 
4510             MFEM_FOREACH_THREAD(dz,z,D1Dz)
4511             {
4512                MFEM_FOREACH_THREAD(dy,y,D1Dy)
4513                {
4514                   MFEM_FOREACH_THREAD(dx,x,D1Dx)
4515                   {
4516                      sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4517                   }
4518                }
4519             }
4520             MFEM_SYNC_THREAD;
4521 
4522             if (tidz == qz)
4523             {
4524                if (c == 0)
4525                {
4526                   for (int i=0; i<coeffDim; ++i)
4527                   {
4528                      sop[i][tidx][tidy] = opc[i];
4529                   }
4530                }
4531 
4532                MFEM_FOREACH_THREAD(qy,y,Q1D)
4533                {
4534                   MFEM_FOREACH_THREAD(qx,x,Q1D)
4535                   {
4536                      double u = 0.0;
4537 
4538                      for (int dz = 0; dz < D1Dz; ++dz)
4539                      {
4540                         const double wz = (c == 2) ? sBo[dz][qz] : sBc[dz][qz];
4541 
4542                         for (int dy = 0; dy < D1Dy; ++dy)
4543                         {
4544                            const double wy = (c == 1) ? sBo[dy][qy] : sBc[dy][qy];
4545 
4546                            for (int dx = 0; dx < D1Dx; ++dx)
4547                            {
4548                               const double wx = sX[dz][dy][dx] * ((c == 0) ? sBo[dx][qx] : sBc[dx][qx]);
4549                               u += wx * wy * wz;
4550                            }
4551                         }
4552                      }
4553 
4554                      mass[qy][qx][c] += u;
4555                   } // qx
4556                } // qy
4557             } // tidz == qz
4558 
4559             osc += D1Dx * D1Dy * D1Dz;
4560             MFEM_SYNC_THREAD;
4561          } // c
4562 
4563          double dxyz1 = 0.0;
4564          double dxyz2 = 0.0;
4565          double dxyz3 = 0.0;
4566 
4567          MFEM_FOREACH_THREAD(dz,z,D1D)
4568          {
4569             const double wcz = sBc[dz][qz];
4570             const double wcDz = sGc[dz][qz];
4571             const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
4572 
4573             MFEM_FOREACH_THREAD(dy,y,D1D)
4574             {
4575                MFEM_FOREACH_THREAD(dx,x,D1D)
4576                {
4577                   for (int qy = 0; qy < Q1D; ++qy)
4578                   {
4579                      const double wcy = sBc[dy][qy];
4580                      const double wcDy = sGc[dy][qy];
4581                      const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
4582 
4583                      for (int qx = 0; qx < Q1D; ++qx)
4584                      {
4585                         const double O1 = sop[0][qx][qy];
4586                         const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
4587                         const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
4588 
4589                         const double c1 = O1 * mass[qy][qx][0];
4590                         const double c2 = O2 * mass[qy][qx][1];
4591                         const double c3 = O3 * mass[qy][qx][2];
4592 
4593                         const double wcx = sBc[dx][qx];
4594                         const double wDx = sGc[dx][qx];
4595 
4596                         if (dx < D1D-1)
4597                         {
4598                            const double wx = sBo[dx][qx];
4599                            dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
4600                         }
4601 
4602                         dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
4603 
4604                         dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
4605                      } // qx
4606                   } // qy
4607                } // dx
4608             } // dy
4609          } // dz
4610 
4611          MFEM_SYNC_THREAD;
4612 
4613          MFEM_FOREACH_THREAD(dz,z,D1D)
4614          {
4615             MFEM_FOREACH_THREAD(dy,y,D1D)
4616             {
4617                MFEM_FOREACH_THREAD(dx,x,D1D)
4618                {
4619                   if (dx < D1D-1)
4620                   {
4621                      Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
4622                   }
4623                   if (dy < D1D-1)
4624                   {
4625                      Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
4626                   }
4627                   if (dz < D1D-1)
4628                   {
4629                      Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
4630                   }
4631                }
4632             }
4633          }
4634       } // qz
4635    }); // end of element loop
4636 }
4637 
AddMultPA(const Vector & x,Vector & y) const4638 void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
4639 {
4640    if (testType == mfem::FiniteElement::CURL &&
4641        trialType == mfem::FiniteElement::CURL && dim == 3)
4642    {
4643       if (Device::Allows(Backend::DEVICE_MASK))
4644       {
4645          const int ID = (dofs1D << 4) | quad1D;
4646          switch (ID)
4647          {
4648             case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, coeffDim,
4649                                                                     ne, mapsO->B, mapsC->B,
4650                                                                     mapsC->G, pa_data, x, y);
4651             case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, coeffDim,
4652                                                                     ne, mapsO->B, mapsC->B,
4653                                                                     mapsC->G, pa_data, x, y);
4654             case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, coeffDim,
4655                                                                     ne, mapsO->B, mapsC->B,
4656                                                                     mapsC->G, pa_data, x, y);
4657             case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, coeffDim,
4658                                                                     ne, mapsO->B, mapsC->B,
4659                                                                     mapsC->G, pa_data, x, y);
4660             default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne,
4661                                                              mapsO->B, mapsC->B,
4662                                                              mapsC->G, pa_data, x, y);
4663          }
4664       }
4665       else
4666          PAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
4667                                    mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y);
4668    }
4669    else
4670    {
4671       MFEM_ABORT("Unsupported dimension or space!");
4672    }
4673 }
4674 
4675 // Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient
4676 // to get a dof in H(curl) (range). You can think of the range as the "test" space
4677 // and the domain as the "trial" space, but there's no integration.
PAHcurlApplyGradient2D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & B_,const Array<double> & G_,const Vector & x_,Vector & y_)4678 static void PAHcurlApplyGradient2D(const int c_dofs1D,
4679                                    const int o_dofs1D,
4680                                    const int NE,
4681                                    const Array<double> &B_,
4682                                    const Array<double> &G_,
4683                                    const Vector &x_,
4684                                    Vector &y_)
4685 {
4686    auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
4687    auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4688 
4689    auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
4690    auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
4691 
4692    constexpr static int MAX_D1D = HCURL_MAX_D1D;
4693    MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4694 
4695    MFEM_FORALL(e, NE,
4696    {
4697       double w[MAX_D1D][MAX_D1D];
4698 
4699       // horizontal part
4700       for (int dx = 0; dx < c_dofs1D; ++dx)
4701       {
4702          for (int ey = 0; ey < c_dofs1D; ++ey)
4703          {
4704             w[dx][ey] = 0.0;
4705             for (int dy = 0; dy < c_dofs1D; ++dy)
4706             {
4707                w[dx][ey] += B(ey, dy) * x(dx, dy, e);
4708             }
4709          }
4710       }
4711 
4712       for (int ey = 0; ey < c_dofs1D; ++ey)
4713       {
4714          for (int ex = 0; ex < o_dofs1D; ++ex)
4715          {
4716             double s = 0.0;
4717             for (int dx = 0; dx < c_dofs1D; ++dx)
4718             {
4719                s += G(ex, dx) * w[dx][ey];
4720             }
4721             const int local_index = ey*o_dofs1D + ex;
4722             y(local_index, e) += s;
4723          }
4724       }
4725 
4726       // vertical part
4727       for (int dx = 0; dx < c_dofs1D; ++dx)
4728       {
4729          for (int ey = 0; ey < o_dofs1D; ++ey)
4730          {
4731             w[dx][ey] = 0.0;
4732             for (int dy = 0; dy < c_dofs1D; ++dy)
4733             {
4734                w[dx][ey] += G(ey, dy) * x(dx, dy, e);
4735             }
4736          }
4737       }
4738 
4739       for (int ey = 0; ey < o_dofs1D; ++ey)
4740       {
4741          for (int ex = 0; ex < c_dofs1D; ++ex)
4742          {
4743             double s = 0.0;
4744             for (int dx = 0; dx < c_dofs1D; ++dx)
4745             {
4746                s += B(ex, dx) * w[dx][ey];
4747             }
4748             const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4749             y(local_index, e) += s;
4750          }
4751       }
4752    });
4753 }
4754 
4755 // Specialization of PAHcurlApplyGradient2D to the case where B is identity
PAHcurlApplyGradient2DBId(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & G_,const Vector & x_,Vector & y_)4756 static void PAHcurlApplyGradient2DBId(const int c_dofs1D,
4757                                       const int o_dofs1D,
4758                                       const int NE,
4759                                       const Array<double> &G_,
4760                                       const Vector &x_,
4761                                       Vector &y_)
4762 {
4763    auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4764 
4765    auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
4766    auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
4767 
4768    constexpr static int MAX_D1D = HCURL_MAX_D1D;
4769    MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4770 
4771    MFEM_FORALL(e, NE,
4772    {
4773       double w[MAX_D1D][MAX_D1D];
4774 
4775       // horizontal part
4776       for (int dx = 0; dx < c_dofs1D; ++dx)
4777       {
4778          for (int ey = 0; ey < c_dofs1D; ++ey)
4779          {
4780             const int dy = ey;
4781             w[dx][ey] = x(dx, dy, e);
4782          }
4783       }
4784 
4785       for (int ey = 0; ey < c_dofs1D; ++ey)
4786       {
4787          for (int ex = 0; ex < o_dofs1D; ++ex)
4788          {
4789             double s = 0.0;
4790             for (int dx = 0; dx < c_dofs1D; ++dx)
4791             {
4792                s += G(ex, dx) * w[dx][ey];
4793             }
4794             const int local_index = ey*o_dofs1D + ex;
4795             y(local_index, e) += s;
4796          }
4797       }
4798 
4799       // vertical part
4800       for (int dx = 0; dx < c_dofs1D; ++dx)
4801       {
4802          for (int ey = 0; ey < o_dofs1D; ++ey)
4803          {
4804             w[dx][ey] = 0.0;
4805             for (int dy = 0; dy < c_dofs1D; ++dy)
4806             {
4807                w[dx][ey] += G(ey, dy) * x(dx, dy, e);
4808             }
4809          }
4810       }
4811 
4812       for (int ey = 0; ey < o_dofs1D; ++ey)
4813       {
4814          for (int ex = 0; ex < c_dofs1D; ++ex)
4815          {
4816             const int dx = ex;
4817             const double s = w[dx][ey];
4818             const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4819             y(local_index, e) += s;
4820          }
4821       }
4822    });
4823 }
4824 
PAHcurlApplyGradientTranspose2D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & B_,const Array<double> & G_,const Vector & x_,Vector & y_)4825 static void PAHcurlApplyGradientTranspose2D(
4826    const int c_dofs1D, const int o_dofs1D, const int NE,
4827    const Array<double> &B_, const Array<double> &G_,
4828    const Vector &x_, Vector &y_)
4829 {
4830    auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
4831    auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4832 
4833    auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
4834    auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
4835 
4836    constexpr static int MAX_D1D = HCURL_MAX_D1D;
4837    MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4838 
4839    MFEM_FORALL(e, NE,
4840    {
4841       double w[MAX_D1D][MAX_D1D];
4842 
4843       // horizontal part (open x, closed y)
4844       for (int dy = 0; dy < c_dofs1D; ++dy)
4845       {
4846          for (int ex = 0; ex < o_dofs1D; ++ex)
4847          {
4848             w[dy][ex] = 0.0;
4849             for (int ey = 0; ey < c_dofs1D; ++ey)
4850             {
4851                const int local_index = ey*o_dofs1D + ex;
4852                w[dy][ex] += B(ey, dy) * x(local_index, e);
4853             }
4854          }
4855       }
4856 
4857       for (int dy = 0; dy < c_dofs1D; ++dy)
4858       {
4859          for (int dx = 0; dx < c_dofs1D; ++dx)
4860          {
4861             double s = 0.0;
4862             for (int ex = 0; ex < o_dofs1D; ++ex)
4863             {
4864                s += G(ex, dx) * w[dy][ex];
4865             }
4866             y(dx, dy, e) += s;
4867          }
4868       }
4869 
4870       // vertical part (open y, closed x)
4871       for (int dy = 0; dy < c_dofs1D; ++dy)
4872       {
4873          for (int ex = 0; ex < c_dofs1D; ++ex)
4874          {
4875             w[dy][ex] = 0.0;
4876             for (int ey = 0; ey < o_dofs1D; ++ey)
4877             {
4878                const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4879                w[dy][ex] += G(ey, dy) * x(local_index, e);
4880             }
4881          }
4882       }
4883 
4884       for (int dy = 0; dy < c_dofs1D; ++dy)
4885       {
4886          for (int dx = 0; dx < c_dofs1D; ++dx)
4887          {
4888             double s = 0.0;
4889             for (int ex = 0; ex < c_dofs1D; ++ex)
4890             {
4891                s += B(ex, dx) * w[dy][ex];
4892             }
4893             y(dx, dy, e) += s;
4894          }
4895       }
4896    });
4897 }
4898 
4899 // Specialization of PAHcurlApplyGradientTranspose2D to the case where
4900 // B is identity
PAHcurlApplyGradientTranspose2DBId(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & G_,const Vector & x_,Vector & y_)4901 static void PAHcurlApplyGradientTranspose2DBId(
4902    const int c_dofs1D, const int o_dofs1D, const int NE,
4903    const Array<double> &G_,
4904    const Vector &x_, Vector &y_)
4905 {
4906    auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4907 
4908    auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
4909    auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
4910 
4911    constexpr static int MAX_D1D = HCURL_MAX_D1D;
4912    MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4913 
4914    MFEM_FORALL(e, NE,
4915    {
4916       double w[MAX_D1D][MAX_D1D];
4917 
4918       // horizontal part (open x, closed y)
4919       for (int dy = 0; dy < c_dofs1D; ++dy)
4920       {
4921          for (int ex = 0; ex < o_dofs1D; ++ex)
4922          {
4923             const int ey = dy;
4924             const int local_index = ey*o_dofs1D + ex;
4925             w[dy][ex] = x(local_index, e);
4926          }
4927       }
4928 
4929       for (int dy = 0; dy < c_dofs1D; ++dy)
4930       {
4931          for (int dx = 0; dx < c_dofs1D; ++dx)
4932          {
4933             double s = 0.0;
4934             for (int ex = 0; ex < o_dofs1D; ++ex)
4935             {
4936                s += G(ex, dx) * w[dy][ex];
4937             }
4938             y(dx, dy, e) += s;
4939          }
4940       }
4941 
4942       // vertical part (open y, closed x)
4943       for (int dy = 0; dy < c_dofs1D; ++dy)
4944       {
4945          for (int ex = 0; ex < c_dofs1D; ++ex)
4946          {
4947             w[dy][ex] = 0.0;
4948             for (int ey = 0; ey < o_dofs1D; ++ey)
4949             {
4950                const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4951                w[dy][ex] += G(ey, dy) * x(local_index, e);
4952             }
4953          }
4954       }
4955 
4956       for (int dy = 0; dy < c_dofs1D; ++dy)
4957       {
4958          for (int dx = 0; dx < c_dofs1D; ++dx)
4959          {
4960             const int ex = dx;
4961             const double s = w[dy][ex];
4962             y(dx, dy, e) += s;
4963          }
4964       }
4965    });
4966 }
4967 
PAHcurlApplyGradient3D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & B_,const Array<double> & G_,const Vector & x_,Vector & y_)4968 static void PAHcurlApplyGradient3D(const int c_dofs1D,
4969                                    const int o_dofs1D,
4970                                    const int NE,
4971                                    const Array<double> &B_,
4972                                    const Array<double> &G_,
4973                                    const Vector &x_,
4974                                    Vector &y_)
4975 {
4976    auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
4977    auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4978 
4979    auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
4980    auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
4981 
4982    constexpr static int MAX_D1D = HCURL_MAX_D1D;
4983    MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4984 
4985    MFEM_FORALL(e, NE,
4986    {
4987       double w1[MAX_D1D][MAX_D1D][MAX_D1D];
4988       double w2[MAX_D1D][MAX_D1D][MAX_D1D];
4989 
4990       // ---
4991       // dofs that point parallel to x-axis (open in x, closed in y, z)
4992       // ---
4993 
4994       // contract in z
4995       for (int ez = 0; ez < c_dofs1D; ++ez)
4996       {
4997          for (int dx = 0; dx < c_dofs1D; ++dx)
4998          {
4999             for (int dy = 0; dy < c_dofs1D; ++dy)
5000             {
5001                w1[dx][dy][ez] = 0.0;
5002                for (int dz = 0; dz < c_dofs1D; ++dz)
5003                {
5004                   w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5005                }
5006             }
5007          }
5008       }
5009 
5010       // contract in y
5011       for (int ez = 0; ez < c_dofs1D; ++ez)
5012       {
5013          for (int ey = 0; ey < c_dofs1D; ++ey)
5014          {
5015             for (int dx = 0; dx < c_dofs1D; ++dx)
5016             {
5017                w2[dx][ey][ez] = 0.0;
5018                for (int dy = 0; dy < c_dofs1D; ++dy)
5019                {
5020                   w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5021                }
5022             }
5023          }
5024       }
5025 
5026       // contract in x
5027       for (int ez = 0; ez < c_dofs1D; ++ez)
5028       {
5029          for (int ey = 0; ey < c_dofs1D; ++ey)
5030          {
5031             for (int ex = 0; ex < o_dofs1D; ++ex)
5032             {
5033                double s = 0.0;
5034                for (int dx = 0; dx < c_dofs1D; ++dx)
5035                {
5036                   s += G(ex, dx) * w2[dx][ey][ez];
5037                }
5038                const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5039                y(local_index, e) += s;
5040             }
5041          }
5042       }
5043 
5044       // ---
5045       // dofs that point parallel to y-axis (open in y, closed in x, z)
5046       // ---
5047 
5048       // contract in z
5049       for (int ez = 0; ez < c_dofs1D; ++ez)
5050       {
5051          for (int dx = 0; dx < c_dofs1D; ++dx)
5052          {
5053             for (int dy = 0; dy < c_dofs1D; ++dy)
5054             {
5055                w1[dx][dy][ez] = 0.0;
5056                for (int dz = 0; dz < c_dofs1D; ++dz)
5057                {
5058                   w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5059                }
5060             }
5061          }
5062       }
5063 
5064       // contract in y
5065       for (int ez = 0; ez < c_dofs1D; ++ez)
5066       {
5067          for (int ey = 0; ey < o_dofs1D; ++ey)
5068          {
5069             for (int dx = 0; dx < c_dofs1D; ++dx)
5070             {
5071                w2[dx][ey][ez] = 0.0;
5072                for (int dy = 0; dy < c_dofs1D; ++dy)
5073                {
5074                   w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
5075                }
5076             }
5077          }
5078       }
5079 
5080       // contract in x
5081       for (int ez = 0; ez < c_dofs1D; ++ez)
5082       {
5083          for (int ey = 0; ey < o_dofs1D; ++ey)
5084          {
5085             for (int ex = 0; ex < c_dofs1D; ++ex)
5086             {
5087                double s = 0.0;
5088                for (int dx = 0; dx < c_dofs1D; ++dx)
5089                {
5090                   s += B(ex, dx) * w2[dx][ey][ez];
5091                }
5092                const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5093                                        ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5094                y(local_index, e) += s;
5095             }
5096          }
5097       }
5098 
5099       // ---
5100       // dofs that point parallel to z-axis (open in z, closed in x, y)
5101       // ---
5102 
5103       // contract in z
5104       for (int ez = 0; ez < o_dofs1D; ++ez)
5105       {
5106          for (int dx = 0; dx < c_dofs1D; ++dx)
5107          {
5108             for (int dy = 0; dy < c_dofs1D; ++dy)
5109             {
5110                w1[dx][dy][ez] = 0.0;
5111                for (int dz = 0; dz < c_dofs1D; ++dz)
5112                {
5113                   w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
5114                }
5115             }
5116          }
5117       }
5118 
5119       // contract in y
5120       for (int ez = 0; ez < o_dofs1D; ++ez)
5121       {
5122          for (int ey = 0; ey < c_dofs1D; ++ey)
5123          {
5124             for (int dx = 0; dx < c_dofs1D; ++dx)
5125             {
5126                w2[dx][ey][ez] = 0.0;
5127                for (int dy = 0; dy < c_dofs1D; ++dy)
5128                {
5129                   w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5130                }
5131             }
5132          }
5133       }
5134 
5135       // contract in x
5136       for (int ez = 0; ez < o_dofs1D; ++ez)
5137       {
5138          for (int ey = 0; ey < c_dofs1D; ++ey)
5139          {
5140             for (int ex = 0; ex < c_dofs1D; ++ex)
5141             {
5142                double s = 0.0;
5143                for (int dx = 0; dx < c_dofs1D; ++dx)
5144                {
5145                   s += B(ex, dx) * w2[dx][ey][ez];
5146                }
5147                const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5148                                        ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5149                y(local_index, e) += s;
5150             }
5151          }
5152       }
5153    });
5154 }
5155 
5156 // Specialization of PAHcurlApplyGradient3D to the case where
PAHcurlApplyGradient3DBId(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & G_,const Vector & x_,Vector & y_)5157 static void PAHcurlApplyGradient3DBId(const int c_dofs1D,
5158                                       const int o_dofs1D,
5159                                       const int NE,
5160                                       const Array<double> &G_,
5161                                       const Vector &x_,
5162                                       Vector &y_)
5163 {
5164    auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5165 
5166    auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5167    auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5168 
5169    constexpr static int MAX_D1D = HCURL_MAX_D1D;
5170    MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5171 
5172    MFEM_FORALL(e, NE,
5173    {
5174       double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5175       double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5176 
5177       // ---
5178       // dofs that point parallel to x-axis (open in x, closed in y, z)
5179       // ---
5180 
5181       // contract in z
5182       for (int ez = 0; ez < c_dofs1D; ++ez)
5183       {
5184          for (int dx = 0; dx < c_dofs1D; ++dx)
5185          {
5186             for (int dy = 0; dy < c_dofs1D; ++dy)
5187             {
5188                const int dz = ez;
5189                w1[dx][dy][ez] = x(dx, dy, dz, e);
5190             }
5191          }
5192       }
5193 
5194       // contract in y
5195       for (int ez = 0; ez < c_dofs1D; ++ez)
5196       {
5197          for (int ey = 0; ey < c_dofs1D; ++ey)
5198          {
5199             for (int dx = 0; dx < c_dofs1D; ++dx)
5200             {
5201                const int dy = ey;
5202                w2[dx][ey][ez] = w1[dx][dy][ez];
5203             }
5204          }
5205       }
5206 
5207       // contract in x
5208       for (int ez = 0; ez < c_dofs1D; ++ez)
5209       {
5210          for (int ey = 0; ey < c_dofs1D; ++ey)
5211          {
5212             for (int ex = 0; ex < o_dofs1D; ++ex)
5213             {
5214                double s = 0.0;
5215                for (int dx = 0; dx < c_dofs1D; ++dx)
5216                {
5217                   s += G(ex, dx) * w2[dx][ey][ez];
5218                }
5219                const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5220                y(local_index, e) += s;
5221             }
5222          }
5223       }
5224 
5225       // ---
5226       // dofs that point parallel to y-axis (open in y, closed in x, z)
5227       // ---
5228 
5229       // contract in z
5230       for (int ez = 0; ez < c_dofs1D; ++ez)
5231       {
5232          for (int dx = 0; dx < c_dofs1D; ++dx)
5233          {
5234             for (int dy = 0; dy < c_dofs1D; ++dy)
5235             {
5236                const int dz = ez;
5237                w1[dx][dy][ez] = x(dx, dy, dz, e);
5238             }
5239          }
5240       }
5241 
5242       // contract in y
5243       for (int ez = 0; ez < c_dofs1D; ++ez)
5244       {
5245          for (int ey = 0; ey < o_dofs1D; ++ey)
5246          {
5247             for (int dx = 0; dx < c_dofs1D; ++dx)
5248             {
5249                w2[dx][ey][ez] = 0.0;
5250                for (int dy = 0; dy < c_dofs1D; ++dy)
5251                {
5252                   w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
5253                }
5254             }
5255          }
5256       }
5257 
5258       // contract in x
5259       for (int ez = 0; ez < c_dofs1D; ++ez)
5260       {
5261          for (int ey = 0; ey < o_dofs1D; ++ey)
5262          {
5263             for (int ex = 0; ex < c_dofs1D; ++ex)
5264             {
5265                const int dx = ex;
5266                const double s = w2[dx][ey][ez];
5267                const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5268                                        ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5269                y(local_index, e) += s;
5270             }
5271          }
5272       }
5273 
5274       // ---
5275       // dofs that point parallel to z-axis (open in z, closed in x, y)
5276       // ---
5277 
5278       // contract in z
5279       for (int ez = 0; ez < o_dofs1D; ++ez)
5280       {
5281          for (int dx = 0; dx < c_dofs1D; ++dx)
5282          {
5283             for (int dy = 0; dy < c_dofs1D; ++dy)
5284             {
5285                w1[dx][dy][ez] = 0.0;
5286                for (int dz = 0; dz < c_dofs1D; ++dz)
5287                {
5288                   w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
5289                }
5290             }
5291          }
5292       }
5293 
5294       // contract in y
5295       for (int ez = 0; ez < o_dofs1D; ++ez)
5296       {
5297          for (int ey = 0; ey < c_dofs1D; ++ey)
5298          {
5299             for (int dx = 0; dx < c_dofs1D; ++dx)
5300             {
5301                const int dy = ey;
5302                w2[dx][ey][ez] = w1[dx][dy][ez];
5303             }
5304          }
5305       }
5306 
5307       // contract in x
5308       for (int ez = 0; ez < o_dofs1D; ++ez)
5309       {
5310          for (int ey = 0; ey < c_dofs1D; ++ey)
5311          {
5312             for (int ex = 0; ex < c_dofs1D; ++ex)
5313             {
5314                const int dx = ex;
5315                const double s = w2[dx][ey][ez];
5316                const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5317                                        ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5318                y(local_index, e) += s;
5319             }
5320          }
5321       }
5322    });
5323 }
5324 
PAHcurlApplyGradientTranspose3D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & B_,const Array<double> & G_,const Vector & x_,Vector & y_)5325 static void PAHcurlApplyGradientTranspose3D(
5326    const int c_dofs1D, const int o_dofs1D, const int NE,
5327    const Array<double> &B_, const Array<double> &G_,
5328    const Vector &x_, Vector &y_)
5329 {
5330    auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5331    auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5332 
5333    auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5334    auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5335 
5336    constexpr static int MAX_D1D = HCURL_MAX_D1D;
5337    MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5338 
5339    MFEM_FORALL(e, NE,
5340    {
5341       double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5342       double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5343       // ---
5344       // dofs that point parallel to x-axis (open in x, closed in y, z)
5345       // ---
5346 
5347       // contract in z
5348       for (int dz = 0; dz < c_dofs1D; ++dz)
5349       {
5350          for (int ex = 0; ex < o_dofs1D; ++ex)
5351          {
5352             for (int ey = 0; ey < c_dofs1D; ++ey)
5353             {
5354                w1[ex][ey][dz] = 0.0;
5355                for (int ez = 0; ez < c_dofs1D; ++ez)
5356                {
5357                   const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5358                   w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
5359                }
5360             }
5361          }
5362       }
5363 
5364       // contract in y
5365       for (int dz = 0; dz < c_dofs1D; ++dz)
5366       {
5367          for (int dy = 0; dy < c_dofs1D; ++dy)
5368          {
5369             for (int ex = 0; ex < o_dofs1D; ++ex)
5370             {
5371                w2[ex][dy][dz] = 0.0;
5372                for (int ey = 0; ey < c_dofs1D; ++ey)
5373                {
5374                   w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
5375                }
5376             }
5377          }
5378       }
5379 
5380       // contract in x
5381       for (int dz = 0; dz < c_dofs1D; ++dz)
5382       {
5383          for (int dy = 0; dy < c_dofs1D; ++dy)
5384          {
5385             for (int dx = 0; dx < c_dofs1D; ++dx)
5386             {
5387                double s = 0.0;
5388                for (int ex = 0; ex < o_dofs1D; ++ex)
5389                {
5390                   s += G(ex, dx) * w2[ex][dy][dz];
5391                }
5392                y(dx, dy, dz, e) += s;
5393             }
5394          }
5395       }
5396 
5397       // ---
5398       // dofs that point parallel to y-axis (open in y, closed in x, z)
5399       // ---
5400 
5401       // contract in z
5402       for (int dz = 0; dz < c_dofs1D; ++dz)
5403       {
5404          for (int ex = 0; ex < c_dofs1D; ++ex)
5405          {
5406             for (int ey = 0; ey < o_dofs1D; ++ey)
5407             {
5408                w1[ex][ey][dz] = 0.0;
5409                for (int ez = 0; ez < c_dofs1D; ++ez)
5410                {
5411                   const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5412                                           ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5413                   w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
5414                }
5415             }
5416          }
5417       }
5418 
5419       // contract in y
5420       for (int dz = 0; dz < c_dofs1D; ++dz)
5421       {
5422          for (int dy = 0; dy < c_dofs1D; ++dy)
5423          {
5424             for (int ex = 0; ex < c_dofs1D; ++ex)
5425             {
5426                w2[ex][dy][dz] = 0.0;
5427                for (int ey = 0; ey < o_dofs1D; ++ey)
5428                {
5429                   w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
5430                }
5431             }
5432          }
5433       }
5434 
5435       // contract in x
5436       for (int dz = 0; dz < c_dofs1D; ++dz)
5437       {
5438          for (int dy = 0; dy < c_dofs1D; ++dy)
5439          {
5440             for (int dx = 0; dx < c_dofs1D; ++dx)
5441             {
5442                double s = 0.0;
5443                for (int ex = 0; ex < c_dofs1D; ++ex)
5444                {
5445                   s += B(ex, dx) * w2[ex][dy][dz];
5446                }
5447                y(dx, dy, dz, e) += s;
5448             }
5449          }
5450       }
5451 
5452       // ---
5453       // dofs that point parallel to z-axis (open in z, closed in x, y)
5454       // ---
5455 
5456       // contract in z
5457       for (int dz = 0; dz < c_dofs1D; ++dz)
5458       {
5459          for (int ex = 0; ex < c_dofs1D; ++ex)
5460          {
5461             for (int ey = 0; ey < c_dofs1D; ++ey)
5462             {
5463                w1[ex][ey][dz] = 0.0;
5464                for (int ez = 0; ez < o_dofs1D; ++ez)
5465                {
5466                   const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5467                                           ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5468                   w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
5469                }
5470             }
5471          }
5472       }
5473 
5474       // contract in y
5475       for (int dz = 0; dz < c_dofs1D; ++dz)
5476       {
5477          for (int dy = 0; dy < c_dofs1D; ++dy)
5478          {
5479             for (int ex = 0; ex < c_dofs1D; ++ex)
5480             {
5481                w2[ex][dy][dz] = 0.0;
5482                for (int ey = 0; ey < c_dofs1D; ++ey)
5483                {
5484                   w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
5485                }
5486             }
5487          }
5488       }
5489 
5490       // contract in x
5491       for (int dz = 0; dz < c_dofs1D; ++dz)
5492       {
5493          for (int dy = 0; dy < c_dofs1D; ++dy)
5494          {
5495             for (int dx = 0; dx < c_dofs1D; ++dx)
5496             {
5497                double s = 0.0;
5498                for (int ex = 0; ex < c_dofs1D; ++ex)
5499                {
5500                   s += B(ex, dx) * w2[ex][dy][dz];
5501                }
5502                y(dx, dy, dz, e) += s;
5503             }
5504          }
5505       }
5506    });
5507 }
5508 
5509 // Specialization of PAHcurlApplyGradientTranspose3D to the case where
PAHcurlApplyGradientTranspose3DBId(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & G_,const Vector & x_,Vector & y_)5510 static void PAHcurlApplyGradientTranspose3DBId(
5511    const int c_dofs1D, const int o_dofs1D, const int NE,
5512    const Array<double> &G_,
5513    const Vector &x_, Vector &y_)
5514 {
5515    auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5516 
5517    auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5518    auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5519 
5520    constexpr static int MAX_D1D = HCURL_MAX_D1D;
5521    MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5522 
5523    MFEM_FORALL(e, NE,
5524    {
5525       double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5526       double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5527       // ---
5528       // dofs that point parallel to x-axis (open in x, closed in y, z)
5529       // ---
5530 
5531       // contract in z
5532       for (int dz = 0; dz < c_dofs1D; ++dz)
5533       {
5534          for (int ex = 0; ex < o_dofs1D; ++ex)
5535          {
5536             for (int ey = 0; ey < c_dofs1D; ++ey)
5537             {
5538                const int ez = dz;
5539                const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5540                w1[ex][ey][dz] = x(local_index, e);
5541             }
5542          }
5543       }
5544 
5545       // contract in y
5546       for (int dz = 0; dz < c_dofs1D; ++dz)
5547       {
5548          for (int dy = 0; dy < c_dofs1D; ++dy)
5549          {
5550             for (int ex = 0; ex < o_dofs1D; ++ex)
5551             {
5552                const int ey = dy;
5553                w2[ex][dy][dz] = w1[ex][ey][dz];
5554             }
5555          }
5556       }
5557 
5558       // contract in x
5559       for (int dz = 0; dz < c_dofs1D; ++dz)
5560       {
5561          for (int dy = 0; dy < c_dofs1D; ++dy)
5562          {
5563             for (int dx = 0; dx < c_dofs1D; ++dx)
5564             {
5565                double s = 0.0;
5566                for (int ex = 0; ex < o_dofs1D; ++ex)
5567                {
5568                   s += G(ex, dx) * w2[ex][dy][dz];
5569                }
5570                y(dx, dy, dz, e) += s;
5571             }
5572          }
5573       }
5574 
5575       // ---
5576       // dofs that point parallel to y-axis (open in y, closed in x, z)
5577       // ---
5578 
5579       // contract in z
5580       for (int dz = 0; dz < c_dofs1D; ++dz)
5581       {
5582          for (int ex = 0; ex < c_dofs1D; ++ex)
5583          {
5584             for (int ey = 0; ey < o_dofs1D; ++ey)
5585             {
5586                const int ez = dz;
5587                const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5588                                        ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5589                w1[ex][ey][dz] = x(local_index, e);
5590             }
5591          }
5592       }
5593 
5594       // contract in y
5595       for (int dz = 0; dz < c_dofs1D; ++dz)
5596       {
5597          for (int dy = 0; dy < c_dofs1D; ++dy)
5598          {
5599             for (int ex = 0; ex < c_dofs1D; ++ex)
5600             {
5601                w2[ex][dy][dz] = 0.0;
5602                for (int ey = 0; ey < o_dofs1D; ++ey)
5603                {
5604                   w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
5605                }
5606             }
5607          }
5608       }
5609 
5610       // contract in x
5611       for (int dz = 0; dz < c_dofs1D; ++dz)
5612       {
5613          for (int dy = 0; dy < c_dofs1D; ++dy)
5614          {
5615             for (int dx = 0; dx < c_dofs1D; ++dx)
5616             {
5617                const int ex = dx;
5618                double s = w2[ex][dy][dz];
5619                y(dx, dy, dz, e) += s;
5620             }
5621          }
5622       }
5623 
5624       // ---
5625       // dofs that point parallel to z-axis (open in z, closed in x, y)
5626       // ---
5627 
5628       // contract in z
5629       for (int dz = 0; dz < c_dofs1D; ++dz)
5630       {
5631          for (int ex = 0; ex < c_dofs1D; ++ex)
5632          {
5633             for (int ey = 0; ey < c_dofs1D; ++ey)
5634             {
5635                w1[ex][ey][dz] = 0.0;
5636                for (int ez = 0; ez < o_dofs1D; ++ez)
5637                {
5638                   const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5639                                           ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5640                   w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
5641                }
5642             }
5643          }
5644       }
5645 
5646       // contract in y
5647       for (int dz = 0; dz < c_dofs1D; ++dz)
5648       {
5649          for (int dy = 0; dy < c_dofs1D; ++dy)
5650          {
5651             for (int ex = 0; ex < c_dofs1D; ++ex)
5652             {
5653                const int ey = dy;
5654                w2[ex][dy][dz] = w1[ex][ey][dz];
5655             }
5656          }
5657       }
5658 
5659       // contract in x
5660       for (int dz = 0; dz < c_dofs1D; ++dz)
5661       {
5662          for (int dy = 0; dy < c_dofs1D; ++dy)
5663          {
5664             for (int dx = 0; dx < c_dofs1D; ++dx)
5665             {
5666                const int ex = dx;
5667                double s = w2[ex][dy][dz];
5668                y(dx, dy, dz, e) += s;
5669             }
5670          }
5671       }
5672    });
5673 }
5674 
AssemblePA(const FiniteElementSpace & trial_fes,const FiniteElementSpace & test_fes)5675 void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes,
5676                                       const FiniteElementSpace &test_fes)
5677 {
5678    // Assumes tensor-product elements, with a vector test space and H^1 trial space.
5679    Mesh *mesh = trial_fes.GetMesh();
5680    const FiniteElement *trial_fel = trial_fes.GetFE(0);
5681    const FiniteElement *test_fel = test_fes.GetFE(0);
5682 
5683    const NodalTensorFiniteElement *trial_el =
5684       dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
5685    MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!");
5686 
5687    const VectorTensorFiniteElement *test_el =
5688       dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
5689    MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
5690 
5691    const int dims = trial_el->GetDim();
5692    MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!");
5693    dim = mesh->Dimension();
5694    MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!");
5695    MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(),
5696                "Orders do not match!");
5697    ne = trial_fes.GetNE();
5698 
5699    const int order = trial_el->GetOrder();
5700    dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType());
5701    mfem::QuadratureFunctions1D qf1d;
5702    mfem::IntegrationRule closed_ir;
5703    closed_ir.SetSize(order + 1);
5704    qf1d.GaussLobatto(order + 1, &closed_ir);
5705    mfem::IntegrationRule open_ir;
5706    open_ir.SetSize(order);
5707    qf1d.GaussLegendre(order, &open_ir);
5708 
5709    maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR);
5710    o_dofs1D = maps_O_C->nqpt;
5711    if (trial_el->GetBasisType() == BasisType::GaussLobatto)
5712    {
5713       B_id = true;
5714       c_dofs1D = maps_O_C->ndof;
5715    }
5716    else
5717    {
5718       B_id = false;
5719       maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR);
5720       c_dofs1D = maps_C_C->nqpt;
5721    }
5722 }
5723 
AddMultPA(const Vector & x,Vector & y) const5724 void GradientInterpolator::AddMultPA(const Vector &x, Vector &y) const
5725 {
5726    if (dim == 3)
5727    {
5728       if (B_id)
5729       {
5730          PAHcurlApplyGradient3DBId(c_dofs1D, o_dofs1D, ne,
5731                                    maps_O_C->G, x, y);
5732       }
5733       else
5734       {
5735          PAHcurlApplyGradient3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
5736                                 maps_O_C->G, x, y);
5737       }
5738    }
5739    else if (dim == 2)
5740    {
5741       if (B_id)
5742       {
5743          PAHcurlApplyGradient2DBId(c_dofs1D, o_dofs1D, ne,
5744                                    maps_O_C->G, x, y);
5745       }
5746       else
5747       {
5748          PAHcurlApplyGradient2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->G,
5749                                 x, y);
5750       }
5751    }
5752    else
5753    {
5754       mfem_error("Bad dimension!");
5755    }
5756 }
5757 
AddMultTransposePA(const Vector & x,Vector & y) const5758 void GradientInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const
5759 {
5760    if (dim == 3)
5761    {
5762       if (B_id)
5763       {
5764          PAHcurlApplyGradientTranspose3DBId(c_dofs1D, o_dofs1D, ne,
5765                                             maps_O_C->G, x, y);
5766       }
5767       else
5768       {
5769          PAHcurlApplyGradientTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
5770                                          maps_O_C->G, x, y);
5771       }
5772    }
5773    else if (dim == 2)
5774    {
5775       if (B_id)
5776       {
5777          PAHcurlApplyGradientTranspose2DBId(c_dofs1D, o_dofs1D, ne,
5778                                             maps_O_C->G, x, y);
5779       }
5780       else
5781       {
5782          PAHcurlApplyGradientTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
5783                                          maps_O_C->G, x, y);
5784       }
5785    }
5786    else
5787    {
5788       mfem_error("Bad dimension!");
5789    }
5790 }
5791 
PAHcurlVecH1IdentityApply3D(const int c_dofs1D,const int o_dofs1D,const int NE,const Array<double> & Bclosed,const Array<double> & Bopen,const Vector & pa_data,const Vector & x_,Vector & y_)5792 static void PAHcurlVecH1IdentityApply3D(const int c_dofs1D,
5793                                         const int o_dofs1D,
5794                                         const int NE,
5795                                         const Array<double> &Bclosed,
5796                                         const Array<double> &Bopen,
5797                                         const Vector &pa_data,
5798                                         const Vector &x_,
5799                                         Vector &y_)
5800 {
5801    auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
5802    auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
5803 
5804    auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE);
5805    auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5806 
5807    auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D),
5808                      NE);
5809 
5810    constexpr static int MAX_D1D = HCURL_MAX_D1D;
5811    MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5812 
5813    MFEM_FORALL(e, NE,
5814    {
5815       double w1[3][MAX_D1D][MAX_D1D][MAX_D1D];
5816       double w2[3][MAX_D1D][MAX_D1D][MAX_D1D];
5817 
5818       // dofs that point parallel to x-axis (open in x, closed in y, z)
5819 
5820       // contract in z
5821       for (int ez = 0; ez < c_dofs1D; ++ez)
5822       {
5823          for (int dx = 0; dx < c_dofs1D; ++dx)
5824          {
5825             for (int dy = 0; dy < c_dofs1D; ++dy)
5826             {
5827                for (int j=0; j<3; ++j)
5828                {
5829                   w1[j][dx][dy][ez] = 0.0;
5830                   for (int dz = 0; dz < c_dofs1D; ++dz)
5831                   {
5832                      w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
5833                   }
5834                }
5835             }
5836          }
5837       }
5838 
5839       // contract in y
5840       for (int ez = 0; ez < c_dofs1D; ++ez)
5841       {
5842          for (int ey = 0; ey < c_dofs1D; ++ey)
5843          {
5844             for (int dx = 0; dx < c_dofs1D; ++dx)
5845             {
5846                for (int j=0; j<3; ++j)
5847                {
5848                   w2[j][dx][ey][ez] = 0.0;
5849                   for (int dy = 0; dy < c_dofs1D; ++dy)
5850                   {
5851                      w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez];
5852                   }
5853                }
5854             }
5855          }
5856       }
5857 
5858       // contract in x
5859       for (int ez = 0; ez < c_dofs1D; ++ez)
5860       {
5861          for (int ey = 0; ey < c_dofs1D; ++ey)
5862          {
5863             for (int ex = 0; ex < o_dofs1D; ++ex)
5864             {
5865                for (int j=0; j<3; ++j)
5866                {
5867                   double s = 0.0;
5868                   for (int dx = 0; dx < c_dofs1D; ++dx)
5869                   {
5870                      s += Bo(ex, dx) * w2[j][dx][ey][ez];
5871                   }
5872                   const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5873                   y(local_index, e) += s * vk(j, local_index, e);
5874                }
5875             }
5876          }
5877       }
5878 
5879       // dofs that point parallel to y-axis (open in y, closed in x, z)
5880 
5881       // contract in z
5882       for (int ez = 0; ez < c_dofs1D; ++ez)
5883       {
5884          for (int dx = 0; dx < c_dofs1D; ++dx)
5885          {
5886             for (int dy = 0; dy < c_dofs1D; ++dy)
5887             {
5888                for (int j=0; j<3; ++j)
5889                {
5890                   w1[j][dx][dy][ez] = 0.0;
5891                   for (int dz = 0; dz < c_dofs1D; ++dz)
5892                   {
5893                      w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
5894                   }
5895                }
5896             }
5897          }
5898       }
5899 
5900       // contract in y
5901       for (int ez = 0; ez < c_dofs1D; ++ez)
5902       {
5903          for (int ey = 0; ey < o_dofs1D; ++ey)
5904          {
5905             for (int dx = 0; dx < c_dofs1D; ++dx)
5906             {
5907                for (int j=0; j<3; ++j)
5908                {
5909                   w2[j][dx][ey][ez] = 0.0;
5910                   for (int dy = 0; dy < c_dofs1D; ++dy)
5911                   {
5912                      w2[j][dx][ey][ez] += Bo(ey, dy) * w1[j][dx][dy][ez];
5913                   }
5914                }
5915             }
5916          }
5917       }
5918 
5919       // contract in x
5920       for (int ez = 0; ez < c_dofs1D; ++ez)
5921       {
5922          for (int ey = 0; ey < o_dofs1D; ++ey)
5923          {
5924             for (int ex = 0; ex < c_dofs1D; ++ex)
5925             {
5926                for (int j=0; j<3; ++j)
5927                {
5928                   double s = 0.0;
5929                   for (int dx = 0; dx < c_dofs1D; ++dx)
5930                   {
5931                      s += Bc(ex, dx) * w2[j][dx][ey][ez];
5932                   }
5933                   const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5934                                           ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5935                   y(local_index, e) += s * vk(j, local_index, e);
5936                }
5937             }
5938          }
5939       }
5940 
5941       // dofs that point parallel to z-axis (open in z, closed in x, y)
5942 
5943       // contract in z
5944       for (int ez = 0; ez < o_dofs1D; ++ez)
5945       {
5946          for (int dx = 0; dx < c_dofs1D; ++dx)
5947          {
5948             for (int dy = 0; dy < c_dofs1D; ++dy)
5949             {
5950                for (int j=0; j<3; ++j)
5951                {
5952                   w1[j][dx][dy][ez] = 0.0;
5953                   for (int dz = 0; dz < c_dofs1D; ++dz)
5954                   {
5955                      w1[j][dx][dy][ez] += Bo(ez, dz) * x(dx, dy, dz, j, e);
5956                   }
5957                }
5958             }
5959          }
5960       }
5961 
5962       // contract in y
5963       for (int ez = 0; ez < o_dofs1D; ++ez)
5964       {
5965          for (int ey = 0; ey < c_dofs1D; ++ey)
5966          {
5967             for (int dx = 0; dx < c_dofs1D; ++dx)
5968             {
5969                for (int j=0; j<3; ++j)
5970                {
5971                   w2[j][dx][ey][ez] = 0.0;
5972                   for (int dy = 0; dy < c_dofs1D; ++dy)
5973                   {
5974                      w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez];
5975                   }
5976                }
5977             }
5978          }
5979       }
5980 
5981       // contract in x
5982       for (int ez = 0; ez < o_dofs1D; ++ez)
5983       {
5984          for (int ey = 0; ey < c_dofs1D; ++ey)
5985          {
5986             for (int ex = 0; ex < c_dofs1D; ++ex)
5987             {
5988                for (int j=0; j<3; ++j)
5989                {
5990                   double s = 0.0;
5991                   for (int dx = 0; dx < c_dofs1D; ++dx)
5992                   {
5993                      s += Bc(ex, dx) * w2[j][dx][ey][ez];
5994                   }
5995                   const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5996                                           ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5997                   y(local_index, e) += s * vk(j, local_index, e);
5998                }
5999             }
6000          }
6001       }
6002    });
6003 }
6004 
// 3D transpose kernel called from IdentityInterpolator::AddMultTransposePA
// when dim == 3. The result is accumulated (+=) into y_.
//
// Views set up below:
//  - Bc(c_dofs1D, c_dofs1D) over Bclosed and Bo(o_dofs1D, c_dofs1D) over Bopen.
//  - x(3*c_dofs1D*c_dofs1D*o_dofs1D, NE): per element, the x-directed entries
//    first, then the y-directed, then the z-directed ones, each family having
//    c_dofs1D*c_dofs1D*o_dofs1D entries.
//  - y(c_dofs1D, c_dofs1D, c_dofs1D, 3, NE): three output components j per
//    (dx, dy, dz).
//  - vk(3, 3*c_dofs1D*c_dofs1D*o_dofs1D, NE) over pa_data: three weights per
//    input entry.
//
// For each family, every input entry is scaled by vk(j, local_index, e) and
// then contracted over ex, ey and ez in turn, with Bo in the family's own
// direction and Bc in the other two; the result is added to component j of y.
static void PAHcurlVecH1IdentityApplyTranspose3D(const int c_dofs1D,
                                                 const int o_dofs1D,
                                                 const int NE,
                                                 const Array<double> &Bclosed,
                                                 const Array<double> &Bopen,
                                                 const Vector &pa_data,
                                                 const Vector &x_,
                                                 Vector &y_)
{
   auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
   auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);

   auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
   auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE);

   auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D),
                     NE);

   constexpr static int MAX_D1D = HCURL_MAX_D1D;

   // The scratch arrays below have extent MAX_D1D in every direction and are
   // indexed with values below c_dofs1D or o_dofs1D, so both must fit.
   MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");

   MFEM_FORALL(e, NE,
   {
      // w2: scaled input contracted in x; w1: additionally contracted in y.
      double w1[3][MAX_D1D][MAX_D1D][MAX_D1D];
      double w2[3][MAX_D1D][MAX_D1D][MAX_D1D];

      // dofs that point parallel to x-axis (open in x, closed in y, z)

      // contract in x
      for (int ez = 0; ez < c_dofs1D; ++ez)
      {
         for (int ey = 0; ey < c_dofs1D; ++ey)
         {
            for (int j=0; j<3; ++j)
            {
               for (int dx = 0; dx < c_dofs1D; ++dx)
               {
                  w2[j][dx][ey][ez] = 0.0;
               }
               for (int ex = 0; ex < o_dofs1D; ++ex)
               {
                  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
                  // input entry scaled by its j-th weight
                  const double xv = x(local_index, e) * vk(j, local_index, e);
                  for (int dx = 0; dx < c_dofs1D; ++dx)
                  {
                     w2[j][dx][ey][ez] += xv * Bo(ex, dx);
                  }
               }
            }
         }
      }

      // contract in y
      for (int ez = 0; ez < c_dofs1D; ++ez)
      {
         for (int dx = 0; dx < c_dofs1D; ++dx)
         {
            for (int dy = 0; dy < c_dofs1D; ++dy)
            {
               for (int j=0; j<3; ++j)
               {
                  w1[j][dx][dy][ez] = 0.0;
                  for (int ey = 0; ey < c_dofs1D; ++ey)
                  {
                     w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy);
                  }
               }
            }
         }
      }

      // contract in z
      for (int dx = 0; dx < c_dofs1D; ++dx)
      {
         for (int dy = 0; dy < c_dofs1D; ++dy)
         {
            for (int dz = 0; dz < c_dofs1D; ++dz)
            {
               for (int j=0; j<3; ++j)
               {
                  double s = 0.0;
                  for (int ez = 0; ez < c_dofs1D; ++ez)
                  {
                     s += w1[j][dx][dy][ez] * Bc(ez, dz);
                  }
                  y(dx, dy, dz, j, e) += s;
               }
            }
         }
      }

      // dofs that point parallel to y-axis (open in y, closed in x, z)

      // contract in x
      for (int ez = 0; ez < c_dofs1D; ++ez)
      {
         for (int ey = 0; ey < o_dofs1D; ++ey)
         {
            for (int j=0; j<3; ++j)
            {
               for (int dx = 0; dx < c_dofs1D; ++dx)
               {
                  w2[j][dx][ey][ez] = 0.0;
               }
               for (int ex = 0; ex < c_dofs1D; ++ex)
               {
                  // offset past the x-directed family
                  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
                                          ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
                  const double xv = x(local_index, e) * vk(j, local_index, e);
                  for (int dx = 0; dx < c_dofs1D; ++dx)
                  {
                     w2[j][dx][ey][ez] += xv * Bc(ex, dx);
                  }
               }
            }
         }
      }

      // contract in y
      for (int ez = 0; ez < c_dofs1D; ++ez)
      {
         for (int dx = 0; dx < c_dofs1D; ++dx)
         {
            for (int dy = 0; dy < c_dofs1D; ++dy)
            {
               for (int j=0; j<3; ++j)
               {
                  w1[j][dx][dy][ez] = 0.0;
                  for (int ey = 0; ey < o_dofs1D; ++ey)
                  {
                     w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bo(ey, dy);
                  }
               }
            }
         }
      }

      // contract in z
      for (int dx = 0; dx < c_dofs1D; ++dx)
      {
         for (int dy = 0; dy < c_dofs1D; ++dy)
         {
            for (int dz = 0; dz < c_dofs1D; ++dz)
            {
               for (int j=0; j<3; ++j)
               {
                  double s = 0.0;
                  for (int ez = 0; ez < c_dofs1D; ++ez)
                  {
                     s += w1[j][dx][dy][ez] * Bc(ez, dz);
                  }
                  y(dx, dy, dz, j, e) += s;
               }
            }
         }
      }

      // dofs that point parallel to z-axis (open in z, closed in x, y)

      // contract in x
      for (int ez = 0; ez < o_dofs1D; ++ez)
      {
         for (int ey = 0; ey < c_dofs1D; ++ey)
         {
            for (int j=0; j<3; ++j)
            {
               for (int dx = 0; dx < c_dofs1D; ++dx)
               {
                  w2[j][dx][ey][ez] = 0.0;
               }
               for (int ex = 0; ex < c_dofs1D; ++ex)
               {
                  // offset past the x- and y-directed families
                  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
                                          ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
                  const double xv = x(local_index, e) * vk(j, local_index, e);
                  for (int dx = 0; dx < c_dofs1D; ++dx)
                  {
                     w2[j][dx][ey][ez] += xv * Bc(ex, dx);
                  }
               }
            }
         }
      }

      // contract in y
      for (int ez = 0; ez < o_dofs1D; ++ez)
      {
         for (int dx = 0; dx < c_dofs1D; ++dx)
         {
            for (int dy = 0; dy < c_dofs1D; ++dy)
            {
               for (int j=0; j<3; ++j)
               {
                  w1[j][dx][dy][ez] = 0.0;
                  for (int ey = 0; ey < c_dofs1D; ++ey)
                  {
                     w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy);
                  }
               }
            }
         }
      }

      // contract in z
      for (int dx = 0; dx < c_dofs1D; ++dx)
      {
         for (int dy = 0; dy < c_dofs1D; ++dy)
         {
            for (int dz = 0; dz < c_dofs1D; ++dz)
            {
               for (int j=0; j<3; ++j)
               {
                  double s = 0.0;
                  for (int ez = 0; ez < o_dofs1D; ++ez)
                  {
                     s += w1[j][dx][dy][ez] * Bo(ez, dz);
                  }
                  y(dx, dy, dz, j, e) += s;
               }
            }
         }
      }
   });
}
6230 
// 2D kernel called from IdentityInterpolator::AddMultPA when dim == 2. The
// result is accumulated (+=) into y_.
//
// Views set up below:
//  - Bc(c_dofs1D, c_dofs1D) over Bclosed and Bo(o_dofs1D, c_dofs1D) over Bopen;
//    in every contraction the first index is the output index (ex/ey) and the
//    second the input index (dx/dy).
//  - x(c_dofs1D, c_dofs1D, 2, NE): two input components j per (dx, dy).
//  - y(2*c_dofs1D*o_dofs1D, NE): per element, the c_dofs1D*o_dofs1D x-directed
//    entries first, then the c_dofs1D*o_dofs1D y-directed ones.
//  - vk(2, 2*c_dofs1D*o_dofs1D, NE) over pa_data: two weights per output entry.
//
// For each output entry, both components of x are contracted with Bo in the
// entry's own direction and with Bc in the other direction, and
// sum_j vk(j, local_index, e) * (contracted component j) is added to y.
static void PAHcurlVecH1IdentityApply2D(const int c_dofs1D,
                                        const int o_dofs1D,
                                        const int NE,
                                        const Array<double> &Bclosed,
                                        const Array<double> &Bopen,
                                        const Vector &pa_data,
                                        const Vector &x_,
                                        Vector &y_)
{
   auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
   auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);

   auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, 2, NE);
   auto y = Reshape(y_.ReadWrite(), (2 * c_dofs1D * o_dofs1D), NE);

   auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE);

   constexpr static int MAX_D1D = HCURL_MAX_D1D;

   // The scratch array below has extent MAX_D1D in both directions and is
   // indexed with values below c_dofs1D or o_dofs1D, so both must fit.
   MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");

   MFEM_FORALL(e, NE,
   {
      // w: x contracted in y, reused for both dof families
      double w[2][MAX_D1D][MAX_D1D];

      // dofs that point parallel to x-axis (open in x, closed in y)

      // contract in y
      for (int ey = 0; ey < c_dofs1D; ++ey)
      {
         for (int dx = 0; dx < c_dofs1D; ++dx)
         {
            for (int j=0; j<2; ++j)
            {
               w[j][dx][ey] = 0.0;
               for (int dy = 0; dy < c_dofs1D; ++dy)
               {
                  w[j][dx][ey] += Bc(ey, dy) * x(dx, dy, j, e);
               }
            }
         }
      }

      // contract in x
      for (int ey = 0; ey < c_dofs1D; ++ey)
      {
         for (int ex = 0; ex < o_dofs1D; ++ex)
         {
            for (int j=0; j<2; ++j)
            {
               double s = 0.0;
               for (int dx = 0; dx < c_dofs1D; ++dx)
               {
                  s += Bo(ex, dx) * w[j][dx][ey];
               }
               const int local_index = ey*o_dofs1D + ex;
               // j-th term of the dot product with vk(:, local_index, e)
               y(local_index, e) += s * vk(j, local_index, e);
            }
         }
      }

      // dofs that point parallel to y-axis (open in y, closed in x)

      // contract in y
      for (int ey = 0; ey < o_dofs1D; ++ey)
      {
         for (int dx = 0; dx < c_dofs1D; ++dx)
         {
            for (int j=0; j<2; ++j)
            {
               w[j][dx][ey] = 0.0;
               for (int dy = 0; dy < c_dofs1D; ++dy)
               {
                  w[j][dx][ey] += Bo(ey, dy) * x(dx, dy, j, e);
               }
            }
         }
      }

      // contract in x
      for (int ey = 0; ey < o_dofs1D; ++ey)
      {
         for (int ex = 0; ex < c_dofs1D; ++ex)
         {
            for (int j=0; j<2; ++j)
            {
               double s = 0.0;
               for (int dx = 0; dx < c_dofs1D; ++dx)
               {
                  s += Bc(ex, dx) * w[j][dx][ey];
               }
               // offset past the x-directed family
               const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
               y(local_index, e) += s * vk(j, local_index, e);
            }
         }
      }
   });
}
6329 
// 2D transpose kernel called from IdentityInterpolator::AddMultTransposePA
// when dim == 2. The result is accumulated (+=) into y_.
//
// Views set up below:
//  - Bc(c_dofs1D, c_dofs1D) over Bclosed and Bo(o_dofs1D, c_dofs1D) over Bopen.
//  - x(2*c_dofs1D*o_dofs1D, NE): per element, the c_dofs1D*o_dofs1D x-directed
//    entries first, then the c_dofs1D*o_dofs1D y-directed ones.
//  - y(c_dofs1D, c_dofs1D, 2, NE): two output components j per (dx, dy).
//  - vk(2, 2*c_dofs1D*o_dofs1D, NE) over pa_data: two weights per input entry.
//
// For each family, every input entry is scaled by vk(j, local_index, e) and
// contracted over ex and then ey, with Bo in the family's own direction and
// Bc in the other; the result is added to component j of y.
static void PAHcurlVecH1IdentityApplyTranspose2D(const int c_dofs1D,
                                                 const int o_dofs1D,
                                                 const int NE,
                                                 const Array<double> &Bclosed,
                                                 const Array<double> &Bopen,
                                                 const Vector &pa_data,
                                                 const Vector &x_,
                                                 Vector &y_)
{
   auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
   auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);

   auto x = Reshape(x_.Read(), (2 * c_dofs1D * o_dofs1D), NE);
   auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, 2, NE);

   auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE);

   constexpr static int MAX_D1D = HCURL_MAX_D1D;
   //constexpr static int MAX_Q1D = HCURL_MAX_Q1D;

   // The scratch array below has extent MAX_D1D in both directions and is
   // indexed with values below c_dofs1D or o_dofs1D, so both must fit.
   MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");

   MFEM_FORALL(e, NE,
   {
      // w: scaled input contracted in x, reused for both dof families
      double w[2][MAX_D1D][MAX_D1D];

      // dofs that point parallel to x-axis (open in x, closed in y)

      // contract in x
      for (int ey = 0; ey < c_dofs1D; ++ey)
      {
         for (int dx = 0; dx < c_dofs1D; ++dx)
         {
            for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; }
         }
         for (int ex = 0; ex < o_dofs1D; ++ex)
         {
            const int local_index = ey*o_dofs1D + ex;
            const double xd = x(local_index, e);

            for (int dx = 0; dx < c_dofs1D; ++dx)
            {
               for (int j=0; j<2; ++j)
               {
                  // input entry scaled by its j-th weight, spread along x
                  w[j][dx][ey] += Bo(ex, dx) * xd * vk(j, local_index, e);
               }
            }
         }
      }

      // contract in y
      for (int dx = 0; dx < c_dofs1D; ++dx)
      {
         for (int dy = 0; dy < c_dofs1D; ++dy)
         {
            for (int j=0; j<2; ++j)
            {
               double s = 0.0;
               for (int ey = 0; ey < c_dofs1D; ++ey)
               {
                  s += w[j][dx][ey] * Bc(ey, dy);
               }
               y(dx, dy, j, e) += s;
            }
         }
      }

      // dofs that point parallel to y-axis (open in y, closed in x)

      // contract in x
      for (int ey = 0; ey < o_dofs1D; ++ey)
      {
         for (int dx = 0; dx < c_dofs1D; ++dx)
         {
            for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; }
         }
         for (int ex = 0; ex < c_dofs1D; ++ex)
         {
            // offset past the x-directed family
            const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
            const double xd = x(local_index, e);
            for (int dx = 0; dx < c_dofs1D; ++dx)
            {
               for (int j=0; j<2; ++j)
               {
                  w[j][dx][ey] += Bc(ex, dx) * xd * vk(j, local_index, e);
               }
            }
         }
      }

      // contract in y
      for (int dx = 0; dx < c_dofs1D; ++dx)
      {
         for (int dy = 0; dy < c_dofs1D; ++dy)
         {
            for (int j=0; j<2; ++j)
            {
               double s = 0.0;
               for (int ey = 0; ey < o_dofs1D; ++ey)
               {
                  s += w[j][dx][ey] * Bo(ey, dy);
               }
               y(dx, dy, j, e) += s;
            }
         }
      }
   });
}
6438 
AssemblePA(const FiniteElementSpace & trial_fes,const FiniteElementSpace & test_fes)6439 void IdentityInterpolator::AssemblePA(const FiniteElementSpace &trial_fes,
6440                                       const FiniteElementSpace &test_fes)
6441 {
6442    // Assumes tensor-product elements, with a vector test space and H^1 trial space.
6443    Mesh *mesh = trial_fes.GetMesh();
6444    const FiniteElement *trial_fel = trial_fes.GetFE(0);
6445    const FiniteElement *test_fel = test_fes.GetFE(0);
6446 
6447    const NodalTensorFiniteElement *trial_el =
6448       dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
6449    MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!");
6450 
6451    const VectorTensorFiniteElement *test_el =
6452       dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
6453    MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
6454 
6455    const int dims = trial_el->GetDim();
6456    MFEM_VERIFY(dims == 2 || dims == 3, "");
6457 
6458    dim = mesh->Dimension();
6459    MFEM_VERIFY(dim == 2 || dim == 3, "");
6460 
6461    MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
6462 
6463    ne = trial_fes.GetNE();
6464 
6465    const int order = trial_el->GetOrder();
6466    dofquad_fe = new H1_SegmentElement(order);
6467    mfem::QuadratureFunctions1D qf1d;
6468    mfem::IntegrationRule closed_ir;
6469    closed_ir.SetSize(order + 1);
6470    qf1d.GaussLobatto(order + 1, &closed_ir);
6471    mfem::IntegrationRule open_ir;
6472    open_ir.SetSize(order);
6473    qf1d.GaussLegendre(order, &open_ir);
6474 
6475    maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR);
6476    maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR);
6477 
6478    o_dofs1D = maps_O_C->nqpt;
6479    c_dofs1D = maps_C_C->nqpt;
6480    MFEM_VERIFY(maps_O_C->ndof == c_dofs1D &&
6481                maps_C_C->ndof == c_dofs1D, "Discrepancy in the number of DOFs");
6482 
6483    const int ndof_test = (dim == 3) ? 3 * c_dofs1D * c_dofs1D * o_dofs1D
6484                          : 2 * c_dofs1D * o_dofs1D;
6485 
6486    const IntegrationRule & Nodes = test_el->GetNodes();
6487 
6488    pa_data.SetSize(dim * ndof_test * ne, Device::GetMemoryType());
6489    auto op = Reshape(pa_data.HostWrite(), dim, ndof_test, ne);
6490 
6491    const Array<int> &dofmap = test_el->GetDofMap();
6492 
6493    if (dim == 3)
6494    {
6495       // Note that ND_HexahedronElement uses 6 vectors in tk rather than 3, with
6496       // the last 3 having negative signs. Here the signs are all positive, as
6497       // signs are applied in ElementRestriction.
6498 
6499       const double tk[9] = { 1.,0.,0.,  0.,1.,0.,  0.,0.,1. };
6500 
6501       for (int c=0; c<3; ++c)
6502       {
6503          for (int i=0; i<ndof_test/3; ++i)
6504          {
6505             const int d = (c*ndof_test/3) + i;
6506             // ND_HexahedronElement sets dof2tk = (dofmap < 0) ? 3+c : c, but here
6507             // no signs should be applied due to ElementRestriction.
6508             const int dof2tk = c;
6509             const int id = (dofmap[d] >= 0) ? dofmap[d] : -1 - dofmap[d];
6510 
6511             for (int e=0; e<ne; ++e)
6512             {
6513                double v[3];
6514                ElementTransformation *tr = mesh->GetElementTransformation(e);
6515                tr->SetIntPoint(&Nodes.IntPoint(id));
6516                tr->Jacobian().Mult(tk + dof2tk*dim, v);
6517 
6518                for (int j=0; j<3; ++j)
6519                {
6520                   op(j,d,e) = v[j];
6521                }
6522             }
6523          }
6524       }
6525    }
6526    else // 2D case
6527    {
6528       const double tk[4] = { 1.,0.,  0.,1. };
6529       for (int c=0; c<2; ++c)
6530       {
6531          for (int i=0; i<ndof_test/2; ++i)
6532          {
6533             const int d = (c*ndof_test/2) + i;
6534             // ND_QuadrilateralElement sets dof2tk = (dofmap < 0) ? 2+c : c, but here
6535             // no signs should be applied due to ElementRestriction.
6536             const int dof2tk = c;
6537             const int id = (dofmap[d] >= 0) ? dofmap[d] : -1 - dofmap[d];
6538 
6539             for (int e=0; e<ne; ++e)
6540             {
6541                double v[2];
6542                ElementTransformation *tr = mesh->GetElementTransformation(e);
6543                tr->SetIntPoint(&Nodes.IntPoint(id));
6544                tr->Jacobian().Mult(tk + dof2tk*dim, v);
6545 
6546                for (int j=0; j<2; ++j)
6547                {
6548                   op(j,d,e) = v[j];
6549                }
6550             }
6551          }
6552       }
6553    }
6554 }
6555 
AddMultPA(const Vector & x,Vector & y) const6556 void IdentityInterpolator::AddMultPA(const Vector &x, Vector &y) const
6557 {
6558    if (dim == 3)
6559    {
6560       PAHcurlVecH1IdentityApply3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B,
6561                                   pa_data, x, y);
6562    }
6563    else if (dim == 2)
6564    {
6565       PAHcurlVecH1IdentityApply2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B,
6566                                   pa_data, x, y);
6567    }
6568    else
6569    {
6570       mfem_error("Bad dimension!");
6571    }
6572 }
6573 
AddMultTransposePA(const Vector & x,Vector & y) const6574 void IdentityInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const
6575 {
6576    if (dim == 3)
6577    {
6578       PAHcurlVecH1IdentityApplyTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6579                                            maps_O_C->B, pa_data, x, y);
6580    }
6581    else if (dim == 2)
6582    {
6583       PAHcurlVecH1IdentityApplyTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6584                                            maps_O_C->B, pa_data, x, y);
6585    }
6586    else
6587    {
6588       mfem_error("Bad dimension!");
6589    }
6590 }
6591 
// Explicit instantiations of SmemPAHcurlMassAssembleDiagonal3D for fixed
// template argument pairs, so that these specializations are emitted in this
// translation unit.
// NOTE(review): the two template arguments are presumably compile-time values
// for (D1D, Q1D), with <0,0> standing for sizes chosen at run time -- confirm
// against the template definition.
template void SmemPAHcurlMassAssembleDiagonal3D<0,0>(const int D1D,
                                                     const int Q1D,
                                                     const int NE,
                                                     const bool symmetric,
                                                     const Array<double> &bo,
                                                     const Array<double> &bc,
                                                     const Vector &pa_data,
                                                     Vector &diag);

template void SmemPAHcurlMassAssembleDiagonal3D<2,3>(const int D1D,
                                                     const int Q1D,
                                                     const int NE,
                                                     const bool symmetric,
                                                     const Array<double> &bo,
                                                     const Array<double> &bc,
                                                     const Vector &pa_data,
                                                     Vector &diag);

template void SmemPAHcurlMassAssembleDiagonal3D<3,4>(const int D1D,
                                                     const int Q1D,
                                                     const int NE,
                                                     const bool symmetric,
                                                     const Array<double> &bo,
                                                     const Array<double> &bc,
                                                     const Vector &pa_data,
                                                     Vector &diag);
6618 
6619 template void SmemPAHcurlMassAssembleDiagonal3D<4,5>(const int D1D,
6620                                                      const int Q1D,
6621                                                      const int NE,
6622                                                      const bool symmetric,
6623                                                      const Array<double> &bo,
6624                                                      const Array<double> &bc,
6625                                                      const Vector &pa_data,
6626                                                      Vector &diag);
6627 
6628 template void SmemPAHcurlMassAssembleDiagonal3D<5,6>(const int D1D,
6629                                                      const int Q1D,
6630                                                      const int NE,
6631                                                      const bool symmetric,
6632                                                      const Array<double> &bo,
6633                                                      const Array<double> &bc,
6634                                                      const Vector &pa_data,
6635                                                      Vector &diag);
6636 
6637 template void SmemPAHcurlMassApply3D<0,0>(const int D1D,
6638                                           const int Q1D,
6639                                           const int NE,
6640                                           const bool symmetric,
6641                                           const Array<double> &bo,
6642                                           const Array<double> &bc,
6643                                           const Array<double> &bot,
6644                                           const Array<double> &bct,
6645                                           const Vector &pa_data,
6646                                           const Vector &x,
6647                                           Vector &y);
6648 
6649 template void SmemPAHcurlMassApply3D<2,3>(const int D1D,
6650                                           const int Q1D,
6651                                           const int NE,
6652                                           const bool symmetric,
6653                                           const Array<double> &bo,
6654                                           const Array<double> &bc,
6655                                           const Array<double> &bot,
6656                                           const Array<double> &bct,
6657                                           const Vector &pa_data,
6658                                           const Vector &x,
6659                                           Vector &y);
6660 
6661 template void SmemPAHcurlMassApply3D<3,4>(const int D1D,
6662                                           const int Q1D,
6663                                           const int NE,
6664                                           const bool symmetric,
6665                                           const Array<double> &bo,
6666                                           const Array<double> &bc,
6667                                           const Array<double> &bot,
6668                                           const Array<double> &bct,
6669                                           const Vector &pa_data,
6670                                           const Vector &x,
6671                                           Vector &y);
6672 
6673 template void SmemPAHcurlMassApply3D<4,5>(const int D1D,
6674                                           const int Q1D,
6675                                           const int NE,
6676                                           const bool symmetric,
6677                                           const Array<double> &bo,
6678                                           const Array<double> &bc,
6679                                           const Array<double> &bot,
6680                                           const Array<double> &bct,
6681                                           const Vector &pa_data,
6682                                           const Vector &x,
6683                                           Vector &y);
6684 
6685 template void SmemPAHcurlMassApply3D<5,6>(const int D1D,
6686                                           const int Q1D,
6687                                           const int NE,
6688                                           const bool symmetric,
6689                                           const Array<double> &bo,
6690                                           const Array<double> &bc,
6691                                           const Array<double> &bot,
6692                                           const Array<double> &bct,
6693                                           const Vector &pa_data,
6694                                           const Vector &x,
6695                                           Vector &y);
6696 
6697 } // namespace mfem
6698