1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "ceed/diffusion.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 
22 // PA Vector Diffusion Integrator
23 
24 // PA Diffusion Assemble 2D kernel
PAVectorDiffusionSetup2D(const int Q1D,const int NE,const Array<double> & w,const Vector & j,const Vector & c,Vector & op)25 static void PAVectorDiffusionSetup2D(const int Q1D,
26                                      const int NE,
27                                      const Array<double> &w,
28                                      const Vector &j,
29                                      const Vector &c,
30                                      Vector &op)
31 {
32    const int NQ = Q1D*Q1D;
33    auto W = w.Read();
34 
35    auto J = Reshape(j.Read(), NQ, 2, 2, NE);
36    auto y = Reshape(op.Write(), NQ, 3, NE);
37 
38    const bool const_c = c.Size() == 1;
39    const auto C = const_c ? Reshape(c.Read(), 1,1) :
40                   Reshape(c.Read(), NQ, NE);
41 
42 
43    MFEM_FORALL(e, NE,
44    {
45       for (int q = 0; q < NQ; ++q)
46       {
47          const double J11 = J(q,0,0,e);
48          const double J21 = J(q,1,0,e);
49          const double J12 = J(q,0,1,e);
50          const double J22 = J(q,1,1,e);
51 
52          const double C1 = const_c ? C(0,0) : C(q,e);
53          const double c_detJ = W[q] * C1 / ((J11*J22)-(J21*J12));
54          y(q,0,e) =  c_detJ * (J12*J12 + J22*J22); // 1,1
55          y(q,1,e) = -c_detJ * (J12*J11 + J22*J21); // 1,2
56          y(q,2,e) =  c_detJ * (J11*J11 + J21*J21); // 2,2
57       }
58    });
59 }
60 
61 // PA Diffusion Assemble 3D kernel
PAVectorDiffusionSetup3D(const int Q1D,const int NE,const Array<double> & w,const Vector & j,const Vector & c,Vector & op)62 static void PAVectorDiffusionSetup3D(const int Q1D,
63                                      const int NE,
64                                      const Array<double> &w,
65                                      const Vector &j,
66                                      const Vector &c,
67                                      Vector &op)
68 {
69    const int NQ = Q1D*Q1D*Q1D;
70    auto W = w.Read();
71    auto J = Reshape(j.Read(), NQ, 3, 3, NE);
72    auto y = Reshape(op.Write(), NQ, 6, NE);
73 
74    const bool const_c = c.Size() == 1;
75    const auto C = const_c ? Reshape(c.Read(), 1,1) :
76                   Reshape(c.Read(), NQ,NE);
77 
78 
79    MFEM_FORALL(e, NE,
80    {
81       for (int q = 0; q < NQ; ++q)
82       {
83          const double J11 = J(q,0,0,e);
84          const double J21 = J(q,1,0,e);
85          const double J31 = J(q,2,0,e);
86          const double J12 = J(q,0,1,e);
87          const double J22 = J(q,1,1,e);
88          const double J32 = J(q,2,1,e);
89          const double J13 = J(q,0,2,e);
90          const double J23 = J(q,1,2,e);
91          const double J33 = J(q,2,2,e);
92          const double detJ = J11 * (J22 * J33 - J32 * J23) -
93          /* */               J21 * (J12 * J33 - J32 * J13) +
94          /* */               J31 * (J12 * J23 - J22 * J13);
95 
96          const double C1 = const_c ? C(0,0) : C(q,e);
97 
98          const double c_detJ = W[q] * C1 / detJ;
99          // adj(J)
100          const double A11 = (J22 * J33) - (J23 * J32);
101          const double A12 = (J32 * J13) - (J12 * J33);
102          const double A13 = (J12 * J23) - (J22 * J13);
103          const double A21 = (J31 * J23) - (J21 * J33);
104          const double A22 = (J11 * J33) - (J13 * J31);
105          const double A23 = (J21 * J13) - (J11 * J23);
106          const double A31 = (J21 * J32) - (J31 * J22);
107          const double A32 = (J31 * J12) - (J11 * J32);
108          const double A33 = (J11 * J22) - (J12 * J21);
109          // detJ J^{-1} J^{-T} = (1/detJ) adj(J) adj(J)^T
110          y(q,0,e) = c_detJ * (A11*A11 + A12*A12 + A13*A13); // 1,1
111          y(q,1,e) = c_detJ * (A11*A21 + A12*A22 + A13*A23); // 2,1
112          y(q,2,e) = c_detJ * (A11*A31 + A12*A32 + A13*A33); // 3,1
113          y(q,3,e) = c_detJ * (A21*A21 + A22*A22 + A23*A23); // 2,2
114          y(q,4,e) = c_detJ * (A21*A31 + A22*A32 + A23*A33); // 3,2
115          y(q,5,e) = c_detJ * (A31*A31 + A32*A32 + A33*A33); // 3,3
116       }
117    });
118 }
119 
PAVectorDiffusionSetup(const int dim,const int Q1D,const int NE,const Array<double> & W,const Vector & J,const Vector & C,Vector & op)120 static void PAVectorDiffusionSetup(const int dim,
121                                    const int Q1D,
122                                    const int NE,
123                                    const Array<double> &W,
124                                    const Vector &J,
125                                    const Vector &C,
126                                    Vector &op)
127 {
128    if (!(dim == 2 || dim == 3))
129    {
130       MFEM_ABORT("Dimension not supported.");
131    }
132    if (dim == 2)
133    {
134       PAVectorDiffusionSetup2D(Q1D, NE, W, J, C, op);
135    }
136    if (dim == 3)
137    {
138       PAVectorDiffusionSetup3D(Q1D, NE, W, J, C, op);
139    }
140 }
141 
AssemblePA(const FiniteElementSpace & fes)142 void VectorDiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes)
143 {
144    // Assumes tensor-product elements
145    Mesh *mesh = fes.GetMesh();
146    const FiniteElement &el = *fes.GetFE(0);
147    const IntegrationRule *ir
148       = IntRule ? IntRule : &DiffusionIntegrator::GetRule(el, el);
149    if (DeviceCanUseCeed())
150    {
151       delete ceedOp;
152       ceedOp = new ceed::PADiffusionIntegrator(fes, *ir, Q);
153       return;
154    }
155    const int dims = el.GetDim();
156    const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
157    const int nq = ir->GetNPoints();
158    dim = mesh->Dimension();
159    sdim = mesh->SpaceDimension();
160    ne = fes.GetNE();
161    geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
162    maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
163    dofs1D = maps->ndof;
164    quad1D = maps->nqpt;
165    pa_data.SetSize(symmDims * nq * ne, Device::GetDeviceMemoryType());
166 
167    MFEM_VERIFY(!VQ && !MQ,
168                "Only scalar coefficient supported for partial assembly for VectorDiffusionIntegrator");
169    Vector coeff;
170    if (Q == nullptr)
171    {
172       coeff.SetSize(1);
173       coeff(0) = 1.0;
174    }
175    else if (ConstantCoefficient* cQ = dynamic_cast<ConstantCoefficient*>(Q))
176    {
177       coeff.SetSize(1);
178       coeff(0) = cQ->constant;
179    }
180    else if (QuadratureFunctionCoefficient* cQ =
181                dynamic_cast<QuadratureFunctionCoefficient*>(Q))
182    {
183       const QuadratureFunction &qFun = cQ->GetQuadFunction();
184       MFEM_VERIFY(qFun.Size() == ne*nq,
185                   "Incompatible QuadratureFunction dimension \n");
186 
187       MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
188                   "IntegrationRule used within integrator and in"
189                   " QuadratureFunction appear to be different");
190       qFun.Read();
191       coeff.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
192    }
193    else
194    {
195       coeff.SetSize(nq * ne);
196       auto Co = Reshape(coeff.HostWrite(), nq, ne);
197       for (int e = 0; e < ne; ++e)
198       {
199          ElementTransformation& T = *fes.GetElementTransformation(e);
200          for (int q = 0; q < nq; ++q)
201          {
202             Co(q,e) = Q->Eval(T, ir->IntPoint(q));
203          }
204       }
205    }
206 
207    const Array<double> &w = ir->GetWeights();
208    const Vector &j = geom->J;
209    Vector &d = pa_data;
210    if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAVectorDiffusionSetup"); }
211    if (dim == 2 && sdim == 3)
212    {
213       constexpr int DIM = 2;
214       constexpr int SDIM = 3;
215       const int NQ = quad1D*quad1D;
216       auto W = w.Read();
217       auto J = Reshape(j.Read(), NQ, SDIM, DIM, ne);
218       auto D = Reshape(d.Write(), NQ, SDIM, ne);
219 
220       const bool const_c = coeff.Size() == 1;
221       const auto C = const_c ? Reshape(coeff.Read(), 1,1) :
222                      Reshape(coeff.Read(), NQ,ne);
223 
224       MFEM_FORALL(e, ne,
225       {
226          for (int q = 0; q < NQ; ++q)
227          {
228             const double wq = W[q];
229             const double J11 = J(q,0,0,e);
230             const double J21 = J(q,1,0,e);
231             const double J31 = J(q,2,0,e);
232             const double J12 = J(q,0,1,e);
233             const double J22 = J(q,1,1,e);
234             const double J32 = J(q,2,1,e);
235             const double E = J11*J11 + J21*J21 + J31*J31;
236             const double G = J12*J12 + J22*J22 + J32*J32;
237             const double F = J11*J12 + J21*J22 + J31*J32;
238             const double iw = 1.0 / sqrt(E*G - F*F);
239             const double C1 = const_c ? C(0,0) : C(q,e);
240             const double alpha = wq * C1 * iw;
241             D(q,0,e) =  alpha * G; // 1,1
242             D(q,1,e) = -alpha * F; // 1,2
243             D(q,2,e) =  alpha * E; // 2,2
244          }
245       });
246    }
247    else
248    {
249       PAVectorDiffusionSetup(dim, quad1D, ne, w, j, coeff, d);
250    }
251 }
252 
253 // PA Diffusion Apply 2D kernel
254 template<int T_D1D = 0, int T_Q1D = 0, int T_VDIM = 0> static
PAVectorDiffusionApply2D(const int NE,const Array<double> & b,const Array<double> & g,const Array<double> & bt,const Array<double> & gt,const Vector & d_,const Vector & x_,Vector & y_,const int d1d=0,const int q1d=0,const int vdim=0)255 void PAVectorDiffusionApply2D(const int NE,
256                               const Array<double> &b,
257                               const Array<double> &g,
258                               const Array<double> &bt,
259                               const Array<double> &gt,
260                               const Vector &d_,
261                               const Vector &x_,
262                               Vector &y_,
263                               const int d1d = 0,
264                               const int q1d = 0,
265                               const int vdim = 0)
266 {
267    const int D1D = T_D1D ? T_D1D : d1d;
268    const int Q1D = T_Q1D ? T_Q1D : q1d;
269    const int VDIM = T_VDIM ? T_VDIM : vdim;
270    MFEM_VERIFY(D1D <= MAX_D1D, "");
271    MFEM_VERIFY(Q1D <= MAX_Q1D, "");
272    auto B = Reshape(b.Read(), Q1D, D1D);
273    auto G = Reshape(g.Read(), Q1D, D1D);
274    auto Bt = Reshape(bt.Read(), D1D, Q1D);
275    auto Gt = Reshape(gt.Read(), D1D, Q1D);
276    auto D = Reshape(d_.Read(), Q1D*Q1D, 3, NE);
277    auto x = Reshape(x_.Read(), D1D, D1D, VDIM, NE);
278    auto y = Reshape(y_.ReadWrite(), D1D, D1D, VDIM, NE);
279    MFEM_FORALL(e, NE,
280    {
281       const int D1D = T_D1D ? T_D1D : d1d;
282       const int Q1D = T_Q1D ? T_Q1D : q1d;
283       const int VDIM = T_VDIM ? T_VDIM : vdim;
284       constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
285       constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
286 
287       double grad[max_Q1D][max_Q1D][2];
288       for (int c = 0; c < VDIM; c++)
289       {
290          for (int qy = 0; qy < Q1D; ++qy)
291          {
292             for (int qx = 0; qx < Q1D; ++qx)
293             {
294                grad[qy][qx][0] = 0.0;
295                grad[qy][qx][1] = 0.0;
296             }
297          }
298          for (int dy = 0; dy < D1D; ++dy)
299          {
300             double gradX[max_Q1D][2];
301             for (int qx = 0; qx < Q1D; ++qx)
302             {
303                gradX[qx][0] = 0.0;
304                gradX[qx][1] = 0.0;
305             }
306             for (int dx = 0; dx < D1D; ++dx)
307             {
308                const double s = x(dx,dy,c,e);
309                for (int qx = 0; qx < Q1D; ++qx)
310                {
311                   gradX[qx][0] += s * B(qx,dx);
312                   gradX[qx][1] += s * G(qx,dx);
313                }
314             }
315             for (int qy = 0; qy < Q1D; ++qy)
316             {
317                const double wy  = B(qy,dy);
318                const double wDy = G(qy,dy);
319                for (int qx = 0; qx < Q1D; ++qx)
320                {
321                   grad[qy][qx][0] += gradX[qx][1] * wy;
322                   grad[qy][qx][1] += gradX[qx][0] * wDy;
323                }
324             }
325          }
326          // Calculate Dxy, xDy in plane
327          for (int qy = 0; qy < Q1D; ++qy)
328          {
329             for (int qx = 0; qx < Q1D; ++qx)
330             {
331                const int q = qx + qy * Q1D;
332                const double O11 = D(q,0,e);
333                const double O12 = D(q,1,e);
334                const double O22 = D(q,2,e);
335                const double gradX = grad[qy][qx][0];
336                const double gradY = grad[qy][qx][1];
337                grad[qy][qx][0] = (O11 * gradX) + (O12 * gradY);
338                grad[qy][qx][1] = (O12 * gradX) + (O22 * gradY);
339             }
340          }
341          for (int qy = 0; qy < Q1D; ++qy)
342          {
343             double gradX[max_D1D][2];
344             for (int dx = 0; dx < D1D; ++dx)
345             {
346                gradX[dx][0] = 0.0;
347                gradX[dx][1] = 0.0;
348             }
349             for (int qx = 0; qx < Q1D; ++qx)
350             {
351                const double gX = grad[qy][qx][0];
352                const double gY = grad[qy][qx][1];
353                for (int dx = 0; dx < D1D; ++dx)
354                {
355                   const double wx  = Bt(dx,qx);
356                   const double wDx = Gt(dx,qx);
357                   gradX[dx][0] += gX * wDx;
358                   gradX[dx][1] += gY * wx;
359                }
360             }
361             for (int dy = 0; dy < D1D; ++dy)
362             {
363                const double wy  = Bt(dy,qy);
364                const double wDy = Gt(dy,qy);
365                for (int dx = 0; dx < D1D; ++dx)
366                {
367                   y(dx,dy,c,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
368                }
369             }
370          }
371       }
372    });
373 }
374 
375 // PA Diffusion Apply 3D kernel
376 template<const int T_D1D = 0,
377          const int T_Q1D = 0> static
PAVectorDiffusionApply3D(const int NE,const Array<double> & b,const Array<double> & g,const Array<double> & bt,const Array<double> & gt,const Vector & op_,const Vector & x_,Vector & y_,int d1d=0,int q1d=0)378 void PAVectorDiffusionApply3D(const int NE,
379                               const Array<double> &b,
380                               const Array<double> &g,
381                               const Array<double> &bt,
382                               const Array<double> &gt,
383                               const Vector &op_,
384                               const Vector &x_,
385                               Vector &y_,
386                               int d1d = 0, int q1d = 0)
387 {
388    const int D1D = T_D1D ? T_D1D : d1d;
389    const int Q1D = T_Q1D ? T_Q1D : q1d;
390    constexpr int VDIM = 3;
391    MFEM_VERIFY(D1D <= MAX_D1D, "");
392    MFEM_VERIFY(Q1D <= MAX_Q1D, "");
393    auto B = Reshape(b.Read(), Q1D, D1D);
394    auto G = Reshape(g.Read(), Q1D, D1D);
395    auto Bt = Reshape(bt.Read(), D1D, Q1D);
396    auto Gt = Reshape(gt.Read(), D1D, Q1D);
397    auto op = Reshape(op_.Read(), Q1D*Q1D*Q1D, 6, NE);
398    auto x = Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
399    auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
400    MFEM_FORALL(e, NE,
401    {
402       const int D1D = T_D1D ? T_D1D : d1d;
403       const int Q1D = T_Q1D ? T_Q1D : q1d;
404       constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
405       constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
406       for (int c = 0; c < VDIM; ++ c)
407       {
408          double grad[max_Q1D][max_Q1D][max_Q1D][3];
409          for (int qz = 0; qz < Q1D; ++qz)
410          {
411             for (int qy = 0; qy < Q1D; ++qy)
412             {
413                for (int qx = 0; qx < Q1D; ++qx)
414                {
415                   grad[qz][qy][qx][0] = 0.0;
416                   grad[qz][qy][qx][1] = 0.0;
417                   grad[qz][qy][qx][2] = 0.0;
418                }
419             }
420          }
421          for (int dz = 0; dz < D1D; ++dz)
422          {
423             double gradXY[max_Q1D][max_Q1D][3];
424             for (int qy = 0; qy < Q1D; ++qy)
425             {
426                for (int qx = 0; qx < Q1D; ++qx)
427                {
428                   gradXY[qy][qx][0] = 0.0;
429                   gradXY[qy][qx][1] = 0.0;
430                   gradXY[qy][qx][2] = 0.0;
431                }
432             }
433             for (int dy = 0; dy < D1D; ++dy)
434             {
435                double gradX[max_Q1D][2];
436                for (int qx = 0; qx < Q1D; ++qx)
437                {
438                   gradX[qx][0] = 0.0;
439                   gradX[qx][1] = 0.0;
440                }
441                for (int dx = 0; dx < D1D; ++dx)
442                {
443                   const double s = x(dx,dy,dz,c,e);
444                   for (int qx = 0; qx < Q1D; ++qx)
445                   {
446                      gradX[qx][0] += s * B(qx,dx);
447                      gradX[qx][1] += s * G(qx,dx);
448                   }
449                }
450                for (int qy = 0; qy < Q1D; ++qy)
451                {
452                   const double wy  = B(qy,dy);
453                   const double wDy = G(qy,dy);
454                   for (int qx = 0; qx < Q1D; ++qx)
455                   {
456                      const double wx  = gradX[qx][0];
457                      const double wDx = gradX[qx][1];
458                      gradXY[qy][qx][0] += wDx * wy;
459                      gradXY[qy][qx][1] += wx  * wDy;
460                      gradXY[qy][qx][2] += wx  * wy;
461                   }
462                }
463             }
464             for (int qz = 0; qz < Q1D; ++qz)
465             {
466                const double wz  = B(qz,dz);
467                const double wDz = G(qz,dz);
468                for (int qy = 0; qy < Q1D; ++qy)
469                {
470                   for (int qx = 0; qx < Q1D; ++qx)
471                   {
472                      grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
473                      grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
474                      grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
475                   }
476                }
477             }
478          }
479          // Calculate Dxyz, xDyz, xyDz in plane
480          for (int qz = 0; qz < Q1D; ++qz)
481          {
482             for (int qy = 0; qy < Q1D; ++qy)
483             {
484                for (int qx = 0; qx < Q1D; ++qx)
485                {
486                   const int q = qx + (qy + qz * Q1D) * Q1D;
487                   const double O11 = op(q,0,e);
488                   const double O12 = op(q,1,e);
489                   const double O13 = op(q,2,e);
490                   const double O22 = op(q,3,e);
491                   const double O23 = op(q,4,e);
492                   const double O33 = op(q,5,e);
493                   const double gradX = grad[qz][qy][qx][0];
494                   const double gradY = grad[qz][qy][qx][1];
495                   const double gradZ = grad[qz][qy][qx][2];
496                   grad[qz][qy][qx][0] = (O11*gradX)+(O12*gradY)+(O13*gradZ);
497                   grad[qz][qy][qx][1] = (O12*gradX)+(O22*gradY)+(O23*gradZ);
498                   grad[qz][qy][qx][2] = (O13*gradX)+(O23*gradY)+(O33*gradZ);
499                }
500             }
501          }
502          for (int qz = 0; qz < Q1D; ++qz)
503          {
504             double gradXY[max_D1D][max_D1D][3];
505             for (int dy = 0; dy < D1D; ++dy)
506             {
507                for (int dx = 0; dx < D1D; ++dx)
508                {
509                   gradXY[dy][dx][0] = 0;
510                   gradXY[dy][dx][1] = 0;
511                   gradXY[dy][dx][2] = 0;
512                }
513             }
514             for (int qy = 0; qy < Q1D; ++qy)
515             {
516                double gradX[max_D1D][3];
517                for (int dx = 0; dx < D1D; ++dx)
518                {
519                   gradX[dx][0] = 0;
520                   gradX[dx][1] = 0;
521                   gradX[dx][2] = 0;
522                }
523                for (int qx = 0; qx < Q1D; ++qx)
524                {
525                   const double gX = grad[qz][qy][qx][0];
526                   const double gY = grad[qz][qy][qx][1];
527                   const double gZ = grad[qz][qy][qx][2];
528                   for (int dx = 0; dx < D1D; ++dx)
529                   {
530                      const double wx  = Bt(dx,qx);
531                      const double wDx = Gt(dx,qx);
532                      gradX[dx][0] += gX * wDx;
533                      gradX[dx][1] += gY * wx;
534                      gradX[dx][2] += gZ * wx;
535                   }
536                }
537                for (int dy = 0; dy < D1D; ++dy)
538                {
539                   const double wy  = Bt(dy,qy);
540                   const double wDy = Gt(dy,qy);
541                   for (int dx = 0; dx < D1D; ++dx)
542                   {
543                      gradXY[dy][dx][0] += gradX[dx][0] * wy;
544                      gradXY[dy][dx][1] += gradX[dx][1] * wDy;
545                      gradXY[dy][dx][2] += gradX[dx][2] * wy;
546                   }
547                }
548             }
549             for (int dz = 0; dz < D1D; ++dz)
550             {
551                const double wz  = Bt(dz,qz);
552                const double wDz = Gt(dz,qz);
553                for (int dy = 0; dy < D1D; ++dy)
554                {
555                   for (int dx = 0; dx < D1D; ++dx)
556                   {
557                      y(dx,dy,dz,c,e) +=
558                         ((gradXY[dy][dx][0] * wz) +
559                          (gradXY[dy][dx][1] * wz) +
560                          (gradXY[dy][dx][2] * wDz));
561                   }
562                }
563             }
564          }
565       }
566    });
567 }
568 
569 // PA Diffusion Apply kernel
AddMultPA(const Vector & x,Vector & y) const570 void VectorDiffusionIntegrator::AddMultPA(const Vector &x, Vector &y) const
571 {
572    if (DeviceCanUseCeed())
573    {
574       ceedOp->AddMult(x, y);
575    }
576    else
577    {
578       const int D1D = dofs1D;
579       const int Q1D = quad1D;
580       const Array<double> &B = maps->B;
581       const Array<double> &G = maps->G;
582       const Array<double> &Bt = maps->Bt;
583       const Array<double> &Gt = maps->Gt;
584       const Vector &D = pa_data;
585 
586       if (dim == 2 && sdim == 3)
587       {
588          switch ((dofs1D << 4 ) | quad1D)
589          {
590             case 0x22: return PAVectorDiffusionApply2D<2,2,3>(ne,B,G,Bt,Gt,D,x,y);
591             case 0x33: return PAVectorDiffusionApply2D<3,3,3>(ne,B,G,Bt,Gt,D,x,y);
592             case 0x44: return PAVectorDiffusionApply2D<4,4,3>(ne,B,G,Bt,Gt,D,x,y);
593             case 0x55: return PAVectorDiffusionApply2D<5,5,3>(ne,B,G,Bt,Gt,D,x,y);
594             default:
595                return PAVectorDiffusionApply2D(ne,B,G,Bt,Gt,D,x,y,D1D,Q1D,sdim);
596          }
597       }
598       if (dim == 2 && sdim == 2)
599       { return PAVectorDiffusionApply2D(ne,B,G,Bt,Gt,D,x,y,D1D,Q1D,sdim); }
600 
601       if (dim == 3 && sdim == 3)
602       { return PAVectorDiffusionApply3D(ne,B,G,Bt,Gt,D,x,y,D1D,Q1D); }
603 
604       MFEM_ABORT("Unknown kernel.");
605    }
606 }
607 
608 template<int T_D1D = 0, int T_Q1D = 0>
PAVectorDiffusionDiagonal2D(const int NE,const Array<double> & b,const Array<double> & g,const Vector & d,Vector & y,const int d1d=0,const int q1d=0)609 static void PAVectorDiffusionDiagonal2D(const int NE,
610                                         const Array<double> &b,
611                                         const Array<double> &g,
612                                         const Vector &d,
613                                         Vector &y,
614                                         const int d1d = 0,
615                                         const int q1d = 0)
616 {
617    const int D1D = T_D1D ? T_D1D : d1d;
618    const int Q1D = T_Q1D ? T_Q1D : q1d;
619    MFEM_VERIFY(D1D <= MAX_D1D, "");
620    MFEM_VERIFY(Q1D <= MAX_Q1D, "");
621    auto B = Reshape(b.Read(), Q1D, D1D);
622    auto G = Reshape(g.Read(), Q1D, D1D);
623    // note the different shape for D, this is a (symmetric) matrix so we only
624    // store necessary entries
625    auto D = Reshape(d.Read(), Q1D*Q1D, 3, NE);
626    auto Y = Reshape(y.ReadWrite(), D1D, D1D, 2, NE);
627    MFEM_FORALL(e, NE,
628    {
629       const int D1D = T_D1D ? T_D1D : d1d;
630       const int Q1D = T_Q1D ? T_Q1D : q1d;
631       constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
632       constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
633       // gradphi \cdot Q \gradphi has four terms
634       double QD0[MQ1][MD1];
635       double QD1[MQ1][MD1];
636       double QD2[MQ1][MD1];
637       for (int qx = 0; qx < Q1D; ++qx)
638       {
639          for (int dy = 0; dy < D1D; ++dy)
640          {
641             QD0[qx][dy] = 0.0;
642             QD1[qx][dy] = 0.0;
643             QD2[qx][dy] = 0.0;
644             for (int qy = 0; qy < Q1D; ++qy)
645             {
646                const int q = qx + qy * Q1D;
647                const double D0 = D(q,0,e);
648                const double D1 = D(q,1,e);
649                const double D2 = D(q,2,e);
650                QD0[qx][dy] += B(qy, dy) * B(qy, dy) * D0;
651                QD1[qx][dy] += B(qy, dy) * G(qy, dy) * D1;
652                QD2[qx][dy] += G(qy, dy) * G(qy, dy) * D2;
653             }
654          }
655       }
656       for (int dy = 0; dy < D1D; ++dy)
657       {
658          for (int dx = 0; dx < D1D; ++dx)
659          {
660             double temp = 0.0;
661             for (int qx = 0; qx < Q1D; ++qx)
662             {
663                temp += G(qx, dx) * G(qx, dx) * QD0[qx][dy];
664                temp += G(qx, dx) * B(qx, dx) * QD1[qx][dy];
665                temp += B(qx, dx) * G(qx, dx) * QD1[qx][dy];
666                temp += B(qx, dx) * B(qx, dx) * QD2[qx][dy];
667             }
668             Y(dx,dy,0,e) += temp;
669             Y(dx,dy,1,e) += temp;
670          }
671       }
672    });
673 }
674 
675 template<int T_D1D = 0, int T_Q1D = 0>
PAVectorDiffusionDiagonal3D(const int NE,const Array<double> & b,const Array<double> & g,const Vector & d,Vector & y,const int d1d=0,const int q1d=0)676 static void PAVectorDiffusionDiagonal3D(const int NE,
677                                         const Array<double> &b,
678                                         const Array<double> &g,
679                                         const Vector &d,
680                                         Vector &y,
681                                         const int d1d = 0,
682                                         const int q1d = 0)
683 {
684    constexpr int DIM = 3;
685    const int D1D = T_D1D ? T_D1D : d1d;
686    const int Q1D = T_Q1D ? T_Q1D : q1d;
687    constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
688    constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
689    MFEM_VERIFY(D1D <= MD1, "");
690    MFEM_VERIFY(Q1D <= MQ1, "");
691    auto B = Reshape(b.Read(), Q1D, D1D);
692    auto G = Reshape(g.Read(), Q1D, D1D);
693    auto Q = Reshape(d.Read(), Q1D*Q1D*Q1D, 6, NE);
694    auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, 3, NE);
695    MFEM_FORALL(e, NE,
696    {
697       const int D1D = T_D1D ? T_D1D : d1d;
698       const int Q1D = T_Q1D ? T_Q1D : q1d;
699       constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
700       constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
701       double QQD[MQ1][MQ1][MD1];
702       double QDD[MQ1][MD1][MD1];
703       for (int i = 0; i < DIM; ++i)
704       {
705          for (int j = 0; j < DIM; ++j)
706          {
707             // first tensor contraction, along z direction
708             for (int qx = 0; qx < Q1D; ++qx)
709             {
710                for (int qy = 0; qy < Q1D; ++qy)
711                {
712                   for (int dz = 0; dz < D1D; ++dz)
713                   {
714                      QQD[qx][qy][dz] = 0.0;
715                      for (int qz = 0; qz < Q1D; ++qz)
716                      {
717                         const int q = qx + (qy + qz * Q1D) * Q1D;
718                         const int k = j >= i ?
719                         3 - (3-i)*(2-i)/2 + j:
720                         3 - (3-j)*(2-j)/2 + i;
721                         const double O = Q(q,k,e);
722                         const double Bz = B(qz,dz);
723                         const double Gz = G(qz,dz);
724                         const double L = i==2 ? Gz : Bz;
725                         const double R = j==2 ? Gz : Bz;
726                         QQD[qx][qy][dz] += L * O * R;
727                      }
728                   }
729                }
730             }
731             // second tensor contraction, along y direction
732             for (int qx = 0; qx < Q1D; ++qx)
733             {
734                for (int dz = 0; dz < D1D; ++dz)
735                {
736                   for (int dy = 0; dy < D1D; ++dy)
737                   {
738                      QDD[qx][dy][dz] = 0.0;
739                      for (int qy = 0; qy < Q1D; ++qy)
740                      {
741                         const double By = B(qy,dy);
742                         const double Gy = G(qy,dy);
743                         const double L = i==1 ? Gy : By;
744                         const double R = j==1 ? Gy : By;
745                         QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
746                      }
747                   }
748                }
749             }
750             // third tensor contraction, along x direction
751             for (int dz = 0; dz < D1D; ++dz)
752             {
753                for (int dy = 0; dy < D1D; ++dy)
754                {
755                   for (int dx = 0; dx < D1D; ++dx)
756                   {
757                      double temp = 0.0;
758                      for (int qx = 0; qx < Q1D; ++qx)
759                      {
760                         const double Bx = B(qx,dx);
761                         const double Gx = G(qx,dx);
762                         const double L = i==0 ? Gx : Bx;
763                         const double R = j==0 ? Gx : Bx;
764                         temp += L * QDD[qx][dy][dz] * R;
765                      }
766                      Y(dx, dy, dz, 0, e) += temp;
767                      Y(dx, dy, dz, 1, e) += temp;
768                      Y(dx, dy, dz, 2, e) += temp;
769                   }
770                }
771             }
772          }
773       }
774    });
775 }
776 
PAVectorDiffusionAssembleDiagonal(const int dim,const int D1D,const int Q1D,const int NE,const Array<double> & B,const Array<double> & G,const Vector & op,Vector & y)777 static void PAVectorDiffusionAssembleDiagonal(const int dim,
778                                               const int D1D,
779                                               const int Q1D,
780                                               const int NE,
781                                               const Array<double> &B,
782                                               const Array<double> &G,
783                                               const Vector &op,
784                                               Vector &y)
785 {
786    if (dim == 2)
787    {
788       return PAVectorDiffusionDiagonal2D(NE, B, G, op, y, D1D, Q1D);
789    }
790    else if (dim == 3)
791    {
792       return PAVectorDiffusionDiagonal3D(NE, B, G, op, y, D1D, Q1D);
793    }
794    MFEM_ABORT("Dimension not implemented.");
795 }
796 
AssembleDiagonalPA(Vector & diag)797 void VectorDiffusionIntegrator::AssembleDiagonalPA(Vector &diag)
798 {
799    if (DeviceCanUseCeed())
800    {
801       ceedOp->GetDiagonal(diag);
802    }
803    else
804    {
805       PAVectorDiffusionAssembleDiagonal(dim,
806                                         dofs1D,
807                                         quad1D,
808                                         ne,
809                                         maps->B,
810                                         maps->G,
811                                         pa_data,
812                                         diag);
813    }
814 }
815 
816 } // namespace mfem
817