1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2 \n\
3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4 using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5 to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\
6 boundary conditions in the x- and y-directions.\n\
7 \n\
8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10 \n\
11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12 \n\n";
13
14 /*
15 The equations for horizontal velocity (u,v) are
16
17 - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18 - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
19
20 where
21
22 eta = B/2 (epsilon + gamma)^((p-2)/2)
23
24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25 written in terms of the second invariant
26
27 gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
28
29 The surface boundary conditions are the natural conditions. The basal boundary conditions
30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
31
32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
33
34 The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the
35 map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through
36 the Jacobian of the coordinate transformation from a reference element to the physical element.
37
38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39 specially so that columns are never distributed, and are always contiguous in memory.
40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41 and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to
42 use compatible domain decomposition relative to the 3D DMDAs.
43
44 There are two compile-time options:
45
46 NO_SSE2:
47 If the host supports SSE2, we use integration code that has been vectorized with SSE2
48 intrinsics, unless this macro is defined. The intrinsics speed up integration by about
49 30% on my architecture (P8700, gcc-4.5 snapshot).
50
51 COMPUTE_LOWER_TRIANGULAR:
52 The element matrices we assemble are lower-triangular so it is not necessary to compute
53 all entries explicitly. If this macro is defined, the lower-triangular entries are
54 computed explicitly.
55
56 */
57
58 #if defined(PETSC_APPLE_FRAMEWORK)
59 #import <PETSc/petscsnes.h>
60 #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */
61 #else
62
63 #include <petscsnes.h>
64 #include <petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */
65 #endif
66 #include <ctype.h> /* toupper() */
67
68 #if defined(__cplusplus) || defined (PETSC_HAVE_WINDOWS_COMPILERS) || defined (__PGI)
69 /* c++ cannot handle [_restrict_] notation like C does */
70 #undef PETSC_RESTRICT
71 #define PETSC_RESTRICT
72 #endif
73
74 #if defined __SSE2__
75 # include <emmintrin.h>
76 #endif
77
78 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
79 #if !defined NO_SSE2 \
80 && !defined PETSC_USE_COMPLEX \
81 && !defined PETSC_USE_REAL_SINGLE \
82 && !defined PETSC_USE_REAL___FLOAT128 \
83 && !defined PETSC_USE_REAL___FP16 \
84 && defined __SSE2__
85 #define USE_SSE2_KERNELS 1
86 #else
87 #define USE_SSE2_KERNELS 0
88 #endif
89
90 static PetscClassId THI_CLASSID;
91
92 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
93 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
94 PETSC_UNUSED static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
95 PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573};
96 #define G 0.57735026918962573
97 #define H (0.5*(1.+G))
98 #define L (0.5*(1.-G))
99 #define M (-0.5)
100 #define P (0.5)
101 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
102 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
103 {0,H,0,0,0,L,0,0},
104 {0,0,H,0,0,0,L,0},
105 {0,0,0,H,0,0,0,L},
106 {L,0,0,0,H,0,0,0},
107 {0,L,0,0,0,H,0,0},
108 {0,0,L,0,0,0,H,0},
109 {0,0,0,L,0,0,0,H}};
110 static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
111 {{M*H,M*H,M},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} ,{M*L,M*L,P},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} },
112 {{M*H,0,0} ,{P*H,M*H,M},{0,P*H,0} ,{0,0,0} ,{M*L,0,0} ,{P*L,M*L,P},{0,P*L,0} ,{0,0,0} },
113 {{0,0,0} ,{0,M*H,0} ,{P*H,P*H,M},{M*H,0,0} ,{0,0,0} ,{0,M*L,0} ,{P*L,P*L,P},{M*L,0,0} },
114 {{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,M},{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,P}},
115 {{M*L,M*L,M},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} ,{M*H,M*H,P},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} },
116 {{M*L,0,0} ,{P*L,M*L,M},{0,P*L,0} ,{0,0,0} ,{M*H,0,0} ,{P*H,M*H,P},{0,P*H,0} ,{0,0,0} },
117 {{0,0,0} ,{0,M*L,0} ,{P*L,P*L,M},{M*L,0,0} ,{0,0,0} ,{0,M*H,0} ,{P*H,P*H,P},{M*H,0,0} },
118 {{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,M},{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,P}}};
119 /* Stanndard Gauss */
120 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
121 {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
122 {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
123 {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
124 {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
125 {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
126 {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
127 {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
128 static const PetscReal HexQDeriv_Gauss[8][8][3] = {
129 {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
130 {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
131 {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
132 {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
133 {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
134 {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
135 {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
136 {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
137 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
138 /* Standard 2x2 Gauss quadrature for the bottom layer. */
139 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
140 {L*H,H*H,H*L,L*L},
141 {L*L,H*L,H*H,L*H},
142 {H*L,L*L,L*H,H*H}};
143 static const PetscReal QuadQDeriv[4][4][2] = {
144 {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
145 {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
146 {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
147 {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
148 #undef G
149 #undef H
150 #undef L
151 #undef M
152 #undef P
153
154 #define HexExtract(x,i,j,k,n) do { \
155 (n)[0] = (x)[i][j][k]; \
156 (n)[1] = (x)[i+1][j][k]; \
157 (n)[2] = (x)[i+1][j+1][k]; \
158 (n)[3] = (x)[i][j+1][k]; \
159 (n)[4] = (x)[i][j][k+1]; \
160 (n)[5] = (x)[i+1][j][k+1]; \
161 (n)[6] = (x)[i+1][j+1][k+1]; \
162 (n)[7] = (x)[i][j+1][k+1]; \
163 } while (0)
164
165 #define HexExtractRef(x,i,j,k,n) do { \
166 (n)[0] = &(x)[i][j][k]; \
167 (n)[1] = &(x)[i+1][j][k]; \
168 (n)[2] = &(x)[i+1][j+1][k]; \
169 (n)[3] = &(x)[i][j+1][k]; \
170 (n)[4] = &(x)[i][j][k+1]; \
171 (n)[5] = &(x)[i+1][j][k+1]; \
172 (n)[6] = &(x)[i+1][j+1][k+1]; \
173 (n)[7] = &(x)[i][j+1][k+1]; \
174 } while (0)
175
176 #define QuadExtract(x,i,j,n) do { \
177 (n)[0] = (x)[i][j]; \
178 (n)[1] = (x)[i+1][j]; \
179 (n)[2] = (x)[i+1][j+1]; \
180 (n)[3] = (x)[i][j+1]; \
181 } while (0)
182
HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])183 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
184 {
185 PetscInt i;
186 dz[0] = dz[1] = dz[2] = 0;
187 for (i=0; i<8; i++) {
188 dz[0] += dphi[i][0] * zn[i];
189 dz[1] += dphi[i][1] * zn[i];
190 dz[2] += dphi[i][2] * zn[i];
191 }
192 }
193
HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal * PETSC_RESTRICT jw)194 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal *PETSC_RESTRICT jw)
195 {
196 const PetscReal jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}};
197 const PetscReal ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}};
198 const PetscReal jdet = jac[0][0]*jac[1][1]*jac[2][2];
199 PetscInt i;
200
201 for (i=0; i<8; i++) {
202 const PetscReal *dphir = HexQDeriv[q][i];
203 phi[i] = HexQInterp[q][i];
204 dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
205 dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
206 dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
207 }
208 *jw = 1.0 * jdet;
209 }
210
211 typedef struct _p_THI *THI;
212 typedef struct _n_Units *Units;
213
214 typedef struct {
215 PetscScalar u,v;
216 } Node;
217
218 typedef struct {
219 PetscScalar b; /* bed */
220 PetscScalar h; /* thickness */
221 PetscScalar beta2; /* friction */
222 } PrmNode;
223
224 typedef struct {
225 PetscReal min,max,cmin,cmax;
226 } PRange;
227
228 typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode;
229
230 struct _p_THI {
231 PETSCHEADER(int);
232 void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
233 PetscInt zlevels;
234 PetscReal Lx,Ly,Lz; /* Model domain */
235 PetscReal alpha; /* Bed angle */
236 Units units;
237 PetscReal dirichlet_scale;
238 PetscReal ssa_friction_scale;
239 PRange eta;
240 PRange beta2;
241 struct {
242 PetscReal Bd2,eps,exponent;
243 } viscosity;
244 struct {
245 PetscReal irefgam,eps2,exponent,refvel,epsvel;
246 } friction;
247 PetscReal rhog;
248 PetscBool no_slip;
249 PetscBool tridiagonal;
250 PetscBool coarse2d;
251 PetscBool verbose;
252 MatType mattype;
253 };
254
255 struct _n_Units {
256 /* fundamental */
257 PetscReal meter;
258 PetscReal kilogram;
259 PetscReal second;
260 /* derived */
261 PetscReal Pascal;
262 PetscReal year;
263 };
264
265 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,THI);
266 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,Mat,THI);
267 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,Mat,THI);
268
PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])269 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
270 {
271 const PetscScalar zm1 = zm-1,
272 znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
273 pn[1].b + pn[1].h*(PetscScalar)k/zm1,
274 pn[2].b + pn[2].h*(PetscScalar)k/zm1,
275 pn[3].b + pn[3].h*(PetscScalar)k/zm1,
276 pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
277 pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
278 pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
279 pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
280 PetscInt i;
281 for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
282 }
283
284 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode * p)285 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
286 {
287 Units units = thi->units;
288 PetscReal s = -x*PetscSinReal(thi->alpha);
289
290 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly);
291 p->h = s - p->b;
292 p->beta2 = 1e30;
293 }
294
THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode * p)295 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
296 {
297 Units units = thi->units;
298 PetscReal s = -x*PetscSinReal(thi->alpha);
299
300 p->b = s - 1000*units->meter;
301 p->h = s - p->b;
302 /* tau_b = beta2 v is a stress (Pa) */
303 p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter;
304 }
305
306 /* These are just toys */
307
308 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode * p)309 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
310 {
311 Units units = thi->units;
312 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
313 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
314 p->b = s - 1000*units->meter + 500*units->meter*PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
315 p->h = s - p->b;
316 p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
317 }
318
319 /* Like Z, but with 200 meter cliffs */
THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode * p)320 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
321 {
322 Units units = thi->units;
323 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
324 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
325
326 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
327 if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
328 p->h = s - p->b;
329 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter;
330 }
331
332 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode * p)333 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
334 {
335 Units units = thi->units;
336 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
337 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
338
339 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
340 p->h = s - p->b;
341 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter;
342 }
343
THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal * beta2,PetscReal * dbeta2)344 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
345 {
346 if (thi->friction.irefgam == 0) {
347 Units units = thi->units;
348 thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year));
349 thi->friction.eps2 = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
350 }
351 if (thi->friction.exponent == 0) {
352 *beta2 = rbeta2;
353 *dbeta2 = 0;
354 } else {
355 *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
356 *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
357 }
358 }
359
THIViscosity(THI thi,PetscReal gam,PetscReal * eta,PetscReal * deta)360 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
361 {
362 PetscReal Bd2,eps,exponent;
363 if (thi->viscosity.Bd2 == 0) {
364 Units units = thi->units;
365 const PetscReal
366 n = 3., /* Glen exponent */
367 p = 1. + 1./n, /* for Stokes */
368 A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
369 B = PetscPowReal(A,-1./n); /* hardness parameter */
370 thi->viscosity.Bd2 = B/2;
371 thi->viscosity.exponent = (p-2)/2;
372 thi->viscosity.eps = 0.5*PetscSqr(1e-5 / units->year);
373 }
374 Bd2 = thi->viscosity.Bd2;
375 exponent = thi->viscosity.exponent;
376 eps = thi->viscosity.eps;
377 *eta = Bd2 * PetscPowReal(eps + gam,exponent);
378 *deta = exponent * (*eta) / (eps + gam);
379 }
380
RangeUpdate(PetscReal * min,PetscReal * max,PetscReal x)381 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
382 {
383 if (x < *min) *min = x;
384 if (x > *max) *max = x;
385 }
386
PRangeClear(PRange * p)387 static void PRangeClear(PRange *p)
388 {
389 p->cmin = p->min = 1e100;
390 p->cmax = p->max = -1e100;
391 }
392
PRangeMinMax(PRange * p,PetscReal min,PetscReal max)393 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
394 {
395
396 PetscFunctionBeginUser;
397 p->cmin = min;
398 p->cmax = max;
399 if (min < p->min) p->min = min;
400 if (max > p->max) p->max = max;
401 PetscFunctionReturn(0);
402 }
403
THIDestroy(THI * thi)404 static PetscErrorCode THIDestroy(THI *thi)
405 {
406 PetscErrorCode ierr;
407
408 PetscFunctionBeginUser;
409 if (!*thi) PetscFunctionReturn(0);
410 if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; PetscFunctionReturn(0);}
411 ierr = PetscFree((*thi)->units);CHKERRQ(ierr);
412 ierr = PetscFree((*thi)->mattype);CHKERRQ(ierr);
413 ierr = PetscHeaderDestroy(thi);CHKERRQ(ierr);
414 PetscFunctionReturn(0);
415 }
416
THICreate(MPI_Comm comm,THI * inthi)417 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
418 {
419 static PetscBool registered = PETSC_FALSE;
420 THI thi;
421 Units units;
422 PetscErrorCode ierr;
423
424 PetscFunctionBeginUser;
425 *inthi = 0;
426 if (!registered) {
427 ierr = PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);CHKERRQ(ierr);
428 registered = PETSC_TRUE;
429 }
430 ierr = PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0);CHKERRQ(ierr);
431
432 ierr = PetscNew(&thi->units);CHKERRQ(ierr);
433 units = thi->units;
434 units->meter = 1e-2;
435 units->second = 1e-7;
436 units->kilogram = 1e-12;
437
438 ierr = PetscOptionsBegin(comm,NULL,"Scaled units options","");CHKERRQ(ierr);
439 {
440 ierr = PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);CHKERRQ(ierr);
441 ierr = PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);CHKERRQ(ierr);
442 ierr = PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);CHKERRQ(ierr);
443 }
444 ierr = PetscOptionsEnd();CHKERRQ(ierr);
445 units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
446 units->year = 31556926. * units->second; /* seconds per year */
447
448 thi->Lx = 10.e3;
449 thi->Ly = 10.e3;
450 thi->Lz = 1000;
451 thi->dirichlet_scale = 1;
452 thi->verbose = PETSC_FALSE;
453
454 ierr = PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");CHKERRQ(ierr);
455 {
456 QuadratureType quad = QUAD_GAUSS;
457 char homexp[] = "A";
458 char mtype[256] = MATSBAIJ;
459 PetscReal L,m = 1.0;
460 PetscBool flg;
461 L = thi->Lx;
462 ierr = PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);CHKERRQ(ierr);
463 if (flg) thi->Lx = thi->Ly = L;
464 ierr = PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);CHKERRQ(ierr);
465 ierr = PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);CHKERRQ(ierr);
466 ierr = PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);CHKERRQ(ierr);
467 ierr = PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);CHKERRQ(ierr);
468 switch (homexp[0] = toupper(homexp[0])) {
469 case 'A':
470 thi->initialize = THIInitialize_HOM_A;
471 thi->no_slip = PETSC_TRUE;
472 thi->alpha = 0.5;
473 break;
474 case 'C':
475 thi->initialize = THIInitialize_HOM_C;
476 thi->no_slip = PETSC_FALSE;
477 thi->alpha = 0.1;
478 break;
479 case 'X':
480 thi->initialize = THIInitialize_HOM_X;
481 thi->no_slip = PETSC_FALSE;
482 thi->alpha = 0.3;
483 break;
484 case 'Y':
485 thi->initialize = THIInitialize_HOM_Y;
486 thi->no_slip = PETSC_FALSE;
487 thi->alpha = 0.5;
488 break;
489 case 'Z':
490 thi->initialize = THIInitialize_HOM_Z;
491 thi->no_slip = PETSC_FALSE;
492 thi->alpha = 0.5;
493 break;
494 default:
495 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
496 }
497 ierr = PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);CHKERRQ(ierr);
498 switch (quad) {
499 case QUAD_GAUSS:
500 HexQInterp = HexQInterp_Gauss;
501 HexQDeriv = HexQDeriv_Gauss;
502 break;
503 case QUAD_LOBATTO:
504 HexQInterp = HexQInterp_Lobatto;
505 HexQDeriv = HexQDeriv_Lobatto;
506 break;
507 }
508 ierr = PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);CHKERRQ(ierr);
509
510 thi->friction.refvel = 100.;
511 thi->friction.epsvel = 1.;
512
513 ierr = PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL);CHKERRQ(ierr);
514 ierr = PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL);CHKERRQ(ierr);
515 ierr = PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);CHKERRQ(ierr);
516
517 thi->friction.exponent = (m-1)/2;
518
519 ierr = PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);CHKERRQ(ierr);
520 ierr = PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);CHKERRQ(ierr);
521 ierr = PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL);CHKERRQ(ierr);
522 ierr = PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL);CHKERRQ(ierr);
523 ierr = PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);CHKERRQ(ierr);
524 ierr = PetscStrallocpy(mtype,(char**)&thi->mattype);CHKERRQ(ierr);
525 ierr = PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);CHKERRQ(ierr);
526 }
527 ierr = PetscOptionsEnd();CHKERRQ(ierr);
528
529 /* dimensionalize */
530 thi->Lx *= units->meter;
531 thi->Ly *= units->meter;
532 thi->Lz *= units->meter;
533 thi->alpha *= PETSC_PI / 180;
534
535 PRangeClear(&thi->eta);
536 PRangeClear(&thi->beta2);
537
538 {
539 PetscReal u = 1000*units->meter/(3e7*units->second),
540 gradu = u / (100*units->meter),eta,deta,
541 rho = 910 * units->kilogram/PetscPowReal(units->meter,3),
542 grav = 9.81 * units->meter/PetscSqr(units->second),
543 driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter;
544 THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
545 thi->rhog = rho * grav;
546 if (thi->verbose) {
547 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal);CHKERRQ(ierr);
548 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving);CHKERRQ(ierr);
549 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu),(double)(2*eta*gradu/driving));CHKERRQ(ierr);
550 THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
551 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu),(double)(2*eta*1e-3*gradu/driving));CHKERRQ(ierr);
552 }
553 }
554
555 *inthi = thi;
556 PetscFunctionReturn(0);
557 }
558
THIInitializePrm(THI thi,DM da2prm,Vec prm)559 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm)
560 {
561 PrmNode **p;
562 PetscInt i,j,xs,xm,ys,ym,mx,my;
563 PetscErrorCode ierr;
564
565 PetscFunctionBeginUser;
566 ierr = DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);CHKERRQ(ierr);
567 ierr = DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
568 ierr = DMDAVecGetArray(da2prm,prm,&p);CHKERRQ(ierr);
569 for (i=xs; i<xs+xm; i++) {
570 for (j=ys; j<ys+ym; j++) {
571 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
572 thi->initialize(thi,xx,yy,&p[i][j]);
573 }
574 }
575 ierr = DMDAVecRestoreArray(da2prm,prm,&p);CHKERRQ(ierr);
576 PetscFunctionReturn(0);
577 }
578
THISetUpDM(THI thi,DM dm)579 static PetscErrorCode THISetUpDM(THI thi,DM dm)
580 {
581 PetscErrorCode ierr;
582 PetscInt refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s;
583 DMDAStencilType st;
584 DM da2prm;
585 Vec X;
586
587 PetscFunctionBeginUser;
588 ierr = DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);CHKERRQ(ierr);
589 if (dim == 2) {
590 ierr = DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st);CHKERRQ(ierr);
591 }
592 ierr = DMGetRefineLevel(dm,&refinelevel);CHKERRQ(ierr);
593 ierr = DMGetCoarsenLevel(dm,&coarsenlevel);CHKERRQ(ierr);
594 level = refinelevel - coarsenlevel;
595 ierr = DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm);CHKERRQ(ierr);
596 ierr = DMSetUp(da2prm);CHKERRQ(ierr);
597 ierr = DMCreateLocalVector(da2prm,&X);CHKERRQ(ierr);
598 {
599 PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
600 if (dim == 2) {
601 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g, num elements %D x %D (%D), size (m) %g x %g\n",level,(double)Lx,(double)Ly,Mx,My,Mx*My,(double)(Lx/Mx),(double)(Ly/My));CHKERRQ(ierr);
602 } else {
603 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %D x %D x %D (%D), size (m) %g x %g x %g\n",level,(double)Lx,(double)Ly,(double)Lz,Mx,My,Mz,Mx*My*Mz,(double)(Lx/Mx),(double)(Ly/My),(double)(1000./(Mz-1)));CHKERRQ(ierr);
604 }
605 }
606 ierr = THIInitializePrm(thi,da2prm,X);CHKERRQ(ierr);
607 if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */
608 ierr = DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);CHKERRQ(ierr);
609 }
610 if (thi->coarse2d) {
611 ierr = DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_2D,thi);CHKERRQ(ierr);
612 }
613 ierr = PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm);CHKERRQ(ierr);
614 ierr = PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X);CHKERRQ(ierr);
615 ierr = DMDestroy(&da2prm);CHKERRQ(ierr);
616 ierr = VecDestroy(&X);CHKERRQ(ierr);
617 PetscFunctionReturn(0);
618 }
619
DMCoarsenHook_THI(DM dmf,DM dmc,void * ctx)620 static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx)
621 {
622 THI thi = (THI)ctx;
623 PetscErrorCode ierr;
624 PetscInt rlevel,clevel;
625
626 PetscFunctionBeginUser;
627 ierr = THISetUpDM(thi,dmc);CHKERRQ(ierr);
628 ierr = DMGetRefineLevel(dmc,&rlevel);CHKERRQ(ierr);
629 ierr = DMGetCoarsenLevel(dmc,&clevel);CHKERRQ(ierr);
630 if (rlevel-clevel == 0) {ierr = DMSetMatType(dmc,MATAIJ);CHKERRQ(ierr);}
631 ierr = DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,NULL,thi);CHKERRQ(ierr);
632 PetscFunctionReturn(0);
633 }
634
DMRefineHook_THI(DM dmc,DM dmf,void * ctx)635 static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx)
636 {
637 THI thi = (THI)ctx;
638 PetscErrorCode ierr;
639
640 PetscFunctionBeginUser;
641 ierr = THISetUpDM(thi,dmf);CHKERRQ(ierr);
642 ierr = DMSetMatType(dmf,thi->mattype);CHKERRQ(ierr);
643 ierr = DMRefineHookAdd(dmf,DMRefineHook_THI,NULL,thi);CHKERRQ(ierr);
644 /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
645 ierr = DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,NULL,thi);CHKERRQ(ierr);
646 PetscFunctionReturn(0);
647 }
648
THIDAGetPrm(DM da,PrmNode *** prm)649 static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm)
650 {
651 PetscErrorCode ierr;
652 DM da2prm;
653 Vec X;
654
655 PetscFunctionBeginUser;
656 ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);CHKERRQ(ierr);
657 if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
658 ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);CHKERRQ(ierr);
659 if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
660 ierr = DMDAVecGetArray(da2prm,X,prm);CHKERRQ(ierr);
661 PetscFunctionReturn(0);
662 }
663
THIDARestorePrm(DM da,PrmNode *** prm)664 static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm)
665 {
666 PetscErrorCode ierr;
667 DM da2prm;
668 Vec X;
669
670 PetscFunctionBeginUser;
671 ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);CHKERRQ(ierr);
672 if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
673 ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);CHKERRQ(ierr);
674 if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
675 ierr = DMDAVecRestoreArray(da2prm,X,prm);CHKERRQ(ierr);
676 PetscFunctionReturn(0);
677 }
678
THIInitial(SNES snes,Vec X,void * ctx)679 static PetscErrorCode THIInitial(SNES snes,Vec X,void *ctx)
680 {
681 THI thi;
682 PetscInt i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
683 PetscReal hx,hy;
684 PrmNode **prm;
685 Node ***x;
686 PetscErrorCode ierr;
687 DM da;
688
689 PetscFunctionBeginUser;
690 ierr = SNESGetDM(snes,&da);CHKERRQ(ierr);
691 ierr = DMGetApplicationContext(da,&thi);CHKERRQ(ierr);
692 ierr = DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
693 ierr = DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
694 ierr = DMDAVecGetArray(da,X,&x);CHKERRQ(ierr);
695 ierr = THIDAGetPrm(da,&prm);CHKERRQ(ierr);
696 hx = thi->Lx / mx;
697 hy = thi->Ly / my;
698 for (i=xs; i<xs+xm; i++) {
699 for (j=ys; j<ys+ym; j++) {
700 for (k=zs; k<zs+zm; k++) {
701 const PetscScalar zm1 = zm-1,
702 drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
703 drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
704 x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
705 x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
706 }
707 }
708 }
709 ierr = DMDAVecRestoreArray(da,X,&x);CHKERRQ(ierr);
710 ierr = THIDARestorePrm(da,&prm);CHKERRQ(ierr);
711 PetscFunctionReturn(0);
712 }
713
PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar * PETSC_RESTRICT u,PetscScalar * PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal * eta,PetscReal * deta)714 static void PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar *PETSC_RESTRICT u,PetscScalar *PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal *eta,PetscReal *deta)
715 {
716 PetscInt l,ll;
717 PetscScalar gam;
718
719 du[0] = du[1] = du[2] = 0;
720 dv[0] = dv[1] = dv[2] = 0;
721 *u = 0;
722 *v = 0;
723 for (l=0; l<8; l++) {
724 *u += phi[l] * n[l].u;
725 *v += phi[l] * n[l].v;
726 for (ll=0; ll<3; ll++) {
727 du[ll] += dphi[l][ll] * n[l].u;
728 dv[ll] += dphi[l][ll] * n[l].v;
729 }
730 }
731 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]) + 0.25*PetscSqr(du[2]) + 0.25*PetscSqr(dv[2]);
732 THIViscosity(thi,PetscRealPart(gam),eta,deta);
733 }
734
PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar * u,PetscScalar * v,PetscScalar du[],PetscScalar dv[],PetscReal * eta,PetscReal * deta)735 static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta)
736 {
737 PetscInt l,ll;
738 PetscScalar gam;
739
740 du[0] = du[1] = 0;
741 dv[0] = dv[1] = 0;
742 *u = 0;
743 *v = 0;
744 for (l=0; l<4; l++) {
745 *u += phi[l] * n[l].u;
746 *v += phi[l] * n[l].v;
747 for (ll=0; ll<2; ll++) {
748 du[ll] += dphi[l][ll] * n[l].u;
749 dv[ll] += dphi[l][ll] * n[l].v;
750 }
751 }
752 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]);
753 THIViscosity(thi,PetscRealPart(gam),eta,deta);
754 }
755
THIFunctionLocal(DMDALocalInfo * info,Node *** x,Node *** f,THI thi)756 static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi)
757 {
758 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l;
759 PetscReal hx,hy,etamin,etamax,beta2min,beta2max;
760 PrmNode **prm;
761 PetscErrorCode ierr;
762
763 PetscFunctionBeginUser;
764 xs = info->zs;
765 ys = info->ys;
766 xm = info->zm;
767 ym = info->ym;
768 zm = info->xm;
769 hx = thi->Lx / info->mz;
770 hy = thi->Ly / info->my;
771
772 etamin = 1e100;
773 etamax = 0;
774 beta2min = 1e100;
775 beta2max = 0;
776
777 ierr = THIDAGetPrm(info->da,&prm);CHKERRQ(ierr);
778
779 for (i=xs; i<xs+xm; i++) {
780 for (j=ys; j<ys+ym; j++) {
781 PrmNode pn[4];
782 QuadExtract(prm,i,j,pn);
783 for (k=0; k<zm-1; k++) {
784 PetscInt ls = 0;
785 Node n[8],*fn[8];
786 PetscReal zn[8],etabase = 0;
787 PrmHexGetZ(pn,k,zm,zn);
788 HexExtract(x,i,j,k,n);
789 HexExtractRef(f,i,j,k,fn);
790 if (thi->no_slip && k == 0) {
791 for (l=0; l<4; l++) n[l].u = n[l].v = 0;
792 /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
793 ls = 4;
794 }
795 for (q=0; q<8; q++) {
796 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
797 PetscScalar du[3],dv[3],u,v;
798 HexGrad(HexQDeriv[q],zn,dz);
799 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
800 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
801 jw /= thi->rhog; /* scales residuals to be O(1) */
802 if (q == 0) etabase = eta;
803 RangeUpdate(&etamin,&etamax,eta);
804 for (l=ls; l<8; l++) { /* test functions */
805 const PetscReal ds[2] = {-PetscSinReal(thi->alpha),0};
806 const PetscReal pp = phi[l],*dp = dphi[l];
807 fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
808 fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
809 }
810 }
811 if (k == 0) { /* we are on a bottom face */
812 if (thi->no_slip) {
813 /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
814 * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature
815 * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
816 * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in
817 * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after
818 * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the
819 * assembled matrix (see the similar block in THIJacobianLocal).
820 *
821 * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
822 * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make
823 * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part,
824 * so the solution will exactly satisfy the boundary condition after the first linear iteration.
825 */
826 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.);
827 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
828 fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
829 fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
830 } else { /* Integrate over bottom face to apply boundary condition */
831 for (q=0; q<4; q++) {
832 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
833 PetscScalar u =0,v=0,rbeta2=0;
834 PetscReal beta2,dbeta2;
835 for (l=0; l<4; l++) {
836 u += phi[l]*n[l].u;
837 v += phi[l]*n[l].v;
838 rbeta2 += phi[l]*pn[l].beta2;
839 }
840 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
841 RangeUpdate(&beta2min,&beta2max,beta2);
842 for (l=0; l<4; l++) {
843 const PetscReal pp = phi[l];
844 fn[ls+l]->u += pp*jw*beta2*u;
845 fn[ls+l]->v += pp*jw*beta2*v;
846 }
847 }
848 }
849 }
850 }
851 }
852 }
853
854 ierr = THIDARestorePrm(info->da,&prm);CHKERRQ(ierr);
855
856 ierr = PRangeMinMax(&thi->eta,etamin,etamax);CHKERRQ(ierr);
857 ierr = PRangeMinMax(&thi->beta2,beta2min,beta2max);CHKERRQ(ierr);
858 PetscFunctionReturn(0);
859 }
860
THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)861 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
862 {
863 PetscErrorCode ierr;
864 PetscReal nrm;
865 PetscInt m;
866 PetscMPIInt rank;
867
868 PetscFunctionBeginUser;
869 ierr = MatNorm(B,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
870 ierr = MatGetSize(B,&m,0);CHKERRQ(ierr);
871 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank);CHKERRQ(ierr);
872 if (!rank) {
873 PetscScalar val0,val2;
874 ierr = MatGetValue(B,0,0,&val0);CHKERRQ(ierr);
875 ierr = MatGetValue(B,2,2,&val2);CHKERRQ(ierr);
876 ierr = PetscViewerASCIIPrintf(viewer,"Matrix dim %D norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax);CHKERRQ(ierr);
877 }
878 PetscFunctionReturn(0);
879 }
880
THISurfaceStatistics(DM da,Vec X,PetscReal * min,PetscReal * max,PetscReal * mean)881 static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
882 {
883 PetscErrorCode ierr;
884 Node ***x;
885 PetscInt i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
886 PetscReal umin = 1e100,umax=-1e100;
887 PetscScalar usum = 0.0,gusum;
888
889 PetscFunctionBeginUser;
890 *min = *max = *mean = 0;
891 ierr = DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
892 ierr = DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
893 if (zs != 0 || zm != mz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected decomposition");
894 ierr = DMDAVecGetArray(da,X,&x);CHKERRQ(ierr);
895 for (i=xs; i<xs+xm; i++) {
896 for (j=ys; j<ys+ym; j++) {
897 PetscReal u = PetscRealPart(x[i][j][zm-1].u);
898 RangeUpdate(&umin,&umax,u);
899 usum += u;
900 }
901 }
902 ierr = DMDAVecRestoreArray(da,X,&x);CHKERRQ(ierr);
903 ierr = MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
904 ierr = MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
905 ierr = MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
906 *mean = PetscRealPart(gusum) / (mx*my);
907 PetscFunctionReturn(0);
908 }
909
THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[])910 static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[])
911 {
912 MPI_Comm comm;
913 Vec X;
914 DM dm;
915 PetscErrorCode ierr;
916
917 PetscFunctionBeginUser;
918 ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
919 ierr = SNESGetSolution(snes,&X);CHKERRQ(ierr);
920 ierr = SNESGetDM(snes,&dm);CHKERRQ(ierr);
921 ierr = PetscPrintf(comm,"Solution statistics after solve: %s\n",name);CHKERRQ(ierr);
922 {
923 PetscInt its,lits;
924 SNESConvergedReason reason;
925 ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr);
926 ierr = SNESGetConvergedReason(snes,&reason);CHKERRQ(ierr);
927 ierr = SNESGetLinearSolveIterations(snes,&lits);CHKERRQ(ierr);
928 ierr = PetscPrintf(comm,"%s: Number of SNES iterations = %D, total linear iterations = %D\n",SNESConvergedReasons[reason],its,lits);CHKERRQ(ierr);
929 }
930 {
931 PetscReal nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
932 PetscInt i,j,m;
933 const PetscScalar *x;
934 ierr = VecNorm(X,NORM_2,&nrm2);CHKERRQ(ierr);
935 ierr = VecGetLocalSize(X,&m);CHKERRQ(ierr);
936 ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr);
937 for (i=0; i<m; i+=2) {
938 PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v);
939 tmin[0] = PetscMin(u,tmin[0]);
940 tmin[1] = PetscMin(v,tmin[1]);
941 tmin[2] = PetscMin(c,tmin[2]);
942 tmax[0] = PetscMax(u,tmax[0]);
943 tmax[1] = PetscMax(v,tmax[1]);
944 tmax[2] = PetscMax(c,tmax[2]);
945 }
946 ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr);
947 ierr = MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi));CHKERRQ(ierr);
948 ierr = MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi));CHKERRQ(ierr);
949 /* Dimensionalize to meters/year */
950 nrm2 *= thi->units->year / thi->units->meter;
951 for (j=0; j<3; j++) {
952 min[j] *= thi->units->year / thi->units->meter;
953 max[j] *= thi->units->year / thi->units->meter;
954 }
955 if (min[0] == 0.0) min[0] = 0.0;
956 ierr = PetscPrintf(comm,"|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2]);CHKERRQ(ierr);
957 {
958 PetscReal umin,umax,umean;
959 ierr = THISurfaceStatistics(dm,X,&umin,&umax,&umean);CHKERRQ(ierr);
960 umin *= thi->units->year / thi->units->meter;
961 umax *= thi->units->year / thi->units->meter;
962 umean *= thi->units->year / thi->units->meter;
963 ierr = PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean);CHKERRQ(ierr);
964 }
965 /* These values stay nondimensional */
966 ierr = PetscPrintf(comm,"Global eta range %g to %g converged range %g to %g\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax);CHKERRQ(ierr);
967 ierr = PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax);CHKERRQ(ierr);
968 }
969 PetscFunctionReturn(0);
970 }
971
THIJacobianLocal_2D(DMDALocalInfo * info,Node ** x,Mat J,Mat B,THI thi)972 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat J,Mat B,THI thi)
973 {
974 PetscInt xs,ys,xm,ym,i,j,q,l,ll;
975 PetscReal hx,hy;
976 PrmNode **prm;
977 PetscErrorCode ierr;
978
979 PetscFunctionBeginUser;
980 xs = info->ys;
981 ys = info->xs;
982 xm = info->ym;
983 ym = info->xm;
984 hx = thi->Lx / info->my;
985 hy = thi->Ly / info->mx;
986
987 ierr = MatZeroEntries(B);CHKERRQ(ierr);
988 ierr = THIDAGetPrm(info->da,&prm);CHKERRQ(ierr);
989
990 for (i=xs; i<xs+xm; i++) {
991 for (j=ys; j<ys+ym; j++) {
992 Node n[4];
993 PrmNode pn[4];
994 PetscScalar Ke[4*2][4*2];
995 QuadExtract(prm,i,j,pn);
996 QuadExtract(x,i,j,n);
997 ierr = PetscMemzero(Ke,sizeof(Ke));CHKERRQ(ierr);
998 for (q=0; q<4; q++) {
999 PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2;
1000 PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0;
1001 for (l=0; l<4; l++) {
1002 phi[l] = QuadQInterp[q][l];
1003 dphi[l][0] = QuadQDeriv[q][l][0]*2./hx;
1004 dphi[l][1] = QuadQDeriv[q][l][1]*2./hy;
1005 h += phi[l] * pn[l].h;
1006 rbeta2 += phi[l] * pn[l].beta2;
1007 }
1008 jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */
1009 PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1010 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1011 for (l=0; l<4; l++) {
1012 const PetscReal pp = phi[l],*dp = dphi[l];
1013 for (ll=0; ll<4; ll++) {
1014 const PetscReal ppl = phi[ll],*dpl = dphi[ll];
1015 PetscScalar dgdu,dgdv;
1016 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1];
1017 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0];
1018 /* Picard part */
1019 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1020 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1021 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1022 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1023 /* extra Newton terms */
1024 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale;
1025 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale;
1026 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale;
1027 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale;
1028 }
1029 }
1030 }
1031 {
1032 const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}};
1033 ierr = MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES);CHKERRQ(ierr);
1034 }
1035 }
1036 }
1037 ierr = THIDARestorePrm(info->da,&prm);CHKERRQ(ierr);
1038
1039 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1042 if (thi->verbose) {ierr = THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
1043 PetscFunctionReturn(0);
1044 }
1045
THIJacobianLocal_3D(DMDALocalInfo * info,Node *** x,Mat B,THI thi,THIAssemblyMode amode)1046 static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode)
1047 {
1048 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1049 PetscReal hx,hy;
1050 PrmNode **prm;
1051 PetscErrorCode ierr;
1052
1053 PetscFunctionBeginUser;
1054 xs = info->zs;
1055 ys = info->ys;
1056 xm = info->zm;
1057 ym = info->ym;
1058 zm = info->xm;
1059 hx = thi->Lx / info->mz;
1060 hy = thi->Ly / info->my;
1061
1062 ierr = MatZeroEntries(B);CHKERRQ(ierr);
1063 ierr = MatSetOption(B,MAT_SUBSET_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
1064 ierr = THIDAGetPrm(info->da,&prm);CHKERRQ(ierr);
1065
1066 for (i=xs; i<xs+xm; i++) {
1067 for (j=ys; j<ys+ym; j++) {
1068 PrmNode pn[4];
1069 QuadExtract(prm,i,j,pn);
1070 for (k=0; k<zm-1; k++) {
1071 Node n[8];
1072 PetscReal zn[8],etabase = 0;
1073 PetscScalar Ke[8*2][8*2];
1074 PetscInt ls = 0;
1075
1076 PrmHexGetZ(pn,k,zm,zn);
1077 HexExtract(x,i,j,k,n);
1078 ierr = PetscMemzero(Ke,sizeof(Ke));CHKERRQ(ierr);
1079 if (thi->no_slip && k == 0) {
1080 for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1081 ls = 4;
1082 }
1083 for (q=0; q<8; q++) {
1084 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
1085 PetscScalar du[3],dv[3],u,v;
1086 HexGrad(HexQDeriv[q],zn,dz);
1087 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1088 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1089 jw /= thi->rhog; /* residuals are scaled by this factor */
1090 if (q == 0) etabase = eta;
1091 for (l=ls; l<8; l++) { /* test functions */
1092 const PetscReal *PETSC_RESTRICT dp = dphi[l];
1093 #if USE_SSE2_KERNELS
1094 /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1095 __m128d
1096 p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5),
1097 p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)),
1098 du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]),
1099 dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]),
1100 jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta),
1101 dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]),
1102 dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta),
1103 p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */
1104 p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */
1105 pdu2dv2 = _mm_unpacklo_pd(du2,dv2), /* [du2, dv2] */
1106 du1pdv0 = _mm_add_pd(du1,dv0), /* du1 + dv0 */
1107 t1 = _mm_mul_pd(dp0,p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */
1108 t2 = _mm_mul_pd(dp1,p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */
1109
1110 #endif
1111 #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1112 for (ll=ls; ll<8; ll++) { /* trial functions */
1113 #else
1114 for (ll=l; ll<8; ll++) {
1115 #endif
1116 const PetscReal *PETSC_RESTRICT dpl = dphi[ll];
1117 if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */
1118 #if !USE_SSE2_KERNELS
1119 /* The analytic Jacobian in nice, easy-to-read form */
1120 {
1121 PetscScalar dgdu,dgdv;
1122 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1123 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1124 /* Picard part */
1125 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1126 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1127 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1128 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1129 /* extra Newton terms */
1130 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1131 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1132 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1133 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1134 }
1135 #else
1136 /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1137 * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1138 * by 25 to 30 percent. */
1139 {
1140 __m128d
1141 keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]),
1142 kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]),
1143 dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]),
1144 t0,t3,pdgduv;
1145 keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01),
1146 _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10),
1147 _mm_mul_pd(dp2jweta,dpl2))));
1148 kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01),
1149 _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10),
1150 _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1))))));
1151 pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)),
1152 _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))),
1153 _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10),
1154 _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1155 t0 = _mm_mul_pd(jwdeta,pdgduv); /* jw deta [dgdu, dgdv] */
1156 t3 = _mm_mul_pd(t0,du1pdv0); /* t0 (du1 + dv0) */
1157 _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0),
1158 _mm_add_pd(_mm_mul_pd(dp1,t3),
1159 _mm_mul_pd(t0,_mm_mul_pd(dp2,du2))))));
1160 _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0),
1161 _mm_add_pd(_mm_mul_pd(dp0,t3),
1162 _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2))))));
1163 }
1164 #endif
1165 }
1166 }
1167 }
1168 if (k == 0) { /* on a bottom face */
1169 if (thi->no_slip) {
1170 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1);
1171 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1172 Ke[0][0] = thi->dirichlet_scale*diagu;
1173 Ke[1][1] = thi->dirichlet_scale*diagv;
1174 } else {
1175 for (q=0; q<4; q++) {
1176 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
1177 PetscScalar u =0,v=0,rbeta2=0;
1178 PetscReal beta2,dbeta2;
1179 for (l=0; l<4; l++) {
1180 u += phi[l]*n[l].u;
1181 v += phi[l]*n[l].v;
1182 rbeta2 += phi[l]*pn[l].beta2;
1183 }
1184 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1185 for (l=0; l<4; l++) {
1186 const PetscReal pp = phi[l];
1187 for (ll=0; ll<4; ll++) {
1188 const PetscReal ppl = phi[ll];
1189 Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1190 Ke[l*2+0][ll*2+1] += pp*jw*dbeta2*u*v*ppl;
1191 Ke[l*2+1][ll*2+0] += pp*jw*dbeta2*v*u*ppl;
1192 Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1193 }
1194 }
1195 }
1196 }
1197 }
1198 {
1199 const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}};
1200 if (amode == THIASSEMBLY_TRIDIAGONAL) {
1201 for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1202 const PetscInt l4 = l+4;
1203 const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}};
1204 #if defined COMPUTE_LOWER_TRIANGULAR
1205 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1206 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1207 {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1208 {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1209 #else
1210 /* Same as above except for the lower-left block */
1211 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1212 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1213 {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1214 {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1215 #endif
1216 ierr = MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES);CHKERRQ(ierr);
1217 }
1218 } else {
1219 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1220 for (l=0; l<8; l++) {
1221 for (ll=l+1; ll<8; ll++) {
1222 Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1223 Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1224 Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1225 Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1226 }
1227 }
1228 #endif
1229 ierr = MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES);CHKERRQ(ierr);
1230 }
1231 }
1232 }
1233 }
1234 }
1235 ierr = THIDARestorePrm(info->da,&prm);CHKERRQ(ierr);
1236
1237 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1238 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1239 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1240 if (thi->verbose) {ierr = THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
1241 PetscFunctionReturn(0);
1242 }
1243
1244 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi)
1245 {
1246 PetscErrorCode ierr;
1247
1248 PetscFunctionBeginUser;
1249 ierr = THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL);CHKERRQ(ierr);
1250 PetscFunctionReturn(0);
1251 }
1252
1253 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi)
1254 {
1255 PetscErrorCode ierr;
1256
1257 PetscFunctionBeginUser;
1258 ierr = THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL);CHKERRQ(ierr);
1259 PetscFunctionReturn(0);
1260 }
1261
1262 static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[])
1263 {
1264 PetscErrorCode ierr;
1265 THI thi;
1266 PetscInt dim,M,N,m,n,s,dof;
1267 DM dac,daf;
1268 DMDAStencilType st;
1269 DM_DA *ddf,*ddc;
1270
1271 PetscFunctionBeginUser;
1272 ierr = PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi);CHKERRQ(ierr);
1273 if (!thi) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance");
1274 if (nlevels > 1) {
1275 ierr = DMRefineHierarchy(dac0,nlevels-1,hierarchy);CHKERRQ(ierr);
1276 dac = hierarchy[nlevels-2];
1277 } else {
1278 dac = dac0;
1279 }
1280 ierr = DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st);CHKERRQ(ierr);
1281 if (dim != 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs");
1282
1283 /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1284 ierr = DMDACreate3d(PetscObjectComm((PetscObject)dac),DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf);CHKERRQ(ierr);
1285 ierr = DMSetUp(daf);CHKERRQ(ierr);
1286
1287 daf->ops->creatematrix = dac->ops->creatematrix;
1288 daf->ops->createinterpolation = dac->ops->createinterpolation;
1289 daf->ops->getcoloring = dac->ops->getcoloring;
1290 ddf = (DM_DA*)daf->data;
1291 ddc = (DM_DA*)dac->data;
1292 ddf->interptype = ddc->interptype;
1293
1294 ierr = DMDASetFieldName(daf,0,"x-velocity");CHKERRQ(ierr);
1295 ierr = DMDASetFieldName(daf,1,"y-velocity");CHKERRQ(ierr);
1296
1297 hierarchy[nlevels-1] = daf;
1298 PetscFunctionReturn(0);
1299 }
1300
1301 static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale)
1302 {
1303 PetscErrorCode ierr;
1304 PetscInt dim;
1305
1306 PetscFunctionBeginUser;
1307 PetscValidHeaderSpecific(dac,DM_CLASSID,1);
1308 PetscValidHeaderSpecific(daf,DM_CLASSID,2);
1309 PetscValidPointer(A,3);
1310 if (scale) PetscValidPointer(scale,4);
1311 ierr = DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
1312 if (dim == 2) {
1313 /* We are in the 2D problem and use normal DMDA interpolation */
1314 ierr = DMCreateInterpolation(dac,daf,A,scale);CHKERRQ(ierr);
1315 } else {
1316 PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart;
1317 Mat B;
1318
1319 ierr = DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1320 ierr = DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
1321 if (zs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"unexpected");
1322 ierr = MatCreate(PetscObjectComm((PetscObject)daf),&B);CHKERRQ(ierr);
1323 ierr = MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my);CHKERRQ(ierr);
1324
1325 ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
1326 ierr = MatSeqAIJSetPreallocation(B,1,NULL);CHKERRQ(ierr);
1327 ierr = MatMPIAIJSetPreallocation(B,1,NULL,0,NULL);CHKERRQ(ierr);
1328 ierr = MatGetOwnershipRange(B,&rstart,NULL);CHKERRQ(ierr);
1329 ierr = MatGetOwnershipRangeColumn(B,&cstart,NULL);CHKERRQ(ierr);
1330 for (i=xs; i<xs+xm; i++) {
1331 for (j=ys; j<ys+ym; j++) {
1332 for (k=zs; k<zs+zm; k++) {
1333 PetscInt i2 = i*ym+j,i3 = i2*zm+k;
1334 PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */
1335 ierr = MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES);CHKERRQ(ierr);
1336 }
1337 }
1338 }
1339 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1340 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1341 ierr = MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A);CHKERRQ(ierr);
1342 ierr = MatDestroy(&B);CHKERRQ(ierr);
1343 }
1344 PetscFunctionReturn(0);
1345 }
1346
1347 static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,Mat *J)
1348 {
1349 PetscErrorCode ierr;
1350 Mat A;
1351 PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3];
1352 ISLocalToGlobalMapping ltog;
1353
1354 PetscFunctionBeginUser;
1355 ierr = DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1356 if (dim != 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D");
1357 ierr = DMDAGetCorners(da,0,0,0,&zm,&ym,&xm);CHKERRQ(ierr);
1358 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr);
1359 ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
1360 ierr = MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1361 ierr = MatSetType(A,da->mattype);CHKERRQ(ierr);
1362 ierr = MatSetFromOptions(A);CHKERRQ(ierr);
1363 ierr = MatSeqAIJSetPreallocation(A,3*2,NULL);CHKERRQ(ierr);
1364 ierr = MatMPIAIJSetPreallocation(A,3*2,NULL,0,NULL);CHKERRQ(ierr);
1365 ierr = MatSeqBAIJSetPreallocation(A,2,3,NULL);CHKERRQ(ierr);
1366 ierr = MatMPIBAIJSetPreallocation(A,2,3,NULL,0,NULL);CHKERRQ(ierr);
1367 ierr = MatSeqSBAIJSetPreallocation(A,2,2,NULL);CHKERRQ(ierr);
1368 ierr = MatMPISBAIJSetPreallocation(A,2,2,NULL,0,NULL);CHKERRQ(ierr);
1369 ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
1370 ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
1371 ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
1372 *J = A;
1373 PetscFunctionReturn(0);
1374 }
1375
1376 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[])
1377 {
1378 const PetscInt dof = 2;
1379 Units units = thi->units;
1380 MPI_Comm comm;
1381 PetscErrorCode ierr;
1382 PetscViewer viewer;
1383 PetscMPIInt rank,size,tag,nn,nmax;
1384 PetscInt mx,my,mz,r,range[6];
1385 const PetscScalar *x;
1386
1387 PetscFunctionBeginUser;
1388 ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
1389 ierr = DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1390 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1391 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1392 ierr = PetscViewerASCIIOpen(comm,filename,&viewer);CHKERRQ(ierr);
1393 ierr = PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");CHKERRQ(ierr);
1394 ierr = PetscViewerASCIIPrintf(viewer," <StructuredGrid WholeExtent=\"%d %D %d %D %d %D\">\n",0,mz-1,0,my-1,0,mx-1);CHKERRQ(ierr);
1395
1396 ierr = DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5);CHKERRQ(ierr);
1397 ierr = PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn);CHKERRQ(ierr);
1398 ierr = MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1399 tag = ((PetscObject) viewer)->tag;
1400 ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr);
1401 if (!rank) {
1402 PetscScalar *array;
1403 ierr = PetscMalloc1(nmax,&array);CHKERRQ(ierr);
1404 for (r=0; r<size; r++) {
1405 PetscInt i,j,k,xs,xm,ys,ym,zs,zm;
1406 const PetscScalar *ptr;
1407 MPI_Status status;
1408 if (r) {
1409 ierr = MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
1410 }
1411 zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1412 if (xm*ym*zm*dof > nmax) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen");
1413 if (r) {
1414 ierr = MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);CHKERRQ(ierr);
1415 ierr = MPI_Get_count(&status,MPIU_SCALAR,&nn);CHKERRQ(ierr);
1416 if (nn != xm*ym*zm*dof) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen");
1417 ptr = array;
1418 } else ptr = x;
1419 ierr = PetscViewerASCIIPrintf(viewer," <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);CHKERRQ(ierr);
1420
1421 ierr = PetscViewerASCIIPrintf(viewer," <Points>\n");CHKERRQ(ierr);
1422 ierr = PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1423 for (i=xs; i<xs+xm; i++) {
1424 for (j=ys; j<ys+ym; j++) {
1425 for (k=zs; k<zs+zm; k++) {
1426 PrmNode p;
1427 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz;
1428 thi->initialize(thi,xx,yy,&p);
1429 zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1);
1430 ierr = PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)xx,(double)yy,(double)zz);CHKERRQ(ierr);
1431 }
1432 }
1433 }
1434 ierr = PetscViewerASCIIPrintf(viewer," </DataArray>\n");CHKERRQ(ierr);
1435 ierr = PetscViewerASCIIPrintf(viewer," </Points>\n");CHKERRQ(ierr);
1436
1437 ierr = PetscViewerASCIIPrintf(viewer," <PointData>\n");CHKERRQ(ierr);
1438 ierr = PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1439 for (i=0; i<nn; i+=dof) {
1440 ierr = PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)(PetscRealPart(ptr[i])*units->year/units->meter),(double)(PetscRealPart(ptr[i+1])*units->year/units->meter),0.0);CHKERRQ(ierr);
1441 }
1442 ierr = PetscViewerASCIIPrintf(viewer," </DataArray>\n");CHKERRQ(ierr);
1443
1444 ierr = PetscViewerASCIIPrintf(viewer," <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");CHKERRQ(ierr);
1445 for (i=0; i<nn; i+=dof) {
1446 ierr = PetscViewerASCIIPrintf(viewer,"%D\n",r);CHKERRQ(ierr);
1447 }
1448 ierr = PetscViewerASCIIPrintf(viewer," </DataArray>\n");CHKERRQ(ierr);
1449 ierr = PetscViewerASCIIPrintf(viewer," </PointData>\n");CHKERRQ(ierr);
1450
1451 ierr = PetscViewerASCIIPrintf(viewer," </Piece>\n");CHKERRQ(ierr);
1452 }
1453 ierr = PetscFree(array);CHKERRQ(ierr);
1454 } else {
1455 ierr = MPI_Send(range,6,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
1456 ierr = MPI_Send((PetscScalar*)x,nn,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr);
1457 }
1458 ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr);
1459 ierr = PetscViewerASCIIPrintf(viewer," </StructuredGrid>\n");CHKERRQ(ierr);
1460 ierr = PetscViewerASCIIPrintf(viewer,"</VTKFile>\n");CHKERRQ(ierr);
1461 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1462 PetscFunctionReturn(0);
1463 }
1464
1465 int main(int argc,char *argv[])
1466 {
1467 MPI_Comm comm;
1468 THI thi;
1469 PetscErrorCode ierr;
1470 DM da;
1471 SNES snes;
1472
1473 ierr = PetscInitialize(&argc,&argv,0,help);if (ierr) return ierr;
1474 comm = PETSC_COMM_WORLD;
1475
1476 ierr = THICreate(comm,&thi);CHKERRQ(ierr);
1477 {
1478 PetscInt M = 3,N = 3,P = 2;
1479 ierr = PetscOptionsBegin(comm,NULL,"Grid resolution options","");CHKERRQ(ierr);
1480 {
1481 ierr = PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);CHKERRQ(ierr);
1482 N = M;
1483 ierr = PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);CHKERRQ(ierr);
1484 if (thi->coarse2d) {
1485 ierr = PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL);CHKERRQ(ierr);
1486 } else {
1487 ierr = PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);CHKERRQ(ierr);
1488 }
1489 }
1490 ierr = PetscOptionsEnd();CHKERRQ(ierr);
1491 if (thi->coarse2d) {
1492 ierr = DMDACreate2d(comm,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da);CHKERRQ(ierr);
1493 ierr = DMSetFromOptions(da);CHKERRQ(ierr);
1494 ierr = DMSetUp(da);CHKERRQ(ierr);
1495 da->ops->refinehierarchy = DMRefineHierarchy_THI;
1496 da->ops->createinterpolation = DMCreateInterpolation_DA_THI;
1497
1498 ierr = PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi);CHKERRQ(ierr);
1499 } else {
1500 ierr = DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);CHKERRQ(ierr);
1501 ierr = DMSetFromOptions(da);CHKERRQ(ierr);
1502 ierr = DMSetUp(da);CHKERRQ(ierr);
1503 }
1504 ierr = DMDASetFieldName(da,0,"x-velocity");CHKERRQ(ierr);
1505 ierr = DMDASetFieldName(da,1,"y-velocity");CHKERRQ(ierr);
1506 }
1507 ierr = THISetUpDM(thi,da);CHKERRQ(ierr);
1508 if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal;
1509
1510 { /* Set the fine level matrix type if -da_refine */
1511 PetscInt rlevel,clevel;
1512 ierr = DMGetRefineLevel(da,&rlevel);CHKERRQ(ierr);
1513 ierr = DMGetCoarsenLevel(da,&clevel);CHKERRQ(ierr);
1514 if (rlevel - clevel > 0) {ierr = DMSetMatType(da,thi->mattype);CHKERRQ(ierr);}
1515 }
1516
1517 ierr = DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunction)THIFunctionLocal,thi);CHKERRQ(ierr);
1518 if (thi->tridiagonal) {
1519 ierr = DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal,thi);CHKERRQ(ierr);
1520 } else {
1521 ierr = DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);CHKERRQ(ierr);
1522 }
1523 ierr = DMCoarsenHookAdd(da,DMCoarsenHook_THI,NULL,thi);CHKERRQ(ierr);
1524 ierr = DMRefineHookAdd(da,DMRefineHook_THI,NULL,thi);CHKERRQ(ierr);
1525
1526 ierr = DMSetApplicationContext(da,thi);CHKERRQ(ierr);
1527
1528 ierr = SNESCreate(comm,&snes);CHKERRQ(ierr);
1529 ierr = SNESSetDM(snes,da);CHKERRQ(ierr);
1530 ierr = DMDestroy(&da);CHKERRQ(ierr);
1531 ierr = SNESSetComputeInitialGuess(snes,THIInitial,NULL);CHKERRQ(ierr);
1532 ierr = SNESSetFromOptions(snes);CHKERRQ(ierr);
1533
1534 ierr = SNESSolve(snes,NULL,NULL);CHKERRQ(ierr);
1535
1536 ierr = THISolveStatistics(thi,snes,0,"Full");CHKERRQ(ierr);
1537
1538 {
1539 PetscBool flg;
1540 char filename[PETSC_MAX_PATH_LEN] = "";
1541 ierr = PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg);CHKERRQ(ierr);
1542 if (flg) {
1543 Vec X;
1544 DM dm;
1545 ierr = SNESGetSolution(snes,&X);CHKERRQ(ierr);
1546 ierr = SNESGetDM(snes,&dm);CHKERRQ(ierr);
1547 ierr = THIDAVecView_VTK_XML(thi,dm,X,filename);CHKERRQ(ierr);
1548 }
1549 }
1550
1551 ierr = DMDestroy(&da);CHKERRQ(ierr);
1552 ierr = SNESDestroy(&snes);CHKERRQ(ierr);
1553 ierr = THIDestroy(&thi);CHKERRQ(ierr);
1554 ierr = PetscFinalize();
1555 return ierr;
1556 }
1557
1558
1559 /*TEST
1560
1561 build:
1562 requires: !single
1563
1564 test:
1565 args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc
1566
1567 test:
1568 suffix: 2
1569 nsize: 2
1570 args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1
1571
1572 test:
1573 suffix: 3
1574 nsize: 3
1575 args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current
1576
1577 test:
1578 suffix: 4
1579 nsize: 6
1580 args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1
1581
1582 test:
1583 suffix: 5
1584 nsize: 6
1585 args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}}
1586
1587 TEST*/
1588