1
2 /*
3 Factorization code for SBAIJ format.
4 */
5
6 #include <../src/mat/impls/sbaij/seq/sbaij.h>
7 #include <../src/mat/impls/baij/seq/baij.h>
8 #include <petsc/private/kernels/blockinvert.h>
9
MatSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx)10 PetscErrorCode MatSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx)
11 {
12 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
13 IS isrow=a->row;
14 PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j;
15 PetscErrorCode ierr;
16 const PetscInt *r;
17 PetscInt nz,*vj,k,idx,k1;
18 PetscInt bs =A->rmap->bs,bs2 = a->bs2;
19 const MatScalar *aa=a->a,*v,*diag;
20 PetscScalar *x,*xk,*xj,*xk_tmp,*t;
21 const PetscScalar *b;
22
23 PetscFunctionBegin;
24 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
25 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
26 t = a->solve_work;
27 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
28 ierr = PetscMalloc1(bs,&xk_tmp);CHKERRQ(ierr);
29
30 /* solve U^T * D * y = b by forward substitution */
31 xk = t;
32 for (k=0; k<mbs; k++) { /* t <- perm(b) */
33 idx = bs*r[k];
34 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
35 }
36 for (k=0; k<mbs; k++) {
37 v = aa + bs2*ai[k];
38 xk = t + k*bs; /* Dk*xk = k-th block of x */
39 ierr = PetscArraycpy(xk_tmp,xk,bs);CHKERRQ(ierr); /* xk_tmp <- xk */
40 nz = ai[k+1] - ai[k];
41 vj = aj + ai[k];
42 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */
43 while (nz--) {
44 /* x(:) += U(k,:)^T*(Dk*xk) */
45 PetscKernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
46 vj++; xj = t + (*vj)*bs;
47 v += bs2;
48 }
49 /* xk = inv(Dk)*(Dk*xk) */
50 diag = aa+k*bs2; /* ptr to inv(Dk) */
51 PetscKernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
52 }
53
54 /* solve U*x = y by back substitution */
55 for (k=mbs-1; k>=0; k--) {
56 v = aa + bs2*ai[k];
57 xk = t + k*bs; /* xk */
58 nz = ai[k+1] - ai[k];
59 vj = aj + ai[k];
60 xj = t + (*vj)*bs;
61 while (nz--) {
62 /* xk += U(k,:)*x(:) */
63 PetscKernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
64 vj++;
65 v += bs2; xj = t + (*vj)*bs;
66 }
67 idx = bs*r[k];
68 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
69 }
70
71 ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
72 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
73 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
74 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
75 ierr = PetscLogFlops(4.0*bs2*a->nz -(bs+2.0*bs2)*mbs);CHKERRQ(ierr);
76 PetscFunctionReturn(0);
77 }
78
MatForwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx)79 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx)
80 {
81 PetscFunctionBegin;
82 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"not implemented yet");
83 PetscFunctionReturn(0);
84 }
85
MatBackwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx)86 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx)
87 {
88 PetscFunctionBegin;
89 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
90 PetscFunctionReturn(0);
91 }
92
MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscInt bs,PetscScalar * x)93 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
94 {
95 PetscErrorCode ierr;
96 PetscInt nz,k;
97 const PetscInt *vj,bs2 = bs*bs;
98 const MatScalar *v,*diag;
99 PetscScalar *xk,*xj,*xk_tmp;
100
101 PetscFunctionBegin;
102 ierr = PetscMalloc1(bs,&xk_tmp);CHKERRQ(ierr);
103 for (k=0; k<mbs; k++) {
104 v = aa + bs2*ai[k];
105 xk = x + k*bs; /* Dk*xk = k-th block of x */
106 ierr = PetscArraycpy(xk_tmp,xk,bs);CHKERRQ(ierr); /* xk_tmp <- xk */
107 nz = ai[k+1] - ai[k];
108 vj = aj + ai[k];
109 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */
110 while (nz--) {
111 /* x(:) += U(k,:)^T*(Dk*xk) */
112 PetscKernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
113 vj++; xj = x + (*vj)*bs;
114 v += bs2;
115 }
116 /* xk = inv(Dk)*(Dk*xk) */
117 diag = aa+k*bs2; /* ptr to inv(Dk) */
118 PetscKernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
119 }
120 ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
121 PetscFunctionReturn(0);
122 }
123
MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscInt bs,PetscScalar * x)124 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
125 {
126 PetscInt nz,k;
127 const PetscInt *vj,bs2 = bs*bs;
128 const MatScalar *v;
129 PetscScalar *xk,*xj;
130
131 PetscFunctionBegin;
132 for (k=mbs-1; k>=0; k--) {
133 v = aa + bs2*ai[k];
134 xk = x + k*bs; /* xk */
135 nz = ai[k+1] - ai[k];
136 vj = aj + ai[k];
137 xj = x + (*vj)*bs;
138 while (nz--) {
139 /* xk += U(k,:)*x(:) */
140 PetscKernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
141 vj++;
142 v += bs2; xj = x + (*vj)*bs;
143 }
144 }
145 PetscFunctionReturn(0);
146 }
147
MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)148 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
149 {
150 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
151 PetscErrorCode ierr;
152 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
153 PetscInt bs =A->rmap->bs;
154 const MatScalar *aa=a->a;
155 PetscScalar *x;
156 const PetscScalar *b;
157
158 PetscFunctionBegin;
159 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
160 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
161
162 /* solve U^T * D * y = b by forward substitution */
163 ierr = PetscArraycpy(x,b,bs*mbs);CHKERRQ(ierr); /* x <- b */
164 ierr = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
165
166 /* solve U*x = y by back substitution */
167 ierr = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
168
169 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
170 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
171 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
172 PetscFunctionReturn(0);
173 }
174
MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)175 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
176 {
177 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
178 PetscErrorCode ierr;
179 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
180 PetscInt bs =A->rmap->bs;
181 const MatScalar *aa=a->a;
182 const PetscScalar *b;
183 PetscScalar *x;
184
185 PetscFunctionBegin;
186 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
187 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
188 ierr = PetscArraycpy(x,b,bs*mbs);CHKERRQ(ierr); /* x <- b */
189 ierr = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
190 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
191 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
192 ierr = PetscLogFlops(2.0*a->bs2*a->nz - bs*mbs);CHKERRQ(ierr);
193 PetscFunctionReturn(0);
194 }
195
MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)196 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
197 {
198 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
199 PetscErrorCode ierr;
200 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
201 PetscInt bs =A->rmap->bs;
202 const MatScalar *aa=a->a;
203 const PetscScalar *b;
204 PetscScalar *x;
205
206 PetscFunctionBegin;
207 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
208 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
209 ierr = PetscArraycpy(x,b,bs*mbs);CHKERRQ(ierr);
210 ierr = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
211 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
212 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
213 ierr = PetscLogFlops(2.0*a->bs2*(a->nz-mbs));CHKERRQ(ierr);
214 PetscFunctionReturn(0);
215 }
216
MatSolve_SeqSBAIJ_7_inplace(Mat A,Vec bb,Vec xx)217 PetscErrorCode MatSolve_SeqSBAIJ_7_inplace(Mat A,Vec bb,Vec xx)
218 {
219 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
220 IS isrow=a->row;
221 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,*r,*vj;
222 PetscErrorCode ierr;
223 PetscInt nz,k,idx;
224 const MatScalar *aa=a->a,*v,*d;
225 PetscScalar *x,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
226 const PetscScalar *b;
227
228 PetscFunctionBegin;
229 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
230 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
231 t = a->solve_work;
232 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
233
234 /* solve U^T * D * y = b by forward substitution */
235 tp = t;
236 for (k=0; k<mbs; k++) { /* t <- perm(b) */
237 idx = 7*r[k];
238 tp[0] = b[idx];
239 tp[1] = b[idx+1];
240 tp[2] = b[idx+2];
241 tp[3] = b[idx+3];
242 tp[4] = b[idx+4];
243 tp[5] = b[idx+5];
244 tp[6] = b[idx+6];
245 tp += 7;
246 }
247
248 for (k=0; k<mbs; k++) {
249 v = aa + 49*ai[k];
250 vj = aj + ai[k];
251 tp = t + k*7;
252 x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
253 nz = ai[k+1] - ai[k];
254 tp = t + (*vj)*7;
255 while (nz--) {
256 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
257 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
258 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
259 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
260 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
261 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
262 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
263 vj++;
264 tp = t + (*vj)*7;
265 v += 49;
266 }
267
268 /* xk = inv(Dk)*(Dk*xk) */
269 d = aa+k*49; /* ptr to inv(Dk) */
270 tp = t + k*7;
271 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
272 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
273 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
274 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
275 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
276 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
277 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
278 }
279
280 /* solve U*x = y by back substitution */
281 for (k=mbs-1; k>=0; k--) {
282 v = aa + 49*ai[k];
283 vj = aj + ai[k];
284 tp = t + k*7;
285 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */
286 nz = ai[k+1] - ai[k];
287
288 tp = t + (*vj)*7;
289 while (nz--) {
290 /* xk += U(k,:)*x(:) */
291 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
292 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
293 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
294 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
295 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
296 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
297 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
298 vj++;
299 tp = t + (*vj)*7;
300 v += 49;
301 }
302 tp = t + k*7;
303 tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
304 idx = 7*r[k];
305 x[idx] = x0;
306 x[idx+1] = x1;
307 x[idx+2] = x2;
308 x[idx+3] = x3;
309 x[idx+4] = x4;
310 x[idx+5] = x5;
311 x[idx+6] = x6;
312 }
313
314 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
315 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
316 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
317 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
318 PetscFunctionReturn(0);
319 }
320
MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)321 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
322 {
323 const MatScalar *v,*d;
324 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6;
325 PetscInt nz,k;
326 const PetscInt *vj;
327
328 PetscFunctionBegin;
329 for (k=0; k<mbs; k++) {
330 v = aa + 49*ai[k];
331 xp = x + k*7;
332 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
333 nz = ai[k+1] - ai[k];
334 vj = aj + ai[k];
335 xp = x + (*vj)*7;
336 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
337 PetscPrefetchBlock(v+49*nz,49*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
338 while (nz--) {
339 /* x(:) += U(k,:)^T*(Dk*xk) */
340 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
341 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
342 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
343 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
344 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
345 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
346 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
347 vj++;
348 xp = x + (*vj)*7;
349 v += 49;
350 }
351 /* xk = inv(Dk)*(Dk*xk) */
352 d = aa+k*49; /* ptr to inv(Dk) */
353 xp = x + k*7;
354 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
355 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
356 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
357 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
358 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
359 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
360 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
361 }
362 PetscFunctionReturn(0);
363 }
364
MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)365 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
366 {
367 const MatScalar *v;
368 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6;
369 PetscInt nz,k;
370 const PetscInt *vj;
371
372 PetscFunctionBegin;
373 for (k=mbs-1; k>=0; k--) {
374 v = aa + 49*ai[k];
375 xp = x + k*7;
376 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
377 nz = ai[k+1] - ai[k];
378 vj = aj + ai[k];
379 xp = x + (*vj)*7;
380 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
381 PetscPrefetchBlock(v-49*nz,49*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
382 while (nz--) {
383 /* xk += U(k,:)*x(:) */
384 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
385 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
386 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
387 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
388 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
389 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
390 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
391 vj++;
392 v += 49; xp = x + (*vj)*7;
393 }
394 xp = x + k*7;
395 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
396 }
397 PetscFunctionReturn(0);
398 }
399
MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)400 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
401 {
402 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
403 PetscErrorCode ierr;
404 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
405 const MatScalar *aa=a->a;
406 PetscScalar *x;
407 const PetscScalar *b;
408
409 PetscFunctionBegin;
410 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
411 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
412
413 /* solve U^T * D * y = b by forward substitution */
414 ierr = PetscArraycpy(x,b,7*mbs);CHKERRQ(ierr); /* x <- b */
415 ierr = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
416
417 /* solve U*x = y by back substitution */
418 ierr = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
419
420 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
421 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
422 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
423 PetscFunctionReturn(0);
424 }
425
MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)426 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
427 {
428 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
429 PetscErrorCode ierr;
430 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
431 const MatScalar *aa=a->a;
432 PetscScalar *x;
433 const PetscScalar *b;
434
435 PetscFunctionBegin;
436 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
437 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
438 ierr = PetscArraycpy(x,b,7*mbs);CHKERRQ(ierr);
439 ierr = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
440 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
441 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
442 ierr = PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);CHKERRQ(ierr);
443 PetscFunctionReturn(0);
444 }
445
MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)446 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
447 {
448 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
449 PetscErrorCode ierr;
450 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
451 const MatScalar *aa=a->a;
452 PetscScalar *x;
453 const PetscScalar *b;
454
455 PetscFunctionBegin;
456 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
457 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
458 ierr = PetscArraycpy(x,b,7*mbs);CHKERRQ(ierr);
459 ierr = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
460 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
461 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
462 ierr = PetscLogFlops(2.0*a->bs2*(a->nz-mbs));CHKERRQ(ierr);
463 PetscFunctionReturn(0);
464 }
465
MatSolve_SeqSBAIJ_6_inplace(Mat A,Vec bb,Vec xx)466 PetscErrorCode MatSolve_SeqSBAIJ_6_inplace(Mat A,Vec bb,Vec xx)
467 {
468 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
469 IS isrow=a->row;
470 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j,*r,*vj;
471 PetscErrorCode ierr;
472 PetscInt nz,k,idx;
473 const MatScalar *aa=a->a,*v,*d;
474 PetscScalar *x,x0,x1,x2,x3,x4,x5,*t,*tp;
475 const PetscScalar *b;
476
477 PetscFunctionBegin;
478 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
480 t = a->solve_work;
481 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
482
483 /* solve U^T * D * y = b by forward substitution */
484 tp = t;
485 for (k=0; k<mbs; k++) { /* t <- perm(b) */
486 idx = 6*r[k];
487 tp[0] = b[idx];
488 tp[1] = b[idx+1];
489 tp[2] = b[idx+2];
490 tp[3] = b[idx+3];
491 tp[4] = b[idx+4];
492 tp[5] = b[idx+5];
493 tp += 6;
494 }
495
496 for (k=0; k<mbs; k++) {
497 v = aa + 36*ai[k];
498 vj = aj + ai[k];
499 tp = t + k*6;
500 x0 =tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
501 nz = ai[k+1] - ai[k];
502 tp = t + (*vj)*6;
503 while (nz--) {
504 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
505 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
506 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
507 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
508 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
509 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
510 vj++;
511 tp = t + (*vj)*6;
512 v += 36;
513 }
514
515 /* xk = inv(Dk)*(Dk*xk) */
516 d = aa+k*36; /* ptr to inv(Dk) */
517 tp = t + k*6;
518 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
519 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
520 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
521 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
522 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
523 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
524 }
525
526 /* solve U*x = y by back substitution */
527 for (k=mbs-1; k>=0; k--) {
528 v = aa + 36*ai[k];
529 vj = aj + ai[k];
530 tp = t + k*6;
531 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */
532 nz = ai[k+1] - ai[k];
533
534 tp = t + (*vj)*6;
535 while (nz--) {
536 /* xk += U(k,:)*x(:) */
537 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
538 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
539 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
540 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
541 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
542 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
543 vj++;
544 tp = t + (*vj)*6;
545 v += 36;
546 }
547 tp = t + k*6;
548 tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
549 idx = 6*r[k];
550 x[idx] = x0;
551 x[idx+1] = x1;
552 x[idx+2] = x2;
553 x[idx+3] = x3;
554 x[idx+4] = x4;
555 x[idx+5] = x5;
556 }
557
558 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
559 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
560 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
561 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
562 PetscFunctionReturn(0);
563 }
564
MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)565 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
566 {
567 const MatScalar *v,*d;
568 PetscScalar *xp,x0,x1,x2,x3,x4,x5;
569 PetscInt nz,k;
570 const PetscInt *vj;
571
572 PetscFunctionBegin;
573 for (k=0; k<mbs; k++) {
574 v = aa + 36*ai[k];
575 xp = x + k*6;
576 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
577 nz = ai[k+1] - ai[k];
578 vj = aj + ai[k];
579 xp = x + (*vj)*6;
580 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
581 PetscPrefetchBlock(v+36*nz,36*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
582 while (nz--) {
583 /* x(:) += U(k,:)^T*(Dk*xk) */
584 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
585 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
586 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
587 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
588 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
589 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
590 vj++;
591 xp = x + (*vj)*6;
592 v += 36;
593 }
594 /* xk = inv(Dk)*(Dk*xk) */
595 d = aa+k*36; /* ptr to inv(Dk) */
596 xp = x + k*6;
597 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
598 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
599 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
600 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
601 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
602 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
603 }
604 PetscFunctionReturn(0);
605 }
MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)606 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
607 {
608 const MatScalar *v;
609 PetscScalar *xp,x0,x1,x2,x3,x4,x5;
610 PetscInt nz,k;
611 const PetscInt *vj;
612
613 PetscFunctionBegin;
614 for (k=mbs-1; k>=0; k--) {
615 v = aa + 36*ai[k];
616 xp = x + k*6;
617 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
618 nz = ai[k+1] - ai[k];
619 vj = aj + ai[k];
620 xp = x + (*vj)*6;
621 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
622 PetscPrefetchBlock(v-36*nz,36*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
623 while (nz--) {
624 /* xk += U(k,:)*x(:) */
625 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
626 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
627 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
628 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
629 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
630 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
631 vj++;
632 v += 36; xp = x + (*vj)*6;
633 }
634 xp = x + k*6;
635 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
636 }
637 PetscFunctionReturn(0);
638 }
639
640
MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)641 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
642 {
643 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
644 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
645 const MatScalar *aa=a->a;
646 PetscScalar *x;
647 const PetscScalar *b;
648 PetscErrorCode ierr;
649
650 PetscFunctionBegin;
651 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
652 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
653
654 /* solve U^T * D * y = b by forward substitution */
655 ierr = PetscArraycpy(x,b,6*mbs);CHKERRQ(ierr); /* x <- b */
656 ierr = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
657
658 /* solve U*x = y by back substitution */
659 ierr = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
660
661 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
662 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
663 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
664 PetscFunctionReturn(0);
665 }
666
MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)667 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
668 {
669 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
670 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
671 const MatScalar *aa=a->a;
672 PetscScalar *x;
673 const PetscScalar *b;
674 PetscErrorCode ierr;
675
676 PetscFunctionBegin;
677 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
678 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
679 ierr = PetscArraycpy(x,b,6*mbs);CHKERRQ(ierr); /* x <- b */
680 ierr = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
681 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
682 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
683 ierr = PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);CHKERRQ(ierr);
684 PetscFunctionReturn(0);
685 }
686
MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)687 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
688 {
689 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
690 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
691 const MatScalar *aa=a->a;
692 PetscScalar *x;
693 const PetscScalar *b;
694 PetscErrorCode ierr;
695
696 PetscFunctionBegin;
697 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
698 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
699 ierr = PetscArraycpy(x,b,6*mbs);CHKERRQ(ierr); /* x <- b */
700 ierr = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
701 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
702 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
703 ierr = PetscLogFlops(2.0*a->bs2*(a->nz - mbs));CHKERRQ(ierr);
704 PetscFunctionReturn(0);
705 }
706
MatSolve_SeqSBAIJ_5_inplace(Mat A,Vec bb,Vec xx)707 PetscErrorCode MatSolve_SeqSBAIJ_5_inplace(Mat A,Vec bb,Vec xx)
708 {
709 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
710 IS isrow=a->row;
711 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j;
712 PetscErrorCode ierr;
713 const PetscInt *r,*vj;
714 PetscInt nz,k,idx;
715 const MatScalar *aa=a->a,*v,*diag;
716 PetscScalar *x,x0,x1,x2,x3,x4,*t,*tp;
717 const PetscScalar *b;
718
719 PetscFunctionBegin;
720 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
721 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
722 t = a->solve_work;
723 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
724
725 /* solve U^T * D * y = b by forward substitution */
726 tp = t;
727 for (k=0; k<mbs; k++) { /* t <- perm(b) */
728 idx = 5*r[k];
729 tp[0] = b[idx];
730 tp[1] = b[idx+1];
731 tp[2] = b[idx+2];
732 tp[3] = b[idx+3];
733 tp[4] = b[idx+4];
734 tp += 5;
735 }
736
737 for (k=0; k<mbs; k++) {
738 v = aa + 25*ai[k];
739 vj = aj + ai[k];
740 tp = t + k*5;
741 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
742 nz = ai[k+1] - ai[k];
743
744 tp = t + (*vj)*5;
745 while (nz--) {
746 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
747 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
748 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
749 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
750 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
751 vj++;
752 tp = t + (*vj)*5;
753 v += 25;
754 }
755
756 /* xk = inv(Dk)*(Dk*xk) */
757 diag = aa+k*25; /* ptr to inv(Dk) */
758 tp = t + k*5;
759 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
760 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
761 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
762 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
763 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
764 }
765
766 /* solve U*x = y by back substitution */
767 for (k=mbs-1; k>=0; k--) {
768 v = aa + 25*ai[k];
769 vj = aj + ai[k];
770 tp = t + k*5;
771 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; /* xk */
772 nz = ai[k+1] - ai[k];
773
774 tp = t + (*vj)*5;
775 while (nz--) {
776 /* xk += U(k,:)*x(:) */
777 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
778 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
779 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
780 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
781 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
782 vj++;
783 tp = t + (*vj)*5;
784 v += 25;
785 }
786 tp = t + k*5;
787 tp[0] = x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
788 idx = 5*r[k];
789 x[idx] = x0;
790 x[idx+1] = x1;
791 x[idx+2] = x2;
792 x[idx+3] = x3;
793 x[idx+4] = x4;
794 }
795
796 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
797 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
798 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
799 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
800 PetscFunctionReturn(0);
801 }
802
MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)803 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
804 {
805 const MatScalar *v,*diag;
806 PetscScalar *xp,x0,x1,x2,x3,x4;
807 PetscInt nz,k;
808 const PetscInt *vj;
809
810 PetscFunctionBegin;
811 for (k=0; k<mbs; k++) {
812 v = aa + 25*ai[k];
813 xp = x + k*5;
814 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
815 nz = ai[k+1] - ai[k];
816 vj = aj + ai[k];
817 xp = x + (*vj)*5;
818 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
819 PetscPrefetchBlock(v+25*nz,25*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
820 while (nz--) {
821 /* x(:) += U(k,:)^T*(Dk*xk) */
822 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
823 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
824 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
825 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
826 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
827 vj++;
828 xp = x + (*vj)*5;
829 v += 25;
830 }
831 /* xk = inv(Dk)*(Dk*xk) */
832 diag = aa+k*25; /* ptr to inv(Dk) */
833 xp = x + k*5;
834 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
835 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
836 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
837 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
838 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
839 }
840 PetscFunctionReturn(0);
841 }
842
MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)843 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
844 {
845 const MatScalar *v;
846 PetscScalar *xp,x0,x1,x2,x3,x4;
847 PetscInt nz,k;
848 const PetscInt *vj;
849
850 PetscFunctionBegin;
851 for (k=mbs-1; k>=0; k--) {
852 v = aa + 25*ai[k];
853 xp = x + k*5;
854 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; /* xk */
855 nz = ai[k+1] - ai[k];
856 vj = aj + ai[k];
857 xp = x + (*vj)*5;
858 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
859 PetscPrefetchBlock(v-25*nz,25*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
860 while (nz--) {
861 /* xk += U(k,:)*x(:) */
862 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
863 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
864 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
865 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
866 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
867 vj++;
868 v += 25; xp = x + (*vj)*5;
869 }
870 xp = x + k*5;
871 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
872 }
873 PetscFunctionReturn(0);
874 }
875
MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)876 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
877 {
878 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
879 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
880 const MatScalar *aa=a->a;
881 PetscScalar *x;
882 const PetscScalar *b;
883 PetscErrorCode ierr;
884
885 PetscFunctionBegin;
886 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
887 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
888
889 /* solve U^T * D * y = b by forward substitution */
890 ierr = PetscArraycpy(x,b,5*mbs);CHKERRQ(ierr); /* x <- b */
891 ierr = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
892
893 /* solve U*x = y by back substitution */
894 ierr = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
895
896 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
897 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
898 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
899 PetscFunctionReturn(0);
900 }
901
MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)902 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
903 {
904 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
905 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
906 const MatScalar *aa=a->a;
907 PetscScalar *x;
908 const PetscScalar *b;
909 PetscErrorCode ierr;
910
911 PetscFunctionBegin;
912 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
913 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
914 ierr = PetscArraycpy(x,b,5*mbs);CHKERRQ(ierr); /* x <- b */
915 ierr = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
916 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
917 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
918 ierr = PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);CHKERRQ(ierr);
919 PetscFunctionReturn(0);
920 }
921
MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)922 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
923 {
924 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
925 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
926 const MatScalar *aa=a->a;
927 PetscScalar *x;
928 const PetscScalar *b;
929 PetscErrorCode ierr;
930
931 PetscFunctionBegin;
932 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
933 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
934 ierr = PetscArraycpy(x,b,5*mbs);CHKERRQ(ierr);
935 ierr = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
936 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
937 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
938 ierr = PetscLogFlops(2.0*a->bs2*(a->nz-mbs));CHKERRQ(ierr);
939 PetscFunctionReturn(0);
940 }
941
MatSolve_SeqSBAIJ_4_inplace(Mat A,Vec bb,Vec xx)942 PetscErrorCode MatSolve_SeqSBAIJ_4_inplace(Mat A,Vec bb,Vec xx)
943 {
944 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
945 IS isrow=a->row;
946 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j;
947 PetscErrorCode ierr;
948 const PetscInt *r,*vj;
949 PetscInt nz,k,idx;
950 const MatScalar *aa=a->a,*v,*diag;
951 PetscScalar *x,x0,x1,x2,x3,*t,*tp;
952 const PetscScalar *b;
953
954 PetscFunctionBegin;
955 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
956 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
957 t = a->solve_work;
958 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
959
960 /* solve U^T * D * y = b by forward substitution */
961 tp = t;
962 for (k=0; k<mbs; k++) { /* t <- perm(b) */
963 idx = 4*r[k];
964 tp[0] = b[idx];
965 tp[1] = b[idx+1];
966 tp[2] = b[idx+2];
967 tp[3] = b[idx+3];
968 tp += 4;
969 }
970
971 for (k=0; k<mbs; k++) {
972 v = aa + 16*ai[k];
973 vj = aj + ai[k];
974 tp = t + k*4;
975 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
976 nz = ai[k+1] - ai[k];
977
978 tp = t + (*vj)*4;
979 while (nz--) {
980 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
981 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
982 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
983 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
984 vj++;
985 tp = t + (*vj)*4;
986 v += 16;
987 }
988
989 /* xk = inv(Dk)*(Dk*xk) */
990 diag = aa+k*16; /* ptr to inv(Dk) */
991 tp = t + k*4;
992 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
993 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
994 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
995 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
996 }
997
998 /* solve U*x = y by back substitution */
999 for (k=mbs-1; k>=0; k--) {
1000 v = aa + 16*ai[k];
1001 vj = aj + ai[k];
1002 tp = t + k*4;
1003 x0 = tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
1004 nz = ai[k+1] - ai[k];
1005
1006 tp = t + (*vj)*4;
1007 while (nz--) {
1008 /* xk += U(k,:)*x(:) */
1009 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
1010 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
1011 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
1012 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
1013 vj++;
1014 tp = t + (*vj)*4;
1015 v += 16;
1016 }
1017 tp = t + k*4;
1018 tp[0] =x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
1019 idx = 4*r[k];
1020 x[idx] = x0;
1021 x[idx+1] = x1;
1022 x[idx+2] = x2;
1023 x[idx+3] = x3;
1024 }
1025
1026 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1027 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1028 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1029 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
1030 PetscFunctionReturn(0);
1031 }
1032
MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)1033 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
1034 {
1035 const MatScalar *v,*diag;
1036 PetscScalar *xp,x0,x1,x2,x3;
1037 PetscInt nz,k;
1038 const PetscInt *vj;
1039
1040 PetscFunctionBegin;
1041 for (k=0; k<mbs; k++) {
1042 v = aa + 16*ai[k];
1043 xp = x + k*4;
1044 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
1045 nz = ai[k+1] - ai[k];
1046 vj = aj + ai[k];
1047 xp = x + (*vj)*4;
1048 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1049 PetscPrefetchBlock(v+16*nz,16*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1050 while (nz--) {
1051 /* x(:) += U(k,:)^T*(Dk*xk) */
1052 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
1053 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1054 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1055 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1056 vj++;
1057 xp = x + (*vj)*4;
1058 v += 16;
1059 }
1060 /* xk = inv(Dk)*(Dk*xk) */
1061 diag = aa+k*16; /* ptr to inv(Dk) */
1062 xp = x + k*4;
1063 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1064 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1065 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1066 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1067 }
1068 PetscFunctionReturn(0);
1069 }
1070
MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)1071 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
1072 {
1073 const MatScalar *v;
1074 PetscScalar *xp,x0,x1,x2,x3;
1075 PetscInt nz,k;
1076 const PetscInt *vj;
1077
1078 PetscFunctionBegin;
1079 for (k=mbs-1; k>=0; k--) {
1080 v = aa + 16*ai[k];
1081 xp = x + k*4;
1082 x0 = xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
1083 nz = ai[k+1] - ai[k];
1084 vj = aj + ai[k];
1085 xp = x + (*vj)*4;
1086 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1087 PetscPrefetchBlock(v-16*nz,16*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1088 while (nz--) {
1089 /* xk += U(k,:)*x(:) */
1090 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
1091 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
1092 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
1093 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
1094 vj++;
1095 v += 16; xp = x + (*vj)*4;
1096 }
1097 xp = x + k*4;
1098 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
1099 }
1100 PetscFunctionReturn(0);
1101 }
1102
MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1103 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1104 {
1105 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1106 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1107 const MatScalar *aa=a->a;
1108 PetscScalar *x;
1109 const PetscScalar *b;
1110 PetscErrorCode ierr;
1111
1112 PetscFunctionBegin;
1113 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1114 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1115
1116 /* solve U^T * D * y = b by forward substitution */
1117 ierr = PetscArraycpy(x,b,4*mbs);CHKERRQ(ierr); /* x <- b */
1118 ierr = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1119
1120 /* solve U*x = y by back substitution */
1121 ierr = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1122 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1123 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1124 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
1125 PetscFunctionReturn(0);
1126 }
1127
MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1128 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1129 {
1130 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1131 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1132 const MatScalar *aa=a->a;
1133 PetscScalar *x;
1134 const PetscScalar *b;
1135 PetscErrorCode ierr;
1136
1137 PetscFunctionBegin;
1138 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1140 ierr = PetscArraycpy(x,b,4*mbs);CHKERRQ(ierr); /* x <- b */
1141 ierr = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1142 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1143 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1144 ierr = PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);CHKERRQ(ierr);
1145 PetscFunctionReturn(0);
1146 }
1147
MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1148 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1149 {
1150 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1151 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1152 const MatScalar *aa=a->a;
1153 PetscScalar *x;
1154 const PetscScalar *b;
1155 PetscErrorCode ierr;
1156
1157 PetscFunctionBegin;
1158 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1159 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1160 ierr = PetscArraycpy(x,b,4*mbs);CHKERRQ(ierr);
1161 ierr = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1162 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1163 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1164 ierr = PetscLogFlops(2.0*a->bs2*(a->nz-mbs));CHKERRQ(ierr);
1165 PetscFunctionReturn(0);
1166 }
1167
MatSolve_SeqSBAIJ_3_inplace(Mat A,Vec bb,Vec xx)1168 PetscErrorCode MatSolve_SeqSBAIJ_3_inplace(Mat A,Vec bb,Vec xx)
1169 {
1170 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1171 IS isrow=a->row;
1172 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j;
1173 PetscErrorCode ierr;
1174 const PetscInt *r;
1175 PetscInt nz,k,idx;
1176 const PetscInt *vj;
1177 const MatScalar *aa=a->a,*v,*diag;
1178 PetscScalar *x,x0,x1,x2,*t,*tp;
1179 const PetscScalar *b;
1180
1181 PetscFunctionBegin;
1182 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1183 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1184 t = a->solve_work;
1185 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1186
1187 /* solve U^T * D * y = b by forward substitution */
1188 tp = t;
1189 for (k=0; k<mbs; k++) { /* t <- perm(b) */
1190 idx = 3*r[k];
1191 tp[0] = b[idx];
1192 tp[1] = b[idx+1];
1193 tp[2] = b[idx+2];
1194 tp += 3;
1195 }
1196
1197 for (k=0; k<mbs; k++) {
1198 v = aa + 9*ai[k];
1199 vj = aj + ai[k];
1200 tp = t + k*3;
1201 x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
1202 nz = ai[k+1] - ai[k];
1203
1204 tp = t + (*vj)*3;
1205 while (nz--) {
1206 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1207 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1208 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1209 vj++;
1210 tp = t + (*vj)*3;
1211 v += 9;
1212 }
1213
1214 /* xk = inv(Dk)*(Dk*xk) */
1215 diag = aa+k*9; /* ptr to inv(Dk) */
1216 tp = t + k*3;
1217 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1218 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1219 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1220 }
1221
1222 /* solve U*x = y by back substitution */
1223 for (k=mbs-1; k>=0; k--) {
1224 v = aa + 9*ai[k];
1225 vj = aj + ai[k];
1226 tp = t + k*3;
1227 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */
1228 nz = ai[k+1] - ai[k];
1229
1230 tp = t + (*vj)*3;
1231 while (nz--) {
1232 /* xk += U(k,:)*x(:) */
1233 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
1234 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
1235 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
1236 vj++;
1237 tp = t + (*vj)*3;
1238 v += 9;
1239 }
1240 tp = t + k*3;
1241 tp[0] = x0; tp[1] = x1; tp[2] = x2;
1242 idx = 3*r[k];
1243 x[idx] = x0;
1244 x[idx+1] = x1;
1245 x[idx+2] = x2;
1246 }
1247
1248 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1249 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1250 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1251 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
1252 PetscFunctionReturn(0);
1253 }
1254
MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)1255 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
1256 {
1257 const MatScalar *v,*diag;
1258 PetscScalar *xp,x0,x1,x2;
1259 PetscInt nz,k;
1260 const PetscInt *vj;
1261
1262 PetscFunctionBegin;
1263 for (k=0; k<mbs; k++) {
1264 v = aa + 9*ai[k];
1265 xp = x + k*3;
1266 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1267 nz = ai[k+1] - ai[k];
1268 vj = aj + ai[k];
1269 xp = x + (*vj)*3;
1270 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1271 PetscPrefetchBlock(v+9*nz,9*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1272 while (nz--) {
1273 /* x(:) += U(k,:)^T*(Dk*xk) */
1274 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1275 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1276 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1277 vj++;
1278 xp = x + (*vj)*3;
1279 v += 9;
1280 }
1281 /* xk = inv(Dk)*(Dk*xk) */
1282 diag = aa+k*9; /* ptr to inv(Dk) */
1283 xp = x + k*3;
1284 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1285 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1286 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1287 }
1288 PetscFunctionReturn(0);
1289 }
1290
MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)1291 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
1292 {
1293 const MatScalar *v;
1294 PetscScalar *xp,x0,x1,x2;
1295 PetscInt nz,k;
1296 const PetscInt *vj;
1297
1298 PetscFunctionBegin;
1299 for (k=mbs-1; k>=0; k--) {
1300 v = aa + 9*ai[k];
1301 xp = x + k*3;
1302 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */
1303 nz = ai[k+1] - ai[k];
1304 vj = aj + ai[k];
1305 xp = x + (*vj)*3;
1306 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1307 PetscPrefetchBlock(v-9*nz,9*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1308 while (nz--) {
1309 /* xk += U(k,:)*x(:) */
1310 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1311 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1312 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1313 vj++;
1314 v += 9; xp = x + (*vj)*3;
1315 }
1316 xp = x + k*3;
1317 xp[0] = x0; xp[1] = x1; xp[2] = x2;
1318 }
1319 PetscFunctionReturn(0);
1320 }
1321
MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1322 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1323 {
1324 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1325 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1326 const MatScalar *aa=a->a;
1327 PetscScalar *x;
1328 const PetscScalar *b;
1329 PetscErrorCode ierr;
1330
1331 PetscFunctionBegin;
1332 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1333 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1334
1335 /* solve U^T * D * y = b by forward substitution */
1336 ierr = PetscArraycpy(x,b,3*mbs);CHKERRQ(ierr);
1337 ierr = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1338
1339 /* solve U*x = y by back substitution */
1340 ierr = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1341
1342 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1343 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1344 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
1345 PetscFunctionReturn(0);
1346 }
1347
MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1348 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1349 {
1350 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1351 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1352 const MatScalar *aa=a->a;
1353 PetscScalar *x;
1354 const PetscScalar *b;
1355 PetscErrorCode ierr;
1356
1357 PetscFunctionBegin;
1358 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1359 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1360 ierr = PetscArraycpy(x,b,3*mbs);CHKERRQ(ierr);
1361 ierr = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1362 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1363 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1364 ierr = PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);CHKERRQ(ierr);
1365 PetscFunctionReturn(0);
1366 }
1367
MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1368 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1369 {
1370 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1371 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1372 const MatScalar *aa=a->a;
1373 PetscScalar *x;
1374 const PetscScalar *b;
1375 PetscErrorCode ierr;
1376
1377 PetscFunctionBegin;
1378 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1379 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1380 ierr = PetscArraycpy(x,b,3*mbs);CHKERRQ(ierr);
1381 ierr = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1382 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1383 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1384 ierr = PetscLogFlops(2.0*a->bs2*(a->nz-mbs));CHKERRQ(ierr);
1385 PetscFunctionReturn(0);
1386 }
1387
MatSolve_SeqSBAIJ_2_inplace(Mat A,Vec bb,Vec xx)1388 PetscErrorCode MatSolve_SeqSBAIJ_2_inplace(Mat A,Vec bb,Vec xx)
1389 {
1390 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1391 IS isrow=a->row;
1392 const PetscInt mbs =a->mbs,*ai=a->i,*aj=a->j;
1393 PetscErrorCode ierr;
1394 const PetscInt *r,*vj;
1395 PetscInt nz,k,k2,idx;
1396 const MatScalar *aa=a->a,*v,*diag;
1397 PetscScalar *x,x0,x1,*t;
1398 const PetscScalar *b;
1399
1400 PetscFunctionBegin;
1401 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1402 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1403 t = a->solve_work;
1404 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1405
1406 /* solve U^T * D * y = perm(b) by forward substitution */
1407 for (k=0; k<mbs; k++) { /* t <- perm(b) */
1408 idx = 2*r[k];
1409 t[k*2] = b[idx];
1410 t[k*2+1] = b[idx+1];
1411 }
1412 for (k=0; k<mbs; k++) {
1413 v = aa + 4*ai[k];
1414 vj = aj + ai[k];
1415 k2 = k*2;
1416 x0 = t[k2]; x1 = t[k2+1];
1417 nz = ai[k+1] - ai[k];
1418 while (nz--) {
1419 t[(*vj)*2] += v[0]*x0 + v[1]*x1;
1420 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1421 vj++; v += 4;
1422 }
1423 diag = aa+k*4; /* ptr to inv(Dk) */
1424 t[k2] = diag[0]*x0 + diag[2]*x1;
1425 t[k2+1] = diag[1]*x0 + diag[3]*x1;
1426 }
1427
1428 /* solve U*x = y by back substitution */
1429 for (k=mbs-1; k>=0; k--) {
1430 v = aa + 4*ai[k];
1431 vj = aj + ai[k];
1432 k2 = k*2;
1433 x0 = t[k2]; x1 = t[k2+1];
1434 nz = ai[k+1] - ai[k];
1435 while (nz--) {
1436 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1437 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1438 vj++;
1439 v += 4;
1440 }
1441 t[k2] = x0;
1442 t[k2+1] = x1;
1443 idx = 2*r[k];
1444 x[idx] = x0;
1445 x[idx+1] = x1;
1446 }
1447
1448 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1449 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1450 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1451 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
1452 PetscFunctionReturn(0);
1453 }
1454
MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)1455 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
1456 {
1457 const MatScalar *v,*diag;
1458 PetscScalar x0,x1;
1459 PetscInt nz,k,k2;
1460 const PetscInt *vj;
1461
1462 PetscFunctionBegin;
1463 for (k=0; k<mbs; k++) {
1464 v = aa + 4*ai[k];
1465 vj = aj + ai[k];
1466 k2 = k*2;
1467 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */
1468 nz = ai[k+1] - ai[k];
1469 PetscPrefetchBlock(vj+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1470 PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1471 while (nz--) {
1472 /* x(:) += U(k,:)^T*(Dk*xk) */
1473 x[(*vj)*2] += v[0]*x0 + v[1]*x1;
1474 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1475 vj++; v += 4;
1476 }
1477 /* xk = inv(Dk)*(Dk*xk) */
1478 diag = aa+k*4; /* ptr to inv(Dk) */
1479 x[k2] = diag[0]*x0 + diag[2]*x1;
1480 x[k2+1] = diag[1]*x0 + diag[3]*x1;
1481 }
1482 PetscFunctionReturn(0);
1483 }
1484
MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(const PetscInt * ai,const PetscInt * aj,const MatScalar * aa,PetscInt mbs,PetscScalar * x)1485 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,PetscInt mbs,PetscScalar *x)
1486 {
1487 const MatScalar *v;
1488 PetscScalar x0,x1;
1489 PetscInt nz,k,k2;
1490 const PetscInt *vj;
1491
1492 PetscFunctionBegin;
1493 for (k=mbs-1; k>=0; k--) {
1494 v = aa + 4*ai[k];
1495 vj = aj + ai[k];
1496 k2 = k*2;
1497 x0 = x[k2]; x1 = x[k2+1]; /* xk */
1498 nz = ai[k+1] - ai[k];
1499 PetscPrefetchBlock(vj-nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1500 PetscPrefetchBlock(v-4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1501 while (nz--) {
1502 /* xk += U(k,:)*x(:) */
1503 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1504 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1505 vj++;
1506 v += 4;
1507 }
1508 x[k2] = x0;
1509 x[k2+1] = x1;
1510 }
1511 PetscFunctionReturn(0);
1512 }
1513
MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1514 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1515 {
1516 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1517 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1518 const MatScalar *aa=a->a;
1519 PetscScalar *x;
1520 const PetscScalar *b;
1521 PetscErrorCode ierr;
1522
1523 PetscFunctionBegin;
1524 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1525 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1526
1527 /* solve U^T * D * y = b by forward substitution */
1528 ierr = PetscArraycpy(x,b,2*mbs);CHKERRQ(ierr);
1529 ierr = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1530
1531 /* solve U*x = y by back substitution */
1532 ierr = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1533
1534 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1535 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1536 ierr = PetscLogFlops(4.0*a->bs2*a->nz - (A->rmap->bs+2.0*a->bs2)*mbs);CHKERRQ(ierr);
1537 PetscFunctionReturn(0);
1538 }
1539
MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1540 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1541 {
1542 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1543 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1544 const MatScalar *aa=a->a;
1545 PetscScalar *x;
1546 const PetscScalar *b;
1547 PetscErrorCode ierr;
1548
1549 PetscFunctionBegin;
1550 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1551 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1552 ierr = PetscArraycpy(x,b,2*mbs);CHKERRQ(ierr);
1553 ierr = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1554 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1555 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1556 ierr = PetscLogFlops(2.0*a->bs2*a->nz - A->rmap->bs*mbs);CHKERRQ(ierr);
1557 PetscFunctionReturn(0);
1558 }
1559
MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1560 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1561 {
1562 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)A->data;
1563 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1564 const MatScalar *aa=a->a;
1565 PetscScalar *x;
1566 const PetscScalar *b;
1567 PetscErrorCode ierr;
1568
1569 PetscFunctionBegin;
1570 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1571 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1572 ierr = PetscArraycpy(x,b,2*mbs);CHKERRQ(ierr);
1573 ierr = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1574 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1575 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1576 ierr = PetscLogFlops(2.0*a->bs2*(a->nz - mbs));CHKERRQ(ierr);
1577 PetscFunctionReturn(0);
1578 }
1579
MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)1580 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1581 {
1582 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1583 IS isrow=a->row;
1584 PetscErrorCode ierr;
1585 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag=a->diag;
1586 const MatScalar *aa=a->a,*v;
1587 const PetscScalar *b;
1588 PetscScalar *x,xk,*t;
1589 PetscInt nz,k,j;
1590
1591 PetscFunctionBegin;
1592 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1593 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1594 t = a->solve_work;
1595 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1596
1597 /* solve U^T*D*y = perm(b) by forward substitution */
1598 for (k=0; k<mbs; k++) t[k] = b[rp[k]];
1599 for (k=0; k<mbs; k++) {
1600 v = aa + ai[k];
1601 vj = aj + ai[k];
1602 xk = t[k];
1603 nz = ai[k+1] - ai[k] - 1;
1604 for (j=0; j<nz; j++) t[vj[j]] += v[j]*xk;
1605 t[k] = xk*v[nz]; /* v[nz] = 1/D(k) */
1606 }
1607
1608 /* solve U*perm(x) = y by back substitution */
1609 for (k=mbs-1; k>=0; k--) {
1610 v = aa + adiag[k] - 1;
1611 vj = aj + adiag[k] - 1;
1612 nz = ai[k+1] - ai[k] - 1;
1613 for (j=0; j<nz; j++) t[k] += v[-j]*t[vj[-j]];
1614 x[rp[k]] = t[k];
1615 }
1616
1617 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1618 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1619 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1620 ierr = PetscLogFlops(4.0*a->nz - 3.0*mbs);CHKERRQ(ierr);
1621 PetscFunctionReturn(0);
1622 }
1623
MatSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx)1624 PetscErrorCode MatSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx)
1625 {
1626 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1627 IS isrow=a->row;
1628 PetscErrorCode ierr;
1629 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj;
1630 const MatScalar *aa=a->a,*v;
1631 PetscScalar *x,xk,*t;
1632 const PetscScalar *b;
1633 PetscInt nz,k;
1634
1635 PetscFunctionBegin;
1636 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1637 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1638 t = a->solve_work;
1639 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1640
1641 /* solve U^T*D*y = perm(b) by forward substitution */
1642 for (k=0; k<mbs; k++) t[k] = b[rp[k]];
1643 for (k=0; k<mbs; k++) {
1644 v = aa + ai[k] + 1;
1645 vj = aj + ai[k] + 1;
1646 xk = t[k];
1647 nz = ai[k+1] - ai[k] - 1;
1648 while (nz--) t[*vj++] += (*v++) * xk;
1649 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */
1650 }
1651
1652 /* solve U*perm(x) = y by back substitution */
1653 for (k=mbs-1; k>=0; k--) {
1654 v = aa + ai[k] + 1;
1655 vj = aj + ai[k] + 1;
1656 nz = ai[k+1] - ai[k] - 1;
1657 while (nz--) t[k] += (*v++) * t[*vj++];
1658 x[rp[k]] = t[k];
1659 }
1660
1661 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1662 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1663 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1664 ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr);
1665 PetscFunctionReturn(0);
1666 }
1667
MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)1668 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1669 {
1670 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1671 IS isrow=a->row;
1672 PetscErrorCode ierr;
1673 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag=a->diag;
1674 const MatScalar *aa=a->a,*v;
1675 PetscReal diagk;
1676 PetscScalar *x,xk;
1677 const PetscScalar *b;
1678 PetscInt nz,k;
1679
1680 PetscFunctionBegin;
1681 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */
1682 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1683 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1684 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1685
1686 for (k=0; k<mbs; k++) x[k] = b[rp[k]];
1687 for (k=0; k<mbs; k++) {
1688 v = aa + ai[k];
1689 vj = aj + ai[k];
1690 xk = x[k];
1691 nz = ai[k+1] - ai[k] - 1;
1692 while (nz--) x[*vj++] += (*v++) * xk;
1693
1694 diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[diag[k]] = 1/D(k) */
1695 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1696 x[k] = xk*PetscSqrtReal(diagk);
1697 }
1698 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1699 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1700 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1701 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr);
1702 PetscFunctionReturn(0);
1703 }
1704
MatForwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx)1705 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx)
1706 {
1707 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1708 IS isrow=a->row;
1709 PetscErrorCode ierr;
1710 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj;
1711 const MatScalar *aa=a->a,*v;
1712 PetscReal diagk;
1713 PetscScalar *x,xk;
1714 const PetscScalar *b;
1715 PetscInt nz,k;
1716
1717 PetscFunctionBegin;
1718 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */
1719 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1720 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1721 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1722
1723 for (k=0; k<mbs; k++) x[k] = b[rp[k]];
1724 for (k=0; k<mbs; k++) {
1725 v = aa + ai[k] + 1;
1726 vj = aj + ai[k] + 1;
1727 xk = x[k];
1728 nz = ai[k+1] - ai[k] - 1;
1729 while (nz--) x[*vj++] += (*v++) * xk;
1730
1731 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1732 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1733 x[k] = xk*PetscSqrtReal(diagk);
1734 }
1735 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1736 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1737 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1738 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr);
1739 PetscFunctionReturn(0);
1740 }
1741
MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)1742 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1743 {
1744 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1745 IS isrow=a->row;
1746 PetscErrorCode ierr;
1747 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag=a->diag;
1748 const MatScalar *aa=a->a,*v;
1749 PetscReal diagk;
1750 PetscScalar *x,*t;
1751 const PetscScalar *b;
1752 PetscInt nz,k;
1753
1754 PetscFunctionBegin;
1755 /* solve D^(1/2)*U*perm(x) = b by back substitution */
1756 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1757 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1758 t = a->solve_work;
1759 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1760
1761 for (k=mbs-1; k>=0; k--) {
1762 v = aa + ai[k];
1763 vj = aj + ai[k];
1764 diagk = PetscRealPart(aa[adiag[k]]);
1765 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1766 t[k] = b[k] * PetscSqrtReal(diagk);
1767 nz = ai[k+1] - ai[k] - 1;
1768 while (nz--) t[k] += (*v++) * t[*vj++];
1769 x[rp[k]] = t[k];
1770 }
1771 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1772 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1773 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1774 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr);
1775 PetscFunctionReturn(0);
1776 }
1777
MatBackwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx)1778 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx)
1779 {
1780 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1781 IS isrow=a->row;
1782 PetscErrorCode ierr;
1783 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj;
1784 const MatScalar *aa=a->a,*v;
1785 PetscReal diagk;
1786 PetscScalar *x,*t;
1787 const PetscScalar *b;
1788 PetscInt nz,k;
1789
1790 PetscFunctionBegin;
1791 /* solve D^(1/2)*U*perm(x) = b by back substitution */
1792 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1793 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1794 t = a->solve_work;
1795 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1796
1797 for (k=mbs-1; k>=0; k--) {
1798 v = aa + ai[k] + 1;
1799 vj = aj + ai[k] + 1;
1800 diagk = PetscRealPart(aa[ai[k]]);
1801 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1802 t[k] = b[k] * PetscSqrtReal(diagk);
1803 nz = ai[k+1] - ai[k] - 1;
1804 while (nz--) t[k] += (*v++) * t[*vj++];
1805 x[rp[k]] = t[k];
1806 }
1807 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1808 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1809 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1810 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr);
1811 PetscFunctionReturn(0);
1812 }
1813
MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)1814 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1815 {
1816 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1817 PetscErrorCode ierr;
1818
1819 PetscFunctionBegin;
1820 if (A->rmap->bs == 1) {
1821 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1822 } else {
1823 IS isrow=a->row;
1824 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1825 const MatScalar *aa=a->a,*v;
1826 PetscScalar *x,*t;
1827 const PetscScalar *b;
1828 PetscInt nz,k,n,i,j;
1829
1830 if (bb->n > a->solves_work_n) {
1831 ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
1832 ierr = PetscMalloc1(bb->n*A->rmap->N,&a->solves_work);CHKERRQ(ierr);
1833 a->solves_work_n = bb->n;
1834 }
1835 n = bb->n;
1836 ierr = VecGetArrayRead(bb->v,&b);CHKERRQ(ierr);
1837 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1838 t = a->solves_work;
1839
1840 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1841
1842 /* solve U^T*D*y = perm(b) by forward substitution */
1843 for (k=0; k<mbs; k++) {
1844 for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs]; /* values are stored interlaced in t */
1845 }
1846 for (k=0; k<mbs; k++) {
1847 v = aa + ai[k];
1848 vj = aj + ai[k];
1849 nz = ai[k+1] - ai[k] - 1;
1850 for (j=0; j<nz; j++) {
1851 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1852 v++;vj++;
1853 }
1854 for (i=0; i<n; i++) t[n*k+i] *= aa[nz]; /* note: aa[nz] = 1/D(k) */
1855 }
1856
1857 /* solve U*perm(x) = y by back substitution */
1858 for (k=mbs-1; k>=0; k--) {
1859 v = aa + ai[k] - 1;
1860 vj = aj + ai[k] - 1;
1861 nz = ai[k+1] - ai[k] - 1;
1862 for (j=0; j<nz; j++) {
1863 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1864 v++;vj++;
1865 }
1866 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i];
1867 }
1868
1869 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1870 ierr = VecRestoreArrayRead(bb->v,&b);CHKERRQ(ierr);
1871 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1872 ierr = PetscLogFlops(bb->n*(4.0*a->nz - 3.0*mbs));CHKERRQ(ierr);
1873 }
1874 PetscFunctionReturn(0);
1875 }
1876
MatSolves_SeqSBAIJ_1_inplace(Mat A,Vecs bb,Vecs xx)1877 PetscErrorCode MatSolves_SeqSBAIJ_1_inplace(Mat A,Vecs bb,Vecs xx)
1878 {
1879 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1880 PetscErrorCode ierr;
1881
1882 PetscFunctionBegin;
1883 if (A->rmap->bs == 1) {
1884 ierr = MatSolve_SeqSBAIJ_1_inplace(A,bb->v,xx->v);CHKERRQ(ierr);
1885 } else {
1886 IS isrow=a->row;
1887 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1888 const MatScalar *aa=a->a,*v;
1889 PetscScalar *x,*t;
1890 const PetscScalar *b;
1891 PetscInt nz,k,n,i;
1892
1893 if (bb->n > a->solves_work_n) {
1894 ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
1895 ierr = PetscMalloc1(bb->n*A->rmap->N,&a->solves_work);CHKERRQ(ierr);
1896 a->solves_work_n = bb->n;
1897 }
1898 n = bb->n;
1899 ierr = VecGetArrayRead(bb->v,&b);CHKERRQ(ierr);
1900 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1901 t = a->solves_work;
1902
1903 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr);
1904
1905 /* solve U^T*D*y = perm(b) by forward substitution */
1906 for (k=0; k<mbs; k++) {
1907 for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs]; /* values are stored interlaced in t */
1908 }
1909 for (k=0; k<mbs; k++) {
1910 v = aa + ai[k];
1911 vj = aj + ai[k];
1912 nz = ai[k+1] - ai[k];
1913 while (nz--) {
1914 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1915 v++;vj++;
1916 }
1917 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */
1918 }
1919
1920 /* solve U*perm(x) = y by back substitution */
1921 for (k=mbs-1; k>=0; k--) {
1922 v = aa + ai[k];
1923 vj = aj + ai[k];
1924 nz = ai[k+1] - ai[k];
1925 while (nz--) {
1926 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1927 v++;vj++;
1928 }
1929 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i];
1930 }
1931
1932 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr);
1933 ierr = VecRestoreArrayRead(bb->v,&b);CHKERRQ(ierr);
1934 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1935 ierr = PetscLogFlops(bb->n*(4.0*a->nz - 3.0*mbs));CHKERRQ(ierr);
1936 }
1937 PetscFunctionReturn(0);
1938 }
1939
MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)1940 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1941 {
1942 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1943 PetscErrorCode ierr;
1944 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj,*adiag=a->diag;
1945 const MatScalar *aa=a->a,*v;
1946 const PetscScalar *b;
1947 PetscScalar *x,xi;
1948 PetscInt nz,i,j;
1949
1950 PetscFunctionBegin;
1951 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1952 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1953 /* solve U^T*D*y = b by forward substitution */
1954 ierr = PetscArraycpy(x,b,mbs);CHKERRQ(ierr);
1955 for (i=0; i<mbs; i++) {
1956 v = aa + ai[i];
1957 vj = aj + ai[i];
1958 xi = x[i];
1959 nz = ai[i+1] - ai[i] - 1; /* exclude diag[i] */
1960 for (j=0; j<nz; j++) x[vj[j]] += v[j]*xi;
1961 x[i] = xi*v[nz]; /* v[nz] = aa[diag[i]] = 1/D(i) */
1962 }
1963 /* solve U*x = y by backward substitution */
1964 for (i=mbs-2; i>=0; i--) {
1965 xi = x[i];
1966 v = aa + adiag[i] - 1; /* end of row i, excluding diag */
1967 vj = aj + adiag[i] - 1;
1968 nz = ai[i+1] - ai[i] - 1;
1969 for (j=0; j<nz; j++) xi += v[-j]*x[vj[-j]];
1970 x[i] = xi;
1971 }
1972 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1973 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1974 ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr);
1975 PetscFunctionReturn(0);
1976 }
1977
MatMatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat B,Mat X)1978 PetscErrorCode MatMatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat B,Mat X)
1979 {
1980 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1981 PetscErrorCode ierr;
1982 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj,*adiag=a->diag;
1983 const MatScalar *aa=a->a,*v;
1984 const PetscScalar *b;
1985 PetscScalar *x,xi;
1986 PetscInt nz,i,j,neq,ldb,ldx;
1987 PetscBool isdense;
1988
1989 PetscFunctionBegin;
1990 if (!mbs) PetscFunctionReturn(0);
1991 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1992 if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1993 if (X != B) {
1994 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1995 if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1996 }
1997 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr);
1998 ierr = MatDenseGetLDA(B,&ldb);CHKERRQ(ierr);
1999 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
2000 ierr = MatDenseGetLDA(X,&ldx);CHKERRQ(ierr);
2001 for (neq=0; neq<B->cmap->n; neq++) {
2002 /* solve U^T*D*y = b by forward substitution */
2003 ierr = PetscArraycpy(x,b,mbs);CHKERRQ(ierr);
2004 for (i=0; i<mbs; i++) {
2005 v = aa + ai[i];
2006 vj = aj + ai[i];
2007 xi = x[i];
2008 nz = ai[i+1] - ai[i] - 1; /* exclude diag[i] */
2009 for (j=0; j<nz; j++) x[vj[j]] += v[j]*xi;
2010 x[i] = xi*v[nz]; /* v[nz] = aa[diag[i]] = 1/D(i) */
2011 }
2012 /* solve U*x = y by backward substitution */
2013 for (i=mbs-2; i>=0; i--) {
2014 xi = x[i];
2015 v = aa + adiag[i] - 1; /* end of row i, excluding diag */
2016 vj = aj + adiag[i] - 1;
2017 nz = ai[i+1] - ai[i] - 1;
2018 for (j=0; j<nz; j++) xi += v[-j]*x[vj[-j]];
2019 x[i] = xi;
2020 }
2021 b += ldb;
2022 x += ldx;
2023 }
2024 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr);
2025 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
2026 ierr = PetscLogFlops(B->cmap->n*(4.0*a->nz - 3*mbs));CHKERRQ(ierr);
2027 PetscFunctionReturn(0);
2028 }
2029
MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)2030 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
2031 {
2032 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2033 PetscErrorCode ierr;
2034 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj;
2035 const MatScalar *aa=a->a,*v;
2036 PetscScalar *x,xk;
2037 const PetscScalar *b;
2038 PetscInt nz,k;
2039
2040 PetscFunctionBegin;
2041 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2042 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2043
2044 /* solve U^T*D*y = b by forward substitution */
2045 ierr = PetscArraycpy(x,b,mbs);CHKERRQ(ierr);
2046 for (k=0; k<mbs; k++) {
2047 v = aa + ai[k] + 1;
2048 vj = aj + ai[k] + 1;
2049 xk = x[k];
2050 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */
2051 while (nz--) x[*vj++] += (*v++) * xk;
2052 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */
2053 }
2054
2055 /* solve U*x = y by back substitution */
2056 for (k=mbs-2; k>=0; k--) {
2057 v = aa + ai[k] + 1;
2058 vj = aj + ai[k] + 1;
2059 xk = x[k];
2060 nz = ai[k+1] - ai[k] - 1;
2061 while (nz--) {
2062 xk += (*v++) * x[*vj++];
2063 }
2064 x[k] = xk;
2065 }
2066
2067 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2068 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2069 ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr);
2070 PetscFunctionReturn(0);
2071 }
2072
MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)2073 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
2074 {
2075 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2076 PetscErrorCode ierr;
2077 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*adiag=a->diag,*vj;
2078 const MatScalar *aa=a->a,*v;
2079 PetscReal diagk;
2080 PetscScalar *x;
2081 const PetscScalar *b;
2082 PetscInt nz,k;
2083
2084 PetscFunctionBegin;
2085 /* solve U^T*D^(1/2)*x = b by forward substitution */
2086 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2087 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2088 ierr = PetscArraycpy(x,b,mbs);CHKERRQ(ierr);
2089 for (k=0; k<mbs; k++) {
2090 v = aa + ai[k];
2091 vj = aj + ai[k];
2092 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */
2093 while (nz--) x[*vj++] += (*v++) * x[k];
2094 diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[adiag[k]] = 1/D(k) */
2095 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[adiag[k]]),PetscImaginaryPart(aa[adiag[k]]));
2096 x[k] *= PetscSqrtReal(diagk);
2097 }
2098 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2099 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2100 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr);
2101 PetscFunctionReturn(0);
2102 }
2103
MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)2104 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
2105 {
2106 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2107 PetscErrorCode ierr;
2108 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj;
2109 const MatScalar *aa=a->a,*v;
2110 PetscReal diagk;
2111 PetscScalar *x;
2112 const PetscScalar *b;
2113 PetscInt nz,k;
2114
2115 PetscFunctionBegin;
2116 /* solve U^T*D^(1/2)*x = b by forward substitution */
2117 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2118 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2119 ierr = PetscArraycpy(x,b,mbs);CHKERRQ(ierr);
2120 for (k=0; k<mbs; k++) {
2121 v = aa + ai[k] + 1;
2122 vj = aj + ai[k] + 1;
2123 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */
2124 while (nz--) x[*vj++] += (*v++) * x[k];
2125 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
2126 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]]));
2127 x[k] *= PetscSqrtReal(diagk);
2128 }
2129 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2130 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2131 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr);
2132 PetscFunctionReturn(0);
2133 }
2134
MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)2135 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
2136 {
2137 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2138 PetscErrorCode ierr;
2139 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*adiag=a->diag,*vj;
2140 const MatScalar *aa=a->a,*v;
2141 PetscReal diagk;
2142 PetscScalar *x;
2143 const PetscScalar *b;
2144 PetscInt nz,k;
2145
2146 PetscFunctionBegin;
2147 /* solve D^(1/2)*U*x = b by back substitution */
2148 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2149 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2150
2151 for (k=mbs-1; k>=0; k--) {
2152 v = aa + ai[k];
2153 vj = aj + ai[k];
2154 diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[diag[k]] = 1/D(k) */
2155 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
2156 x[k] = PetscSqrtReal(diagk)*b[k];
2157 nz = ai[k+1] - ai[k] - 1;
2158 while (nz--) x[k] += (*v++) * x[*vj++];
2159 }
2160 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2161 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2162 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr);
2163 PetscFunctionReturn(0);
2164 }
2165
MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)2166 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
2167 {
2168 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2169 PetscErrorCode ierr;
2170 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj;
2171 const MatScalar *aa=a->a,*v;
2172 PetscReal diagk;
2173 PetscScalar *x;
2174 const PetscScalar *b;
2175 PetscInt nz,k;
2176
2177 PetscFunctionBegin;
2178 /* solve D^(1/2)*U*x = b by back substitution */
2179 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2180 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2181
2182 for (k=mbs-1; k>=0; k--) {
2183 v = aa + ai[k] + 1;
2184 vj = aj + ai[k] + 1;
2185 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
2186 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
2187 x[k] = PetscSqrtReal(diagk)*b[k];
2188 nz = ai[k+1] - ai[k] - 1;
2189 while (nz--) x[k] += (*v++) * x[*vj++];
2190 }
2191 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2192 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2193 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr);
2194 PetscFunctionReturn(0);
2195 }
2196
2197 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo * info)2198 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info)
2199 {
2200 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
2201 PetscErrorCode ierr;
2202 const PetscInt *rip,mbs = a->mbs,*ai,*aj;
2203 PetscInt *jutmp,bs = A->rmap->bs,i;
2204 PetscInt m,reallocs = 0,*levtmp;
2205 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
2206 PetscInt incrlev,*lev,shift,prow,nz;
2207 PetscReal f = info->fill,levels = info->levels;
2208 PetscBool perm_identity;
2209
2210 PetscFunctionBegin;
2211 /* check whether perm is the identity mapping */
2212 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2213
2214 if (perm_identity) {
2215 a->permute = PETSC_FALSE;
2216 ai = a->i; aj = a->j;
2217 } else { /* non-trivial permutation */
2218 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
2219 a->permute = PETSC_TRUE;
2220 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
2221 ai = a->inew; aj = a->jnew;
2222 }
2223
2224 /* initialization */
2225 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2226 umax = (PetscInt)(f*ai[mbs] + 1);
2227 ierr = PetscMalloc1(umax,&lev);CHKERRQ(ierr);
2228 umax += mbs + 1;
2229 shift = mbs + 1;
2230 ierr = PetscMalloc1(mbs+1,&iu);CHKERRQ(ierr);
2231 ierr = PetscMalloc1(umax,&ju);CHKERRQ(ierr);
2232 iu[0] = mbs + 1;
2233 juidx = mbs + 1;
2234 /* prowl: linked list for pivot row */
2235 ierr = PetscMalloc3(mbs,&prowl,mbs,&q,mbs,&levtmp);CHKERRQ(ierr);
2236 /* q: linked list for col index */
2237
2238 for (i=0; i<mbs; i++) {
2239 prowl[i] = mbs;
2240 q[i] = 0;
2241 }
2242
2243 /* for each row k */
2244 for (k=0; k<mbs; k++) {
2245 nzk = 0;
2246 q[k] = mbs;
2247 /* copy current row into linked list */
2248 nz = ai[rip[k]+1] - ai[rip[k]];
2249 j = ai[rip[k]];
2250 while (nz--) {
2251 vj = rip[aj[j++]];
2252 if (vj > k) {
2253 qm = k;
2254 do {
2255 m = qm; qm = q[m];
2256 } while (qm < vj);
2257 if (qm == vj) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Duplicate entry in A\n");
2258 nzk++;
2259 q[m] = vj;
2260 q[vj] = qm;
2261 levtmp[vj] = 0; /* initialize lev for nonzero element */
2262 }
2263 }
2264
2265 /* modify nonzero structure of k-th row by computing fill-in
2266 for each row prow to be merged in */
2267 prow = k;
2268 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
2269
2270 while (prow < k) {
2271 /* merge row prow into k-th row */
2272 jmin = iu[prow] + 1;
2273 jmax = iu[prow+1];
2274 qm = k;
2275 for (j=jmin; j<jmax; j++) {
2276 incrlev = lev[j-shift] + 1;
2277 if (incrlev > levels) continue;
2278 vj = ju[j];
2279 do {
2280 m = qm; qm = q[m];
2281 } while (qm < vj);
2282 if (qm != vj) { /* a fill */
2283 nzk++; q[m] = vj; q[vj] = qm; qm = vj;
2284 levtmp[vj] = incrlev;
2285 } else if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
2286 }
2287 prow = prowl[prow]; /* next pivot row */
2288 }
2289
2290 /* add k to row list for first nonzero element in k-th row */
2291 if (nzk > 1) {
2292 i = q[k]; /* col value of first nonzero element in k_th row of U */
2293 prowl[k] = prowl[i]; prowl[i] = k;
2294 }
2295 iu[k+1] = iu[k] + nzk;
2296
2297 /* allocate more space to ju and lev if needed */
2298 if (iu[k+1] > umax) {
2299 /* estimate how much additional space we will need */
2300 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
2301 /* just double the memory each time */
2302 maxadd = umax;
2303 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
2304 umax += maxadd;
2305
2306 /* allocate a longer ju */
2307 ierr = PetscMalloc1(umax,&jutmp);CHKERRQ(ierr);
2308 ierr = PetscArraycpy(jutmp,ju,iu[k]);CHKERRQ(ierr);
2309 ierr = PetscFree(ju);CHKERRQ(ierr);
2310 ju = jutmp;
2311
2312 ierr = PetscMalloc1(umax,&jutmp);CHKERRQ(ierr);
2313 ierr = PetscArraycpy(jutmp,lev,iu[k]-shift);CHKERRQ(ierr);
2314 ierr = PetscFree(lev);CHKERRQ(ierr);
2315 lev = jutmp;
2316 reallocs += 2; /* count how many times we realloc */
2317 }
2318
2319 /* save nonzero structure of k-th row in ju */
2320 i=k;
2321 while (nzk--) {
2322 i = q[i];
2323 ju[juidx] = i;
2324 lev[juidx-shift] = levtmp[i];
2325 juidx++;
2326 }
2327 }
2328
2329 #if defined(PETSC_USE_INFO)
2330 if (ai[mbs] != 0) {
2331 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2332 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
2333 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2334 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
2335 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2336 } else {
2337 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
2338 }
2339 #endif
2340
2341 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2342 ierr = PetscFree3(prowl,q,levtmp);CHKERRQ(ierr);
2343 ierr = PetscFree(lev);CHKERRQ(ierr);
2344
2345 /* put together the new matrix */
2346 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,NULL);CHKERRQ(ierr);
2347
2348 /* ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)iperm);CHKERRQ(ierr); */
2349 b = (Mat_SeqSBAIJ*)(B)->data;
2350 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2351
2352 b->singlemalloc = PETSC_FALSE;
2353 b->free_a = PETSC_TRUE;
2354 b->free_ij = PETSC_TRUE;
2355 /* the next line frees the default space generated by the Create() */
2356 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
2357 ierr = PetscMalloc1((iu[mbs]+1)*a->bs2,&b->a);CHKERRQ(ierr);
2358 b->j = ju;
2359 b->i = iu;
2360 b->diag = NULL;
2361 b->ilen = NULL;
2362 b->imax = NULL;
2363
2364 ierr = ISDestroy(&b->row);CHKERRQ(ierr);
2365 ierr = ISDestroy(&b->icol);CHKERRQ(ierr);
2366 b->row = perm;
2367 b->icol = perm;
2368 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2369 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2370 ierr = PetscMalloc1(bs*mbs+bs,&b->solve_work);CHKERRQ(ierr);
2371 /* In b structure: Free imax, ilen, old a, old j.
2372 Allocate idnew, solve_work, new a, new j */
2373 ierr = PetscLogObjectMemory((PetscObject)B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2374 b->maxnz = b->nz = iu[mbs];
2375
2376 (B)->info.factor_mallocs = reallocs;
2377 (B)->info.fill_ratio_given = f;
2378 if (ai[mbs] != 0) {
2379 (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2380 } else {
2381 (B)->info.fill_ratio_needed = 0.0;
2382 }
2383 ierr = MatSeqSBAIJSetNumericFactorization_inplace(B,perm_identity);CHKERRQ(ierr);
2384 PetscFunctionReturn(0);
2385 }
2386
2387 /*
2388 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure
2389 */
2390 #include <petscbt.h>
2391 #include <../src/mat/utils/freespace.h>
MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)2392 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2393 {
2394 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
2395 PetscErrorCode ierr;
2396 PetscBool perm_identity,free_ij = PETSC_TRUE,missing;
2397 PetscInt bs=A->rmap->bs,am=a->mbs,d,*ai=a->i,*aj= a->j;
2398 const PetscInt *rip;
2399 PetscInt reallocs=0,i,*ui,*udiag,*cols;
2400 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2401 PetscInt nlnk,*lnk,*lnk_lvl=NULL,ncols,*uj,**uj_ptr,**uj_lvl_ptr;
2402 PetscReal fill =info->fill,levels=info->levels;
2403 PetscFreeSpaceList free_space =NULL,current_space=NULL;
2404 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2405 PetscBT lnkbt;
2406
2407 PetscFunctionBegin;
2408 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2409 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2410 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2411 if (bs > 1) {
2412 ierr = MatICCFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);CHKERRQ(ierr);
2413 PetscFunctionReturn(0);
2414 }
2415
2416 /* check whether perm is the identity mapping */
2417 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2418 if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
2419 a->permute = PETSC_FALSE;
2420
2421 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2422 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2423 ui[0] = 0;
2424
2425 /* ICC(0) without matrix ordering: simply rearrange column indices */
2426 if (!levels) {
2427 /* reuse the column pointers and row offsets for memory savings */
2428 for (i=0; i<am; i++) {
2429 ncols = ai[i+1] - ai[i];
2430 ui[i+1] = ui[i] + ncols;
2431 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2432 }
2433 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2434 cols = uj;
2435 for (i=0; i<am; i++) {
2436 aj = a->j + ai[i] + 1; /* 1st entry of U(i,:) without diagonal */
2437 ncols = ai[i+1] - ai[i] -1;
2438 for (j=0; j<ncols; j++) *cols++ = aj[j];
2439 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2440 }
2441 } else { /* case: levels>0 */
2442 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2443
2444 /* initialization */
2445 /* jl: linked list for storing indices of the pivot rows
2446 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2447 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&il,am,&jl);CHKERRQ(ierr);
2448 for (i=0; i<am; i++) {
2449 jl[i] = am; il[i] = 0;
2450 }
2451
2452 /* create and initialize a linked list for storing column indices of the active row k */
2453 nlnk = am + 1;
2454 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2455
2456 /* initial FreeSpace size is fill*(ai[am]+1) */
2457 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
2458
2459 current_space = free_space;
2460
2461 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr);
2462
2463 current_space_lvl = free_space_lvl;
2464
2465 for (k=0; k<am; k++) { /* for each active row k */
2466 /* initialize lnk by the column indices of row k */
2467 nzk = 0;
2468 ncols = ai[k+1] - ai[k];
2469 if (!ncols) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row %D in matrix ",k);
2470 cols = aj+ai[k];
2471 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2472 nzk += nlnk;
2473
2474 /* update lnk by computing fill-in for each pivot row to be merged in */
2475 prow = jl[k]; /* 1st pivot row */
2476
2477 while (prow < k) {
2478 nextprow = jl[prow];
2479
2480 /* merge prow into k-th row */
2481 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2482 jmax = ui[prow+1];
2483 ncols = jmax-jmin;
2484 i = jmin - ui[prow];
2485
2486 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2487 uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2488 j = *(uj - 1);
2489 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2490 nzk += nlnk;
2491
2492 /* update il and jl for prow */
2493 if (jmin < jmax) {
2494 il[prow] = jmin;
2495
2496 j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2497 }
2498 prow = nextprow;
2499 }
2500
2501 /* if free space is not available, make more free space */
2502 if (current_space->local_remaining<nzk) {
2503 i = am - k + 1; /* num of unfactored rows */
2504 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2505 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr);
2506 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr);
2507 reallocs++;
2508 }
2509
2510 /* copy data into free_space and free_space_lvl, then initialize lnk */
2511 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2512 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2513
2514 /* add the k-th row into il and jl */
2515 if (nzk > 1) {
2516 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2517 jl[k] = jl[i]; jl[i] = k;
2518 il[k] = ui[k] + 1;
2519 }
2520 uj_ptr[k] = current_space->array;
2521 uj_lvl_ptr[k] = current_space_lvl->array;
2522
2523 current_space->array += nzk;
2524 current_space->local_used += nzk;
2525 current_space->local_remaining -= nzk;
2526 current_space_lvl->array += nzk;
2527 current_space_lvl->local_used += nzk;
2528 current_space_lvl->local_remaining -= nzk;
2529
2530 ui[k+1] = ui[k] + nzk;
2531 }
2532
2533 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2534 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr);
2535
2536 /* destroy list of free space and other temporary array(s) */
2537 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2538 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2539 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2540 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2541
2542 } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2543
2544 /* put together the new matrix in MATSEQSBAIJ format */
2545 ierr = MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
2546
2547 b = (Mat_SeqSBAIJ*)(fact)->data;
2548 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2549
2550 b->singlemalloc = PETSC_FALSE;
2551 b->free_a = PETSC_TRUE;
2552 b->free_ij = free_ij;
2553
2554 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2555
2556 b->j = uj;
2557 b->i = ui;
2558 b->diag = udiag;
2559 b->free_diag = PETSC_TRUE;
2560 b->ilen = NULL;
2561 b->imax = NULL;
2562 b->row = perm;
2563 b->col = perm;
2564
2565 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2566 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2567
2568 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2569
2570 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2571 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2572
2573 b->maxnz = b->nz = ui[am];
2574
2575 fact->info.factor_mallocs = reallocs;
2576 fact->info.fill_ratio_given = fill;
2577 if (ai[am] != 0) {
2578 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/ai[am];
2579 } else {
2580 fact->info.fill_ratio_needed = 0.0;
2581 }
2582 #if defined(PETSC_USE_INFO)
2583 if (ai[am] != 0) {
2584 PetscReal af = fact->info.fill_ratio_needed;
2585 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2586 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2587 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2588 } else {
2589 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
2590 }
2591 #endif
2592 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
2593 PetscFunctionReturn(0);
2594 }
2595
MatICCFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo * info)2596 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2597 {
2598 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2599 Mat_SeqSBAIJ *b;
2600 PetscErrorCode ierr;
2601 PetscBool perm_identity,free_ij = PETSC_TRUE;
2602 PetscInt bs=A->rmap->bs,am=a->mbs;
2603 const PetscInt *cols,*rip,*ai=a->i,*aj=a->j;
2604 PetscInt reallocs=0,i,*ui;
2605 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2606 PetscInt nlnk,*lnk,*lnk_lvl=NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
2607 PetscReal fill =info->fill,levels=info->levels,ratio_needed;
2608 PetscFreeSpaceList free_space =NULL,current_space=NULL;
2609 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2610 PetscBT lnkbt;
2611
2612 PetscFunctionBegin;
2613 /*
2614 This code originally uses Modified Sparse Row (MSR) storage
2615 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
2616 Then it is rewritten so the factor B takes seqsbaij format. However the associated
2617 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1,
2618 thus the original code in MSR format is still used for these cases.
2619 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever
2620 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
2621 */
2622 if (bs > 1) {
2623 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr);
2624 PetscFunctionReturn(0);
2625 }
2626
2627 /* check whether perm is the identity mapping */
2628 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2629 if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
2630 a->permute = PETSC_FALSE;
2631
2632 /* special case that simply copies fill pattern */
2633 if (!levels) {
2634 /* reuse the column pointers and row offsets for memory savings */
2635 ui = a->i;
2636 uj = a->j;
2637 free_ij = PETSC_FALSE;
2638 ratio_needed = 1.0;
2639 } else { /* case: levels>0 */
2640 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2641
2642 /* initialization */
2643 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2644 ui[0] = 0;
2645
2646 /* jl: linked list for storing indices of the pivot rows
2647 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2648 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&il,am,&jl);CHKERRQ(ierr);
2649 for (i=0; i<am; i++) {
2650 jl[i] = am; il[i] = 0;
2651 }
2652
2653 /* create and initialize a linked list for storing column indices of the active row k */
2654 nlnk = am + 1;
2655 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2656
2657 /* initial FreeSpace size is fill*(ai[am]+1) */
2658 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
2659
2660 current_space = free_space;
2661
2662 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr);
2663
2664 current_space_lvl = free_space_lvl;
2665
2666 for (k=0; k<am; k++) { /* for each active row k */
2667 /* initialize lnk by the column indices of row rip[k] */
2668 nzk = 0;
2669 ncols = ai[rip[k]+1] - ai[rip[k]];
2670 cols = aj+ai[rip[k]];
2671 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2672 nzk += nlnk;
2673
2674 /* update lnk by computing fill-in for each pivot row to be merged in */
2675 prow = jl[k]; /* 1st pivot row */
2676
2677 while (prow < k) {
2678 nextprow = jl[prow];
2679
2680 /* merge prow into k-th row */
2681 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2682 jmax = ui[prow+1];
2683 ncols = jmax-jmin;
2684 i = jmin - ui[prow];
2685 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2686 j = *(uj_lvl_ptr[prow] + i - 1);
2687 cols_lvl = uj_lvl_ptr[prow]+i;
2688 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2689 nzk += nlnk;
2690
2691 /* update il and jl for prow */
2692 if (jmin < jmax) {
2693 il[prow] = jmin;
2694 j = *cols;
2695 jl[prow] = jl[j];
2696 jl[j] = prow;
2697 }
2698 prow = nextprow;
2699 }
2700
2701 /* if free space is not available, make more free space */
2702 if (current_space->local_remaining<nzk) {
2703 i = am - k + 1; /* num of unfactored rows */
2704 i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2705 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr);
2706 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr);
2707 reallocs++;
2708 }
2709
2710 /* copy data into free_space and free_space_lvl, then initialize lnk */
2711 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2712
2713 /* add the k-th row into il and jl */
2714 if (nzk-1 > 0) {
2715 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2716 jl[k] = jl[i]; jl[i] = k;
2717 il[k] = ui[k] + 1;
2718 }
2719 uj_ptr[k] = current_space->array;
2720 uj_lvl_ptr[k] = current_space_lvl->array;
2721
2722 current_space->array += nzk;
2723 current_space->local_used += nzk;
2724 current_space->local_remaining -= nzk;
2725 current_space_lvl->array += nzk;
2726 current_space_lvl->local_used += nzk;
2727 current_space_lvl->local_remaining -= nzk;
2728
2729 ui[k+1] = ui[k] + nzk;
2730 }
2731
2732 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2733 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr);
2734
2735 /* destroy list of free space and other temporary array(s) */
2736 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2737 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2738 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2739 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2740 if (ai[am] != 0) {
2741 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2742 } else {
2743 ratio_needed = 0.0;
2744 }
2745 } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2746
2747 /* put together the new matrix in MATSEQSBAIJ format */
2748 ierr = MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
2749
2750 b = (Mat_SeqSBAIJ*)(fact)->data;
2751
2752 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2753
2754 b->singlemalloc = PETSC_FALSE;
2755 b->free_a = PETSC_TRUE;
2756 b->free_ij = free_ij;
2757
2758 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2759
2760 b->j = uj;
2761 b->i = ui;
2762 b->diag = NULL;
2763 b->ilen = NULL;
2764 b->imax = NULL;
2765 b->row = perm;
2766 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2767
2768 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2769 b->icol = perm;
2770 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2771 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2772
2773 b->maxnz = b->nz = ui[am];
2774
2775 fact->info.factor_mallocs = reallocs;
2776 fact->info.fill_ratio_given = fill;
2777 fact->info.fill_ratio_needed = ratio_needed;
2778 #if defined(PETSC_USE_INFO)
2779 if (ai[am] != 0) {
2780 PetscReal af = fact->info.fill_ratio_needed;
2781 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2782 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2783 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2784 } else {
2785 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
2786 }
2787 #endif
2788 if (perm_identity) {
2789 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace;
2790 } else {
2791 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace;
2792 }
2793 PetscFunctionReturn(0);
2794 }
2795