1
2 #include <../src/mat/impls/baij/seq/baij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <petscis.h>
6
MatGetInertia_SeqSBAIJ(Mat F,PetscInt * nneg,PetscInt * nzero,PetscInt * npos)7 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
8 {
9 Mat_SeqSBAIJ *fact=(Mat_SeqSBAIJ*)F->data;
10 MatScalar *dd=fact->a;
11 PetscInt mbs=fact->mbs,bs=F->rmap->bs,i,nneg_tmp,npos_tmp,*fi=fact->diag;
12
13 PetscFunctionBegin;
14 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs);
15 if (F->factorerrortype==MAT_FACTOR_NUMERIC_ZEROPIVOT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatFactor fails with numeric zeropivot");
16
17 nneg_tmp = 0; npos_tmp = 0;
18 for (i=0; i<mbs; i++) {
19 if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
20 else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
21 fi++;
22 }
23 if (nneg) *nneg = nneg_tmp;
24 if (npos) *npos = npos_tmp;
25 if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
26 PetscFunctionReturn(0);
27 }
28
29 /*
30 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
31 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
32 */
MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo * info)33 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo *info)
34 {
35 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
36 PetscErrorCode ierr;
37 const PetscInt *rip,*ai,*aj;
38 PetscInt i,mbs = a->mbs,*jutmp,bs = A->rmap->bs,bs2=a->bs2;
39 PetscInt m,reallocs = 0,prow;
40 PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
41 PetscReal f = info->fill;
42 PetscBool perm_identity;
43
44 PetscFunctionBegin;
45 /* check whether perm is the identity mapping */
46 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
47 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
48
49 if (perm_identity) { /* without permutation */
50 a->permute = PETSC_FALSE;
51
52 ai = a->i; aj = a->j;
53 } else { /* non-trivial permutation */
54 a->permute = PETSC_TRUE;
55
56 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
57
58 ai = a->inew; aj = a->jnew;
59 }
60
61 /* initialization */
62 ierr = PetscMalloc1(mbs+1,&iu);CHKERRQ(ierr);
63 umax = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
64 ierr = PetscMalloc1(umax,&ju);CHKERRQ(ierr);
65 iu[0] = mbs+1;
66 juidx = mbs + 1; /* index for ju */
67 /* jl linked list for pivot row -- linked list for col index */
68 ierr = PetscMalloc2(mbs,&jl,mbs,&q);CHKERRQ(ierr);
69 for (i=0; i<mbs; i++) {
70 jl[i] = mbs;
71 q[i] = 0;
72 }
73
74 /* for each row k */
75 for (k=0; k<mbs; k++) {
76 for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */
77 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
78 q[k] = mbs;
79 /* initialize nonzero structure of k-th row to row rip[k] of A */
80 jmin = ai[rip[k]] +1; /* exclude diag[k] */
81 jmax = ai[rip[k]+1];
82 for (j=jmin; j<jmax; j++) {
83 vj = rip[aj[j]]; /* col. value */
84 if (vj > k) {
85 qm = k;
86 do {
87 m = qm; qm = q[m];
88 } while (qm < vj);
89 if (qm == vj) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Duplicate entry in A\n");
90 nzk++;
91 q[m] = vj;
92 q[vj] = qm;
93 } /* if (vj > k) */
94 } /* for (j=jmin; j<jmax; j++) */
95
96 /* modify nonzero structure of k-th row by computing fill-in
97 for each row i to be merged in */
98 prow = k;
99 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
100
101 while (prow < k) {
102 /* merge row prow into k-th row */
103 jmin = iu[prow] + 1; jmax = iu[prow+1];
104 qm = k;
105 for (j=jmin; j<jmax; j++) {
106 vj = ju[j];
107 do {
108 m = qm; qm = q[m];
109 } while (qm < vj);
110 if (qm != vj) {
111 nzk++; q[m] = vj; q[vj] = qm; qm = vj;
112 }
113 }
114 prow = jl[prow]; /* next pivot row */
115 }
116
117 /* add k to row list for first nonzero element in k-th row */
118 if (nzk > 0) {
119 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
120 jl[k] = jl[i]; jl[i] = k;
121 }
122 iu[k+1] = iu[k] + nzk;
123
124 /* allocate more space to ju if needed */
125 if (iu[k+1] > umax) {
126 /* estimate how much additional space we will need */
127 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
128 /* just double the memory each time */
129 maxadd = umax;
130 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
131 umax += maxadd;
132
133 /* allocate a longer ju */
134 ierr = PetscMalloc1(umax,&jutmp);CHKERRQ(ierr);
135 ierr = PetscArraycpy(jutmp,ju,iu[k]);CHKERRQ(ierr);
136 ierr = PetscFree(ju);CHKERRQ(ierr);
137 ju = jutmp;
138 reallocs++; /* count how many times we realloc */
139 }
140
141 /* save nonzero structure of k-th row in ju */
142 i=k;
143 while (nzk--) {
144 i = q[i];
145 ju[juidx++] = i;
146 }
147 }
148
149 #if defined(PETSC_USE_INFO)
150 if (ai[mbs] != 0) {
151 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
152 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
153 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
154 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
155 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
156 } else {
157 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
158 }
159 #endif
160
161 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
162 ierr = PetscFree2(jl,q);CHKERRQ(ierr);
163
164 /* put together the new matrix */
165 ierr = MatSeqSBAIJSetPreallocation(F,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
166
167 /* ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)iperm);CHKERRQ(ierr); */
168 b = (Mat_SeqSBAIJ*)(F)->data;
169 b->singlemalloc = PETSC_FALSE;
170 b->free_a = PETSC_TRUE;
171 b->free_ij = PETSC_TRUE;
172
173 ierr = PetscMalloc1((iu[mbs]+1)*bs2,&b->a);CHKERRQ(ierr);
174 b->j = ju;
175 b->i = iu;
176 b->diag = NULL;
177 b->ilen = NULL;
178 b->imax = NULL;
179 b->row = perm;
180
181 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
182
183 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
184
185 b->icol = perm;
186 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
187 ierr = PetscMalloc1(bs*mbs+bs,&b->solve_work);CHKERRQ(ierr);
188 /* In b structure: Free imax, ilen, old a, old j.
189 Allocate idnew, solve_work, new a, new j */
190 ierr = PetscLogObjectMemory((PetscObject)F,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
191 b->maxnz = b->nz = iu[mbs];
192
193 (F)->info.factor_mallocs = reallocs;
194 (F)->info.fill_ratio_given = f;
195 if (ai[mbs] != 0) {
196 (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
197 } else {
198 (F)->info.fill_ratio_needed = 0.0;
199 }
200 ierr = MatSeqSBAIJSetNumericFactorization_inplace(F,perm_identity);CHKERRQ(ierr);
201 PetscFunctionReturn(0);
202 }
203 /*
204 Symbolic U^T*D*U factorization for SBAIJ format.
205 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
206 */
207 #include <petscbt.h>
208 #include <../src/mat/utils/freespace.h>
MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)209 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
210 {
211 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
212 Mat_SeqSBAIJ *b;
213 PetscErrorCode ierr;
214 PetscBool perm_identity,missing;
215 PetscReal fill = info->fill;
216 const PetscInt *rip,*ai=a->i,*aj=a->j;
217 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow;
218 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
219 PetscInt nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
220 PetscFreeSpaceList free_space=NULL,current_space=NULL;
221 PetscBT lnkbt;
222
223 PetscFunctionBegin;
224 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
225 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
226 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
227 if (bs > 1) {
228 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);CHKERRQ(ierr);
229 PetscFunctionReturn(0);
230 }
231
232 /* check whether perm is the identity mapping */
233 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
234 if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
235 a->permute = PETSC_FALSE;
236 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
237
238 /* initialization */
239 ierr = PetscMalloc1(mbs+1,&ui);CHKERRQ(ierr);
240 ierr = PetscMalloc1(mbs+1,&udiag);CHKERRQ(ierr);
241 ui[0] = 0;
242
243 /* jl: linked list for storing indices of the pivot rows
244 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
245 ierr = PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols);CHKERRQ(ierr);
246 for (i=0; i<mbs; i++) {
247 jl[i] = mbs; il[i] = 0;
248 }
249
250 /* create and initialize a linked list for storing column indices of the active row k */
251 nlnk = mbs + 1;
252 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
253
254 /* initial FreeSpace size is fill*(ai[mbs]+1) */
255 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space);CHKERRQ(ierr);
256 current_space = free_space;
257
258 for (k=0; k<mbs; k++) { /* for each active row k */
259 /* initialize lnk by the column indices of row rip[k] of A */
260 nzk = 0;
261 ncols = ai[k+1] - ai[k];
262 if (!ncols) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row %D in matrix ",k);
263 for (j=0; j<ncols; j++) {
264 i = *(aj + ai[k] + j);
265 cols[j] = i;
266 }
267 ierr = PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
268 nzk += nlnk;
269
270 /* update lnk by computing fill-in for each pivot row to be merged in */
271 prow = jl[k]; /* 1st pivot row */
272
273 while (prow < k) {
274 nextprow = jl[prow];
275 /* merge prow into k-th row */
276 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
277 jmax = ui[prow+1];
278 ncols = jmax-jmin;
279 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
280 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
281 nzk += nlnk;
282
283 /* update il and jl for prow */
284 if (jmin < jmax) {
285 il[prow] = jmin;
286 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
287 }
288 prow = nextprow;
289 }
290
291 /* if free space is not available, make more free space */
292 if (current_space->local_remaining<nzk) {
293 i = mbs - k + 1; /* num of unfactored rows */
294 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
295 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr);
296 reallocs++;
297 }
298
299 /* copy data into free space, then initialize lnk */
300 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
301
302 /* add the k-th row into il and jl */
303 if (nzk > 1) {
304 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
305 jl[k] = jl[i]; jl[i] = k;
306 il[k] = ui[k] + 1;
307 }
308 ui_ptr[k] = current_space->array;
309
310 current_space->array += nzk;
311 current_space->local_used += nzk;
312 current_space->local_remaining -= nzk;
313
314 ui[k+1] = ui[k] + nzk;
315 }
316
317 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
318 ierr = PetscFree4(ui_ptr,il,jl,cols);CHKERRQ(ierr);
319
320 /* destroy list of free space and other temporary array(s) */
321 ierr = PetscMalloc1(ui[mbs]+1,&uj);CHKERRQ(ierr);
322 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,mbs,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
323 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
324
325 /* put together the new matrix in MATSEQSBAIJ format */
326 ierr = MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
327
328 b = (Mat_SeqSBAIJ*)fact->data;
329 b->singlemalloc = PETSC_FALSE;
330 b->free_a = PETSC_TRUE;
331 b->free_ij = PETSC_TRUE;
332
333 ierr = PetscMalloc1(ui[mbs]+1,&b->a);CHKERRQ(ierr);
334
335 b->j = uj;
336 b->i = ui;
337 b->diag = udiag;
338 b->free_diag = PETSC_TRUE;
339 b->ilen = NULL;
340 b->imax = NULL;
341 b->row = perm;
342 b->icol = perm;
343
344 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
345 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
346
347 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
348
349 ierr = PetscMalloc1(mbs+1,&b->solve_work);CHKERRQ(ierr);
350 ierr = PetscLogObjectMemory((PetscObject)fact,ui[mbs]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
351
352 b->maxnz = b->nz = ui[mbs];
353
354 fact->info.factor_mallocs = reallocs;
355 fact->info.fill_ratio_given = fill;
356 if (ai[mbs] != 0) {
357 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
358 } else {
359 fact->info.fill_ratio_needed = 0.0;
360 }
361 #if defined(PETSC_USE_INFO)
362 if (ai[mbs] != 0) {
363 PetscReal af = fact->info.fill_ratio_needed;
364 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
365 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
366 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
367 } else {
368 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
369 }
370 #endif
371 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
372 PetscFunctionReturn(0);
373 }
374
MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo * info)375 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
376 {
377 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
378 Mat_SeqSBAIJ *b;
379 PetscErrorCode ierr;
380 PetscBool perm_identity,missing;
381 PetscReal fill = info->fill;
382 const PetscInt *rip,*ai,*aj;
383 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d;
384 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
385 PetscInt nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr;
386 PetscFreeSpaceList free_space=NULL,current_space=NULL;
387 PetscBT lnkbt;
388
389 PetscFunctionBegin;
390 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
391 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
392
393 /*
394 This code originally uses Modified Sparse Row (MSR) storage
395 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
396 Then it is rewritten so the factor B takes seqsbaij format. However the associated
397 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
398 thus the original code in MSR format is still used for these cases.
399 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
400 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
401 */
402 if (bs > 1) {
403 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr);
404 PetscFunctionReturn(0);
405 }
406
407 /* check whether perm is the identity mapping */
408 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
409
410 if (perm_identity) {
411 a->permute = PETSC_FALSE;
412
413 ai = a->i; aj = a->j;
414 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
415 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
416
417 /* initialization */
418 ierr = PetscMalloc1(mbs+1,&ui);CHKERRQ(ierr);
419 ui[0] = 0;
420
421 /* jl: linked list for storing indices of the pivot rows
422 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
423 ierr = PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols);CHKERRQ(ierr);
424 for (i=0; i<mbs; i++) {
425 jl[i] = mbs; il[i] = 0;
426 }
427
428 /* create and initialize a linked list for storing column indices of the active row k */
429 nlnk = mbs + 1;
430 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
431
432 /* initial FreeSpace size is fill*(ai[mbs]+1) */
433 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space);CHKERRQ(ierr);
434 current_space = free_space;
435
436 for (k=0; k<mbs; k++) { /* for each active row k */
437 /* initialize lnk by the column indices of row rip[k] of A */
438 nzk = 0;
439 ncols = ai[rip[k]+1] - ai[rip[k]];
440 for (j=0; j<ncols; j++) {
441 i = *(aj + ai[rip[k]] + j);
442 cols[j] = rip[i];
443 }
444 ierr = PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
445 nzk += nlnk;
446
447 /* update lnk by computing fill-in for each pivot row to be merged in */
448 prow = jl[k]; /* 1st pivot row */
449
450 while (prow < k) {
451 nextprow = jl[prow];
452 /* merge prow into k-th row */
453 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
454 jmax = ui[prow+1];
455 ncols = jmax-jmin;
456 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
457 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
458 nzk += nlnk;
459
460 /* update il and jl for prow */
461 if (jmin < jmax) {
462 il[prow] = jmin;
463
464 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
465 }
466 prow = nextprow;
467 }
468
469 /* if free space is not available, make more free space */
470 if (current_space->local_remaining<nzk) {
471 i = mbs - k + 1; /* num of unfactored rows */
472 i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
473 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr);
474 reallocs++;
475 }
476
477 /* copy data into free space, then initialize lnk */
478 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
479
480 /* add the k-th row into il and jl */
481 if (nzk-1 > 0) {
482 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
483 jl[k] = jl[i]; jl[i] = k;
484 il[k] = ui[k] + 1;
485 }
486 ui_ptr[k] = current_space->array;
487
488 current_space->array += nzk;
489 current_space->local_used += nzk;
490 current_space->local_remaining -= nzk;
491
492 ui[k+1] = ui[k] + nzk;
493 }
494
495 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
496 ierr = PetscFree4(ui_ptr,il,jl,cols);CHKERRQ(ierr);
497
498 /* destroy list of free space and other temporary array(s) */
499 ierr = PetscMalloc1(ui[mbs]+1,&uj);CHKERRQ(ierr);
500 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
501 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
502
503 /* put together the new matrix in MATSEQSBAIJ format */
504 ierr = MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
505
506 b = (Mat_SeqSBAIJ*)fact->data;
507 b->singlemalloc = PETSC_FALSE;
508 b->free_a = PETSC_TRUE;
509 b->free_ij = PETSC_TRUE;
510
511 ierr = PetscMalloc1(ui[mbs]+1,&b->a);CHKERRQ(ierr);
512
513 b->j = uj;
514 b->i = ui;
515 b->diag = NULL;
516 b->ilen = NULL;
517 b->imax = NULL;
518 b->row = perm;
519
520 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
521
522 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
523 b->icol = perm;
524 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
525 ierr = PetscMalloc1(mbs+1,&b->solve_work);CHKERRQ(ierr);
526 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
527 b->maxnz = b->nz = ui[mbs];
528
529 fact->info.factor_mallocs = reallocs;
530 fact->info.fill_ratio_given = fill;
531 if (ai[mbs] != 0) {
532 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
533 } else {
534 fact->info.fill_ratio_needed = 0.0;
535 }
536 #if defined(PETSC_USE_INFO)
537 if (ai[mbs] != 0) {
538 PetscReal af = fact->info.fill_ratio_needed;
539 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
540 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
541 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
542 } else {
543 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
544 }
545 #endif
546 ierr = MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity);CHKERRQ(ierr);
547 PetscFunctionReturn(0);
548 }
549
MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo * info)550 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
551 {
552 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
553 IS perm = b->row;
554 PetscErrorCode ierr;
555 const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j;
556 PetscInt i,j;
557 PetscInt *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
558 PetscInt bs =A->rmap->bs,bs2 = a->bs2;
559 MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
560 MatScalar *u,*diag,*rtmp,*rtmp_ptr;
561 MatScalar *work;
562 PetscInt *pivots;
563 PetscBool allowzeropivot,zeropivotdetected;
564
565 PetscFunctionBegin;
566 /* initialization */
567 ierr = PetscCalloc1(bs2*mbs,&rtmp);CHKERRQ(ierr);
568 ierr = PetscMalloc2(mbs,&il,mbs,&jl);CHKERRQ(ierr);
569 allowzeropivot = PetscNot(A->erroriffailure);
570
571 il[0] = 0;
572 for (i=0; i<mbs; i++) jl[i] = mbs;
573
574 ierr = PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work);CHKERRQ(ierr);
575 ierr = PetscMalloc1(bs,&pivots);CHKERRQ(ierr);
576
577 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
578
579 /* check permutation */
580 if (!a->permute) {
581 ai = a->i; aj = a->j; aa = a->a;
582 } else {
583 ai = a->inew; aj = a->jnew;
584 ierr = PetscMalloc1(bs2*ai[mbs],&aa);CHKERRQ(ierr);
585 ierr = PetscArraycpy(aa,a->a,bs2*ai[mbs]);CHKERRQ(ierr);
586 ierr = PetscMalloc1(ai[mbs],&a2anew);CHKERRQ(ierr);
587 ierr = PetscArraycpy(a2anew,a->a2anew,ai[mbs]);CHKERRQ(ierr);
588
589 for (i=0; i<mbs; i++) {
590 jmin = ai[i]; jmax = ai[i+1];
591 for (j=jmin; j<jmax; j++) {
592 while (a2anew[j] != j) {
593 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
594 for (k1=0; k1<bs2; k1++) {
595 dk[k1] = aa[k*bs2+k1];
596 aa[k*bs2+k1] = aa[j*bs2+k1];
597 aa[j*bs2+k1] = dk[k1];
598 }
599 }
600 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
601 if (i > aj[j]) {
602 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */
603 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
604 for (k=0; k<bs; k++) { /* j-th block of aa <- dk^T */
605 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
606 }
607 }
608 }
609 }
610 ierr = PetscFree(a2anew);CHKERRQ(ierr);
611 }
612
613 /* for each row k */
614 for (k = 0; k<mbs; k++) {
615
616 /*initialize k-th row with elements nonzero in row perm(k) of A */
617 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
618
619 ap = aa + jmin*bs2;
620 for (j = jmin; j < jmax; j++) {
621 vj = perm_ptr[aj[j]]; /* block col. index */
622 rtmp_ptr = rtmp + vj*bs2;
623 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
624 }
625
626 /* modify k-th row by adding in those rows i with U(i,k) != 0 */
627 ierr = PetscArraycpy(dk,rtmp+k*bs2,bs2);CHKERRQ(ierr);
628 i = jl[k]; /* first row to be added to k_th row */
629
630 while (i < k) {
631 nexti = jl[i]; /* next row to be added to k_th row */
632
633 /* compute multiplier */
634 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
635
636 /* uik = -inv(Di)*U_bar(i,k) */
637 diag = ba + i*bs2;
638 u = ba + ili*bs2;
639 ierr = PetscArrayzero(uik,bs2);CHKERRQ(ierr);
640 PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
641
642 /* update D(k) += -U(i,k)^T * U_bar(i,k) */
643 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
644 ierr = PetscLogFlops(4.0*bs*bs2);CHKERRQ(ierr);
645
646 /* update -U(i,k) */
647 ierr = PetscArraycpy(ba+ili*bs2,uik,bs2);CHKERRQ(ierr);
648
649 /* add multiple of row i to k-th row ... */
650 jmin = ili + 1; jmax = bi[i+1];
651 if (jmin < jmax) {
652 for (j=jmin; j<jmax; j++) {
653 /* rtmp += -U(i,k)^T * U_bar(i,j) */
654 rtmp_ptr = rtmp + bj[j]*bs2;
655 u = ba + j*bs2;
656 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
657 }
658 ierr = PetscLogFlops(2.0*bs*bs2*(jmax-jmin));CHKERRQ(ierr);
659
660 /* ... add i to row list for next nonzero entry */
661 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
662 j = bj[jmin];
663 jl[i] = jl[j]; jl[j] = i; /* update jl */
664 }
665 i = nexti;
666 }
667
668 /* save nonzero entries in k-th row of U ... */
669
670 /* invert diagonal block */
671 diag = ba+k*bs2;
672 ierr = PetscArraycpy(diag,dk,bs2);CHKERRQ(ierr);
673
674 ierr = PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
675 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
676
677 jmin = bi[k]; jmax = bi[k+1];
678 if (jmin < jmax) {
679 for (j=jmin; j<jmax; j++) {
680 vj = bj[j]; /* block col. index of U */
681 u = ba + j*bs2;
682 rtmp_ptr = rtmp + vj*bs2;
683 for (k1=0; k1<bs2; k1++) {
684 *u++ = *rtmp_ptr;
685 *rtmp_ptr++ = 0.0;
686 }
687 }
688
689 /* ... add k to row list for first nonzero entry in k-th row */
690 il[k] = jmin;
691 i = bj[jmin];
692 jl[k] = jl[i]; jl[i] = k;
693 }
694 }
695
696 ierr = PetscFree(rtmp);CHKERRQ(ierr);
697 ierr = PetscFree2(il,jl);CHKERRQ(ierr);
698 ierr = PetscFree3(dk,uik,work);CHKERRQ(ierr);
699 ierr = PetscFree(pivots);CHKERRQ(ierr);
700 if (a->permute) {
701 ierr = PetscFree(aa);CHKERRQ(ierr);
702 }
703
704 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
705
706 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace;
707 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
708 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace;
709 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace;
710
711 C->assembled = PETSC_TRUE;
712 C->preallocated = PETSC_TRUE;
713
714 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
715 PetscFunctionReturn(0);
716 }
717
MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo * info)718 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
719 {
720 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
721 PetscErrorCode ierr;
722 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
723 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
724 PetscInt bs =A->rmap->bs,bs2 = a->bs2;
725 MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
726 MatScalar *u,*diag,*rtmp,*rtmp_ptr;
727 MatScalar *work;
728 PetscInt *pivots;
729 PetscBool allowzeropivot,zeropivotdetected;
730
731 PetscFunctionBegin;
732 ierr = PetscCalloc1(bs2*mbs,&rtmp);CHKERRQ(ierr);
733 ierr = PetscMalloc2(mbs,&il,mbs,&jl);CHKERRQ(ierr);
734 il[0] = 0;
735 for (i=0; i<mbs; i++) jl[i] = mbs;
736
737 ierr = PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work);CHKERRQ(ierr);
738 ierr = PetscMalloc1(bs,&pivots);CHKERRQ(ierr);
739 allowzeropivot = PetscNot(A->erroriffailure);
740
741 ai = a->i; aj = a->j; aa = a->a;
742
743 /* for each row k */
744 for (k = 0; k<mbs; k++) {
745
746 /*initialize k-th row with elements nonzero in row k of A */
747 jmin = ai[k]; jmax = ai[k+1];
748 ap = aa + jmin*bs2;
749 for (j = jmin; j < jmax; j++) {
750 vj = aj[j]; /* block col. index */
751 rtmp_ptr = rtmp + vj*bs2;
752 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
753 }
754
755 /* modify k-th row by adding in those rows i with U(i,k) != 0 */
756 ierr = PetscArraycpy(dk,rtmp+k*bs2,bs2);CHKERRQ(ierr);
757 i = jl[k]; /* first row to be added to k_th row */
758
759 while (i < k) {
760 nexti = jl[i]; /* next row to be added to k_th row */
761
762 /* compute multiplier */
763 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
764
765 /* uik = -inv(Di)*U_bar(i,k) */
766 diag = ba + i*bs2;
767 u = ba + ili*bs2;
768 ierr = PetscArrayzero(uik,bs2);CHKERRQ(ierr);
769 PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
770
771 /* update D(k) += -U(i,k)^T * U_bar(i,k) */
772 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
773 ierr = PetscLogFlops(2.0*bs*bs2);CHKERRQ(ierr);
774
775 /* update -U(i,k) */
776 ierr = PetscArraycpy(ba+ili*bs2,uik,bs2);CHKERRQ(ierr);
777
778 /* add multiple of row i to k-th row ... */
779 jmin = ili + 1; jmax = bi[i+1];
780 if (jmin < jmax) {
781 for (j=jmin; j<jmax; j++) {
782 /* rtmp += -U(i,k)^T * U_bar(i,j) */
783 rtmp_ptr = rtmp + bj[j]*bs2;
784 u = ba + j*bs2;
785 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
786 }
787 ierr = PetscLogFlops(2.0*bs*bs2*(jmax-jmin));CHKERRQ(ierr);
788
789 /* ... add i to row list for next nonzero entry */
790 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
791 j = bj[jmin];
792 jl[i] = jl[j]; jl[j] = i; /* update jl */
793 }
794 i = nexti;
795 }
796
797 /* save nonzero entries in k-th row of U ... */
798
799 /* invert diagonal block */
800 diag = ba+k*bs2;
801 ierr = PetscArraycpy(diag,dk,bs2);CHKERRQ(ierr);
802
803 ierr = PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
804 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
805
806 jmin = bi[k]; jmax = bi[k+1];
807 if (jmin < jmax) {
808 for (j=jmin; j<jmax; j++) {
809 vj = bj[j]; /* block col. index of U */
810 u = ba + j*bs2;
811 rtmp_ptr = rtmp + vj*bs2;
812 for (k1=0; k1<bs2; k1++) {
813 *u++ = *rtmp_ptr;
814 *rtmp_ptr++ = 0.0;
815 }
816 }
817
818 /* ... add k to row list for first nonzero entry in k-th row */
819 il[k] = jmin;
820 i = bj[jmin];
821 jl[k] = jl[i]; jl[i] = k;
822 }
823 }
824
825 ierr = PetscFree(rtmp);CHKERRQ(ierr);
826 ierr = PetscFree2(il,jl);CHKERRQ(ierr);
827 ierr = PetscFree3(dk,uik,work);CHKERRQ(ierr);
828 ierr = PetscFree(pivots);CHKERRQ(ierr);
829
830 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
831 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
832 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
833 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
834 C->assembled = PETSC_TRUE;
835 C->preallocated = PETSC_TRUE;
836
837 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
838 PetscFunctionReturn(0);
839 }
840
841 /*
842 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
843 Version for blocks 2 by 2.
844 */
MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo * info)845 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo *info)
846 {
847 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
848 IS perm = b->row;
849 PetscErrorCode ierr;
850 const PetscInt *ai,*aj,*perm_ptr;
851 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
852 PetscInt *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
853 MatScalar *ba = b->a,*aa,*ap;
854 MatScalar *u,*diag,*rtmp,*rtmp_ptr,dk[4],uik[4];
855 PetscReal shift = info->shiftamount;
856 PetscBool allowzeropivot,zeropivotdetected;
857
858 PetscFunctionBegin;
859 allowzeropivot = PetscNot(A->erroriffailure);
860
861 /* initialization */
862 /* il and jl record the first nonzero element in each row of the accessing
863 window U(0:k, k:mbs-1).
864 jl: list of rows to be added to uneliminated rows
865 i>= k: jl(i) is the first row to be added to row i
866 i< k: jl(i) is the row following row i in some list of rows
867 jl(i) = mbs indicates the end of a list
868 il(i): points to the first nonzero element in columns k,...,mbs-1 of
869 row i of U */
870 ierr = PetscCalloc1(4*mbs,&rtmp);CHKERRQ(ierr);
871 ierr = PetscMalloc2(mbs,&il,mbs,&jl);CHKERRQ(ierr);
872 il[0] = 0;
873 for (i=0; i<mbs; i++) jl[i] = mbs;
874
875 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
876
877 /* check permutation */
878 if (!a->permute) {
879 ai = a->i; aj = a->j; aa = a->a;
880 } else {
881 ai = a->inew; aj = a->jnew;
882 ierr = PetscMalloc1(4*ai[mbs],&aa);CHKERRQ(ierr);
883 ierr = PetscArraycpy(aa,a->a,4*ai[mbs]);CHKERRQ(ierr);
884 ierr = PetscMalloc1(ai[mbs],&a2anew);CHKERRQ(ierr);
885 ierr = PetscArraycpy(a2anew,a->a2anew,ai[mbs]);CHKERRQ(ierr);
886
887 for (i=0; i<mbs; i++) {
888 jmin = ai[i]; jmax = ai[i+1];
889 for (j=jmin; j<jmax; j++) {
890 while (a2anew[j] != j) {
891 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
892 for (k1=0; k1<4; k1++) {
893 dk[k1] = aa[k*4+k1];
894 aa[k*4+k1] = aa[j*4+k1];
895 aa[j*4+k1] = dk[k1];
896 }
897 }
898 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
899 if (i > aj[j]) {
900 ap = aa + j*4; /* ptr to the beginning of the block */
901 dk[1] = ap[1]; /* swap ap[1] and ap[2] */
902 ap[1] = ap[2];
903 ap[2] = dk[1];
904 }
905 }
906 }
907 ierr = PetscFree(a2anew);CHKERRQ(ierr);
908 }
909
910 /* for each row k */
911 for (k = 0; k<mbs; k++) {
912
913 /*initialize k-th row with elements nonzero in row perm(k) of A */
914 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
915 ap = aa + jmin*4;
916 for (j = jmin; j < jmax; j++) {
917 vj = perm_ptr[aj[j]]; /* block col. index */
918 rtmp_ptr = rtmp + vj*4;
919 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
920 }
921
922 /* modify k-th row by adding in those rows i with U(i,k) != 0 */
923 ierr = PetscArraycpy(dk,rtmp+k*4,4);CHKERRQ(ierr);
924 i = jl[k]; /* first row to be added to k_th row */
925
926 while (i < k) {
927 nexti = jl[i]; /* next row to be added to k_th row */
928
929 /* compute multiplier */
930 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
931
932 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
933 diag = ba + i*4;
934 u = ba + ili*4;
935 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
936 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
937 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
938 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
939
940 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
941 dk[0] += uik[0]*u[0] + uik[1]*u[1];
942 dk[1] += uik[2]*u[0] + uik[3]*u[1];
943 dk[2] += uik[0]*u[2] + uik[1]*u[3];
944 dk[3] += uik[2]*u[2] + uik[3]*u[3];
945
946 ierr = PetscLogFlops(16.0*2.0);CHKERRQ(ierr);
947
948 /* update -U(i,k): ba[ili] = uik */
949 ierr = PetscArraycpy(ba+ili*4,uik,4);CHKERRQ(ierr);
950
951 /* add multiple of row i to k-th row ... */
952 jmin = ili + 1; jmax = bi[i+1];
953 if (jmin < jmax) {
954 for (j=jmin; j<jmax; j++) {
955 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
956 rtmp_ptr = rtmp + bj[j]*4;
957 u = ba + j*4;
958 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
959 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
960 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
961 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
962 }
963 ierr = PetscLogFlops(16.0*(jmax-jmin));CHKERRQ(ierr);
964
965 /* ... add i to row list for next nonzero entry */
966 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
967 j = bj[jmin];
968 jl[i] = jl[j]; jl[j] = i; /* update jl */
969 }
970 i = nexti;
971 }
972
973 /* save nonzero entries in k-th row of U ... */
974
975 /* invert diagonal block */
976 diag = ba+k*4;
977 ierr = PetscArraycpy(diag,dk,4);CHKERRQ(ierr);
978 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
979 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
980
981 jmin = bi[k]; jmax = bi[k+1];
982 if (jmin < jmax) {
983 for (j=jmin; j<jmax; j++) {
984 vj = bj[j]; /* block col. index of U */
985 u = ba + j*4;
986 rtmp_ptr = rtmp + vj*4;
987 for (k1=0; k1<4; k1++) {
988 *u++ = *rtmp_ptr;
989 *rtmp_ptr++ = 0.0;
990 }
991 }
992
993 /* ... add k to row list for first nonzero entry in k-th row */
994 il[k] = jmin;
995 i = bj[jmin];
996 jl[k] = jl[i]; jl[i] = k;
997 }
998 }
999
1000 ierr = PetscFree(rtmp);CHKERRQ(ierr);
1001 ierr = PetscFree2(il,jl);CHKERRQ(ierr);
1002 if (a->permute) {
1003 ierr = PetscFree(aa);CHKERRQ(ierr);
1004 }
1005 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
1006
1007 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace;
1008 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1009 C->assembled = PETSC_TRUE;
1010 C->preallocated = PETSC_TRUE;
1011
1012 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
1013 PetscFunctionReturn(0);
1014 }
1015
1016 /*
1017 Version for when blocks are 2 by 2 Using natural ordering
1018 */
MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo * info)1019 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
1020 {
1021 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
1022 PetscErrorCode ierr;
1023 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1024 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1025 MatScalar *ba = b->a,*aa,*ap,dk[8],uik[8];
1026 MatScalar *u,*diag,*rtmp,*rtmp_ptr;
1027 PetscReal shift = info->shiftamount;
1028 PetscBool allowzeropivot,zeropivotdetected;
1029
1030 PetscFunctionBegin;
1031 allowzeropivot = PetscNot(A->erroriffailure);
1032
1033 /* initialization */
1034 /* il and jl record the first nonzero element in each row of the accessing
1035 window U(0:k, k:mbs-1).
1036 jl: list of rows to be added to uneliminated rows
1037 i>= k: jl(i) is the first row to be added to row i
1038 i< k: jl(i) is the row following row i in some list of rows
1039 jl(i) = mbs indicates the end of a list
1040 il(i): points to the first nonzero element in columns k,...,mbs-1 of
1041 row i of U */
1042 ierr = PetscCalloc1(4*mbs,&rtmp);CHKERRQ(ierr);
1043 ierr = PetscMalloc2(mbs,&il,mbs,&jl);CHKERRQ(ierr);
1044 il[0] = 0;
1045 for (i=0; i<mbs; i++) jl[i] = mbs;
1046
1047 ai = a->i; aj = a->j; aa = a->a;
1048
1049 /* for each row k */
1050 for (k = 0; k<mbs; k++) {
1051
1052 /*initialize k-th row with elements nonzero in row k of A */
1053 jmin = ai[k]; jmax = ai[k+1];
1054 ap = aa + jmin*4;
1055 for (j = jmin; j < jmax; j++) {
1056 vj = aj[j]; /* block col. index */
1057 rtmp_ptr = rtmp + vj*4;
1058 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1059 }
1060
1061 /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1062 ierr = PetscArraycpy(dk,rtmp+k*4,4);CHKERRQ(ierr);
1063 i = jl[k]; /* first row to be added to k_th row */
1064
1065 while (i < k) {
1066 nexti = jl[i]; /* next row to be added to k_th row */
1067
1068 /* compute multiplier */
1069 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1070
1071 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1072 diag = ba + i*4;
1073 u = ba + ili*4;
1074 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1075 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1076 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1077 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
1078
1079 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1080 dk[0] += uik[0]*u[0] + uik[1]*u[1];
1081 dk[1] += uik[2]*u[0] + uik[3]*u[1];
1082 dk[2] += uik[0]*u[2] + uik[1]*u[3];
1083 dk[3] += uik[2]*u[2] + uik[3]*u[3];
1084
1085 ierr = PetscLogFlops(16.0*2.0);CHKERRQ(ierr);
1086
1087 /* update -U(i,k): ba[ili] = uik */
1088 ierr = PetscArraycpy(ba+ili*4,uik,4);CHKERRQ(ierr);
1089
1090 /* add multiple of row i to k-th row ... */
1091 jmin = ili + 1; jmax = bi[i+1];
1092 if (jmin < jmax) {
1093 for (j=jmin; j<jmax; j++) {
1094 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1095 rtmp_ptr = rtmp + bj[j]*4;
1096 u = ba + j*4;
1097 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1098 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1099 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1100 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1101 }
1102 ierr = PetscLogFlops(16.0*(jmax-jmin));CHKERRQ(ierr);
1103
1104 /* ... add i to row list for next nonzero entry */
1105 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1106 j = bj[jmin];
1107 jl[i] = jl[j]; jl[j] = i; /* update jl */
1108 }
1109 i = nexti;
1110 }
1111
1112 /* save nonzero entries in k-th row of U ... */
1113
1114 /* invert diagonal block */
1115 diag = ba+k*4;
1116 ierr = PetscArraycpy(diag,dk,4);CHKERRQ(ierr);
1117 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
1118 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1119
1120 jmin = bi[k]; jmax = bi[k+1];
1121 if (jmin < jmax) {
1122 for (j=jmin; j<jmax; j++) {
1123 vj = bj[j]; /* block col. index of U */
1124 u = ba + j*4;
1125 rtmp_ptr = rtmp + vj*4;
1126 for (k1=0; k1<4; k1++) {
1127 *u++ = *rtmp_ptr;
1128 *rtmp_ptr++ = 0.0;
1129 }
1130 }
1131
1132 /* ... add k to row list for first nonzero entry in k-th row */
1133 il[k] = jmin;
1134 i = bj[jmin];
1135 jl[k] = jl[i]; jl[i] = k;
1136 }
1137 }
1138
1139 ierr = PetscFree(rtmp);CHKERRQ(ierr);
1140 ierr = PetscFree2(il,jl);CHKERRQ(ierr);
1141
1142 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1143 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1144 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1145 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1146 C->assembled = PETSC_TRUE;
1147 C->preallocated = PETSC_TRUE;
1148
1149 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
1150 PetscFunctionReturn(0);
1151 }
1152
1153 /*
1154 Numeric U^T*D*U factorization for SBAIJ format.
1155 Version for blocks are 1 by 1.
1156 */
MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo * info)1157 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info)
1158 {
1159 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1160 IS ip=b->row;
1161 PetscErrorCode ierr;
1162 const PetscInt *ai,*aj,*rip;
1163 PetscInt *a2anew,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol;
1164 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1165 MatScalar *rtmp,*ba=b->a,*bval,*aa,dk,uikdi;
1166 PetscReal rs;
1167 FactorShiftCtx sctx;
1168
1169 PetscFunctionBegin;
1170 /* MatPivotSetUp(): initialize shift context sctx */
1171 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
1172
1173 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1174 if (!a->permute) {
1175 ai = a->i; aj = a->j; aa = a->a;
1176 } else {
1177 ai = a->inew; aj = a->jnew;
1178 nz = ai[mbs];
1179 ierr = PetscMalloc1(nz,&aa);CHKERRQ(ierr);
1180 a2anew = a->a2anew;
1181 bval = a->a;
1182 for (j=0; j<nz; j++) {
1183 aa[a2anew[j]] = *(bval++);
1184 }
1185 }
1186
1187 /* initialization */
1188 /* il and jl record the first nonzero element in each row of the accessing
1189 window U(0:k, k:mbs-1).
1190 jl: list of rows to be added to uneliminated rows
1191 i>= k: jl(i) is the first row to be added to row i
1192 i< k: jl(i) is the row following row i in some list of rows
1193 jl(i) = mbs indicates the end of a list
1194 il(i): points to the first nonzero element in columns k,...,mbs-1 of
1195 row i of U */
1196 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr);
1197
1198 do {
1199 sctx.newshift = PETSC_FALSE;
1200 il[0] = 0;
1201 for (i=0; i<mbs; i++) {
1202 rtmp[i] = 0.0; jl[i] = mbs;
1203 }
1204
1205 for (k = 0; k<mbs; k++) {
1206 /*initialize k-th row by the perm[k]-th row of A */
1207 jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1208 bval = ba + bi[k];
1209 for (j = jmin; j < jmax; j++) {
1210 col = rip[aj[j]];
1211 rtmp[col] = aa[j];
1212 *bval++ = 0.0; /* for in-place factorization */
1213 }
1214
1215 /* shift the diagonal of the matrix */
1216 if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1217
1218 /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1219 dk = rtmp[k];
1220 i = jl[k]; /* first row to be added to k_th row */
1221
1222 while (i < k) {
1223 nexti = jl[i]; /* next row to be added to k_th row */
1224
1225 /* compute multiplier, update diag(k) and U(i,k) */
1226 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1227 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
1228 dk += uikdi*ba[ili];
1229 ba[ili] = uikdi; /* -U(i,k) */
1230
1231 /* add multiple of row i to k-th row */
1232 jmin = ili + 1; jmax = bi[i+1];
1233 if (jmin < jmax) {
1234 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1235 ierr = PetscLogFlops(2.0*(jmax-jmin));CHKERRQ(ierr);
1236
1237 /* update il and jl for row i */
1238 il[i] = jmin;
1239 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1240 }
1241 i = nexti;
1242 }
1243
1244 /* shift the diagonals when zero pivot is detected */
1245 /* compute rs=sum of abs(off-diagonal) */
1246 rs = 0.0;
1247 jmin = bi[k]+1;
1248 nz = bi[k+1] - jmin;
1249 if (nz) {
1250 bcol = bj + jmin;
1251 while (nz--) {
1252 rs += PetscAbsScalar(rtmp[*bcol]);
1253 bcol++;
1254 }
1255 }
1256
1257 sctx.rs = rs;
1258 sctx.pv = dk;
1259 ierr = MatPivotCheck(C,A,info,&sctx,k);CHKERRQ(ierr);
1260 if (sctx.newshift) break; /* sctx.shift_amount is updated */
1261 dk = sctx.pv;
1262
1263 /* copy data into U(k,:) */
1264 ba[bi[k]] = 1.0/dk; /* U(k,k) */
1265 jmin = bi[k]+1; jmax = bi[k+1];
1266 if (jmin < jmax) {
1267 for (j=jmin; j<jmax; j++) {
1268 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1269 }
1270 /* add the k-th row into il and jl */
1271 il[k] = jmin;
1272 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1273 }
1274 }
1275 } while (sctx.newshift);
1276 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
1277 if (a->permute) {ierr = PetscFree(aa);CHKERRQ(ierr);}
1278
1279 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1280
1281 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
1282 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1283 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1284 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
1285 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
1286 C->assembled = PETSC_TRUE;
1287 C->preallocated = PETSC_TRUE;
1288
1289 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr);
1290 if (sctx.nshift) {
1291 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1292 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
1293 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1294 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
1295 }
1296 }
1297 PetscFunctionReturn(0);
1298 }
1299
1300 /*
1301 Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1302 Modified from MatCholeskyFactorNumeric_SeqAIJ()
1303 */
MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo * info)1304 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info)
1305 {
1306 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1307 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)B->data;
1308 PetscErrorCode ierr;
1309 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
1310 PetscInt *ai=a->i,*aj=a->j,*ajtmp;
1311 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
1312 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1313 FactorShiftCtx sctx;
1314 PetscReal rs;
1315 MatScalar d,*v;
1316
1317 PetscFunctionBegin;
1318 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr);
1319
1320 /* MatPivotSetUp(): initialize shift context sctx */
1321 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
1322
1323 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1324 sctx.shift_top = info->zeropivot;
1325
1326 ierr = PetscArrayzero(rtmp,mbs);CHKERRQ(ierr);
1327
1328 for (i=0; i<mbs; i++) {
1329 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1330 d = (aa)[a->diag[i]];
1331 rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1332 ajtmp = aj + ai[i] + 1; /* exclude diagonal */
1333 v = aa + ai[i] + 1;
1334 nz = ai[i+1] - ai[i] - 1;
1335 for (j=0; j<nz; j++) {
1336 rtmp[i] += PetscAbsScalar(v[j]);
1337 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1338 }
1339 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1340 }
1341 sctx.shift_top *= 1.1;
1342 sctx.nshift_max = 5;
1343 sctx.shift_lo = 0.;
1344 sctx.shift_hi = 1.;
1345 }
1346
1347 /* allocate working arrays
1348 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1349 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1350 */
1351 do {
1352 sctx.newshift = PETSC_FALSE;
1353
1354 for (i=0; i<mbs; i++) c2r[i] = mbs;
1355 if (mbs) il[0] = 0;
1356
1357 for (k = 0; k<mbs; k++) {
1358 /* zero rtmp */
1359 nz = bi[k+1] - bi[k];
1360 bjtmp = bj + bi[k];
1361 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
1362
1363 /* load in initial unfactored row */
1364 bval = ba + bi[k];
1365 jmin = ai[k]; jmax = ai[k+1];
1366 for (j = jmin; j < jmax; j++) {
1367 col = aj[j];
1368 rtmp[col] = aa[j];
1369 *bval++ = 0.0; /* for in-place factorization */
1370 }
1371 /* shift the diagonal of the matrix: ZeropivotApply() */
1372 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1373
1374 /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1375 dk = rtmp[k];
1376 i = c2r[k]; /* first row to be added to k_th row */
1377
1378 while (i < k) {
1379 nexti = c2r[i]; /* next row to be added to k_th row */
1380
1381 /* compute multiplier, update diag(k) and U(i,k) */
1382 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1383 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
1384 dk += uikdi*ba[ili]; /* update diag[k] */
1385 ba[ili] = uikdi; /* -U(i,k) */
1386
1387 /* add multiple of row i to k-th row */
1388 jmin = ili + 1; jmax = bi[i+1];
1389 if (jmin < jmax) {
1390 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1391 /* update il and c2r for row i */
1392 il[i] = jmin;
1393 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
1394 }
1395 i = nexti;
1396 }
1397
1398 /* copy data into U(k,:) */
1399 rs = 0.0;
1400 jmin = bi[k]; jmax = bi[k+1]-1;
1401 if (jmin < jmax) {
1402 for (j=jmin; j<jmax; j++) {
1403 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
1404 }
1405 /* add the k-th row into il and c2r */
1406 il[k] = jmin;
1407 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
1408 }
1409
1410 sctx.rs = rs;
1411 sctx.pv = dk;
1412 ierr = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr);
1413 if (sctx.newshift) break;
1414 dk = sctx.pv;
1415
1416 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
1417 }
1418 } while (sctx.newshift);
1419
1420 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
1421
1422 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1423 B->ops->solves = MatSolves_SeqSBAIJ_1;
1424 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1425 B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1426 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1427 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1428
1429 B->assembled = PETSC_TRUE;
1430 B->preallocated = PETSC_TRUE;
1431
1432 ierr = PetscLogFlops(B->rmap->n);CHKERRQ(ierr);
1433
1434 /* MatPivotView() */
1435 if (sctx.nshift) {
1436 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1437 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
1438 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1439 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
1440 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1441 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
1442 }
1443 }
1444 PetscFunctionReturn(0);
1445 }
1446
MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo * info)1447 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info)
1448 {
1449 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1450 PetscErrorCode ierr;
1451 PetscInt i,j,mbs = a->mbs;
1452 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1453 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1454 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1455 PetscReal rs;
1456 FactorShiftCtx sctx;
1457
1458 PetscFunctionBegin;
1459 /* MatPivotSetUp(): initialize shift context sctx */
1460 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
1461
1462 /* initialization */
1463 /* il and jl record the first nonzero element in each row of the accessing
1464 window U(0:k, k:mbs-1).
1465 jl: list of rows to be added to uneliminated rows
1466 i>= k: jl(i) is the first row to be added to row i
1467 i< k: jl(i) is the row following row i in some list of rows
1468 jl(i) = mbs indicates the end of a list
1469 il(i): points to the first nonzero element in U(i,k:mbs-1)
1470 */
1471 ierr = PetscMalloc1(mbs,&rtmp);CHKERRQ(ierr);
1472 ierr = PetscMalloc2(mbs,&il,mbs,&jl);CHKERRQ(ierr);
1473
1474 do {
1475 sctx.newshift = PETSC_FALSE;
1476 il[0] = 0;
1477 for (i=0; i<mbs; i++) {
1478 rtmp[i] = 0.0; jl[i] = mbs;
1479 }
1480
1481 for (k = 0; k<mbs; k++) {
1482 /*initialize k-th row with elements nonzero in row perm(k) of A */
1483 nz = ai[k+1] - ai[k];
1484 acol = aj + ai[k];
1485 aval = aa + ai[k];
1486 bval = ba + bi[k];
1487 while (nz--) {
1488 rtmp[*acol++] = *aval++;
1489 *bval++ = 0.0; /* for in-place factorization */
1490 }
1491
1492 /* shift the diagonal of the matrix */
1493 if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1494
1495 /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1496 dk = rtmp[k];
1497 i = jl[k]; /* first row to be added to k_th row */
1498
1499 while (i < k) {
1500 nexti = jl[i]; /* next row to be added to k_th row */
1501 /* compute multiplier, update D(k) and U(i,k) */
1502 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1503 uikdi = -ba[ili]*ba[bi[i]];
1504 dk += uikdi*ba[ili];
1505 ba[ili] = uikdi; /* -U(i,k) */
1506
1507 /* add multiple of row i to k-th row ... */
1508 jmin = ili + 1;
1509 nz = bi[i+1] - jmin;
1510 if (nz > 0) {
1511 bcol = bj + jmin;
1512 bval = ba + jmin;
1513 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
1514 while (nz--) rtmp[*bcol++] += uikdi*(*bval++);
1515
1516 /* update il and jl for i-th row */
1517 il[i] = jmin;
1518 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1519 }
1520 i = nexti;
1521 }
1522
1523 /* shift the diagonals when zero pivot is detected */
1524 /* compute rs=sum of abs(off-diagonal) */
1525 rs = 0.0;
1526 jmin = bi[k]+1;
1527 nz = bi[k+1] - jmin;
1528 if (nz) {
1529 bcol = bj + jmin;
1530 while (nz--) {
1531 rs += PetscAbsScalar(rtmp[*bcol]);
1532 bcol++;
1533 }
1534 }
1535
1536 sctx.rs = rs;
1537 sctx.pv = dk;
1538 ierr = MatPivotCheck(C,A,info,&sctx,k);CHKERRQ(ierr);
1539 if (sctx.newshift) break; /* sctx.shift_amount is updated */
1540 dk = sctx.pv;
1541
1542 /* copy data into U(k,:) */
1543 ba[bi[k]] = 1.0/dk;
1544 jmin = bi[k]+1;
1545 nz = bi[k+1] - jmin;
1546 if (nz) {
1547 bcol = bj + jmin;
1548 bval = ba + jmin;
1549 while (nz--) {
1550 *bval++ = rtmp[*bcol];
1551 rtmp[*bcol++] = 0.0;
1552 }
1553 /* add k-th row into il and jl */
1554 il[k] = jmin;
1555 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1556 }
1557 } /* end of for (k = 0; k<mbs; k++) */
1558 } while (sctx.newshift);
1559 ierr = PetscFree(rtmp);CHKERRQ(ierr);
1560 ierr = PetscFree2(il,jl);CHKERRQ(ierr);
1561
1562 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1563 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1564 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1565 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1566 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1567
1568 C->assembled = PETSC_TRUE;
1569 C->preallocated = PETSC_TRUE;
1570
1571 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr);
1572 if (sctx.nshift) {
1573 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1574 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
1575 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1576 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
1577 }
1578 }
1579 PetscFunctionReturn(0);
1580 }
1581
MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo * info)1582 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo *info)
1583 {
1584 PetscErrorCode ierr;
1585 Mat C;
1586
1587 PetscFunctionBegin;
1588 ierr = MatGetFactor(A,"petsc",MAT_FACTOR_CHOLESKY,&C);CHKERRQ(ierr);
1589 ierr = MatCholeskyFactorSymbolic(C,A,perm,info);CHKERRQ(ierr);
1590 ierr = MatCholeskyFactorNumeric(C,A,info);CHKERRQ(ierr);
1591
1592 A->ops->solve = C->ops->solve;
1593 A->ops->solvetranspose = C->ops->solvetranspose;
1594
1595 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1596 PetscFunctionReturn(0);
1597 }
1598