1
2 /*
3 Defines the basic matrix operations for the BAIJ (compressed row)
4 matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19
MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar ** values)20 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
21 {
22 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
23 PetscErrorCode ierr;
24 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
25 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work;
26 PetscReal shift = 0.0;
27 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE;
28
29 PetscFunctionBegin;
30 allowzeropivot = PetscNot(A->erroriffailure);
31
32 if (a->idiagvalid) {
33 if (values) *values = a->idiag;
34 PetscFunctionReturn(0);
35 }
36 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
37 diag_offset = a->diag;
38 if (!a->idiag) {
39 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
40 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
41 }
42 diag = a->idiag;
43 if (values) *values = a->idiag;
44 /* factor and invert each block */
45 switch (bs) {
46 case 1:
47 for (i=0; i<mbs; i++) {
48 odiag = v + 1*diag_offset[i];
49 diag[0] = odiag[0];
50
51 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
52 if (allowzeropivot) {
53 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
54 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
55 A->factorerror_zeropivot_row = i;
56 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
57 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
58 }
59
60 diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
61 diag += 1;
62 }
63 break;
64 case 2:
65 for (i=0; i<mbs; i++) {
66 odiag = v + 4*diag_offset[i];
67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
69 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
70 diag += 4;
71 }
72 break;
73 case 3:
74 for (i=0; i<mbs; i++) {
75 odiag = v + 9*diag_offset[i];
76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78 diag[8] = odiag[8];
79 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
80 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
81 diag += 9;
82 }
83 break;
84 case 4:
85 for (i=0; i<mbs; i++) {
86 odiag = v + 16*diag_offset[i];
87 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
88 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
89 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
90 diag += 16;
91 }
92 break;
93 case 5:
94 for (i=0; i<mbs; i++) {
95 odiag = v + 25*diag_offset[i];
96 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
97 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
98 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99 diag += 25;
100 }
101 break;
102 case 6:
103 for (i=0; i<mbs; i++) {
104 odiag = v + 36*diag_offset[i];
105 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
106 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
107 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
108 diag += 36;
109 }
110 break;
111 case 7:
112 for (i=0; i<mbs; i++) {
113 odiag = v + 49*diag_offset[i];
114 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
115 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117 diag += 49;
118 }
119 break;
120 default:
121 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
122 for (i=0; i<mbs; i++) {
123 odiag = v + bs2*diag_offset[i];
124 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
125 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
126 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127 diag += bs2;
128 }
129 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
130 }
131 a->idiagvalid = PETSC_TRUE;
132 PetscFunctionReturn(0);
133 }
134
MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)135 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
136 {
137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138 PetscScalar *x,*work,*w,*workt,*t;
139 const MatScalar *v,*aa = a->a, *idiag;
140 const PetscScalar *b,*xb;
141 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
142 PetscErrorCode ierr;
143 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
144 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
145
146 PetscFunctionBegin;
147 its = its*lits;
148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
150 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
151 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
152 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
153
154 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
155
156 if (!m) PetscFunctionReturn(0);
157 diag = a->diag;
158 idiag = a->idiag;
159 k = PetscMax(A->rmap->n,A->cmap->n);
160 if (!a->mult_work) {
161 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
162 }
163 if (!a->sor_workt) {
164 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
165 }
166 if (!a->sor_work) {
167 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
168 }
169 work = a->mult_work;
170 t = a->sor_workt;
171 w = a->sor_work;
172
173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175
176 if (flag & SOR_ZERO_INITIAL_GUESS) {
177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178 switch (bs) {
179 case 1:
180 PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181 t[0] = b[0];
182 i2 = 1;
183 idiag += 1;
184 for (i=1; i<m; i++) {
185 v = aa + ai[i];
186 vi = aj + ai[i];
187 nz = diag[i] - ai[i];
188 s[0] = b[i2];
189 for (j=0; j<nz; j++) {
190 xw[0] = x[vi[j]];
191 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192 }
193 t[i2] = s[0];
194 PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195 x[i2] = xw[0];
196 idiag += 1;
197 i2 += 1;
198 }
199 break;
200 case 2:
201 PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202 t[0] = b[0]; t[1] = b[1];
203 i2 = 2;
204 idiag += 4;
205 for (i=1; i<m; i++) {
206 v = aa + 4*ai[i];
207 vi = aj + ai[i];
208 nz = diag[i] - ai[i];
209 s[0] = b[i2]; s[1] = b[i2+1];
210 for (j=0; j<nz; j++) {
211 idx = 2*vi[j];
212 it = 4*j;
213 xw[0] = x[idx]; xw[1] = x[1+idx];
214 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215 }
216 t[i2] = s[0]; t[i2+1] = s[1];
217 PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218 x[i2] = xw[0]; x[i2+1] = xw[1];
219 idiag += 4;
220 i2 += 2;
221 }
222 break;
223 case 3:
224 PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225 t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226 i2 = 3;
227 idiag += 9;
228 for (i=1; i<m; i++) {
229 v = aa + 9*ai[i];
230 vi = aj + ai[i];
231 nz = diag[i] - ai[i];
232 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233 while (nz--) {
234 idx = 3*(*vi++);
235 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237 v += 9;
238 }
239 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240 PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242 idiag += 9;
243 i2 += 3;
244 }
245 break;
246 case 4:
247 PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249 i2 = 4;
250 idiag += 16;
251 for (i=1; i<m; i++) {
252 v = aa + 16*ai[i];
253 vi = aj + ai[i];
254 nz = diag[i] - ai[i];
255 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256 while (nz--) {
257 idx = 4*(*vi++);
258 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260 v += 16;
261 }
262 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263 PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265 idiag += 16;
266 i2 += 4;
267 }
268 break;
269 case 5:
270 PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272 i2 = 5;
273 idiag += 25;
274 for (i=1; i<m; i++) {
275 v = aa + 25*ai[i];
276 vi = aj + ai[i];
277 nz = diag[i] - ai[i];
278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279 while (nz--) {
280 idx = 5*(*vi++);
281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283 v += 25;
284 }
285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286 PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288 idiag += 25;
289 i2 += 5;
290 }
291 break;
292 case 6:
293 PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295 i2 = 6;
296 idiag += 36;
297 for (i=1; i<m; i++) {
298 v = aa + 36*ai[i];
299 vi = aj + ai[i];
300 nz = diag[i] - ai[i];
301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302 while (nz--) {
303 idx = 6*(*vi++);
304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
305 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307 v += 36;
308 }
309 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311 PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313 idiag += 36;
314 i2 += 6;
315 }
316 break;
317 case 7:
318 PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319 t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321 i2 = 7;
322 idiag += 49;
323 for (i=1; i<m; i++) {
324 v = aa + 49*ai[i];
325 vi = aj + ai[i];
326 nz = diag[i] - ai[i];
327 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
328 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329 while (nz--) {
330 idx = 7*(*vi++);
331 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
332 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334 v += 49;
335 }
336 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338 PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341 idiag += 49;
342 i2 += 7;
343 }
344 break;
345 default:
346 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
348 i2 = bs;
349 idiag += bs2;
350 for (i=1; i<m; i++) {
351 v = aa + bs2*ai[i];
352 vi = aj + ai[i];
353 nz = diag[i] - ai[i];
354
355 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
356 /* copy all rows of x that are needed into contiguous space */
357 workt = work;
358 for (j=0; j<nz; j++) {
359 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
360 workt += bs;
361 }
362 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
364 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
365
366 idiag += bs2;
367 i2 += bs;
368 }
369 break;
370 }
371 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
373 xb = t;
374 }
375 else xb = b;
376 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377 idiag = a->idiag+bs2*(a->mbs-1);
378 i2 = bs * (m-1);
379 switch (bs) {
380 case 1:
381 s[0] = xb[i2];
382 PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383 x[i2] = xw[0];
384 i2 -= 1;
385 for (i=m-2; i>=0; i--) {
386 v = aa + (diag[i]+1);
387 vi = aj + diag[i] + 1;
388 nz = ai[i+1] - diag[i] - 1;
389 s[0] = xb[i2];
390 for (j=0; j<nz; j++) {
391 xw[0] = x[vi[j]];
392 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393 }
394 PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395 x[i2] = xw[0];
396 idiag -= 1;
397 i2 -= 1;
398 }
399 break;
400 case 2:
401 s[0] = xb[i2]; s[1] = xb[i2+1];
402 PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403 x[i2] = xw[0]; x[i2+1] = xw[1];
404 i2 -= 2;
405 idiag -= 4;
406 for (i=m-2; i>=0; i--) {
407 v = aa + 4*(diag[i] + 1);
408 vi = aj + diag[i] + 1;
409 nz = ai[i+1] - diag[i] - 1;
410 s[0] = xb[i2]; s[1] = xb[i2+1];
411 for (j=0; j<nz; j++) {
412 idx = 2*vi[j];
413 it = 4*j;
414 xw[0] = x[idx]; xw[1] = x[1+idx];
415 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416 }
417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418 x[i2] = xw[0]; x[i2+1] = xw[1];
419 idiag -= 4;
420 i2 -= 2;
421 }
422 break;
423 case 3:
424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425 PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427 i2 -= 3;
428 idiag -= 9;
429 for (i=m-2; i>=0; i--) {
430 v = aa + 9*(diag[i]+1);
431 vi = aj + diag[i] + 1;
432 nz = ai[i+1] - diag[i] - 1;
433 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434 while (nz--) {
435 idx = 3*(*vi++);
436 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438 v += 9;
439 }
440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442 idiag -= 9;
443 i2 -= 3;
444 }
445 break;
446 case 4:
447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448 PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450 i2 -= 4;
451 idiag -= 16;
452 for (i=m-2; i>=0; i--) {
453 v = aa + 16*(diag[i]+1);
454 vi = aj + diag[i] + 1;
455 nz = ai[i+1] - diag[i] - 1;
456 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457 while (nz--) {
458 idx = 4*(*vi++);
459 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461 v += 16;
462 }
463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465 idiag -= 16;
466 i2 -= 4;
467 }
468 break;
469 case 5:
470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471 PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473 i2 -= 5;
474 idiag -= 25;
475 for (i=m-2; i>=0; i--) {
476 v = aa + 25*(diag[i]+1);
477 vi = aj + diag[i] + 1;
478 nz = ai[i+1] - diag[i] - 1;
479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480 while (nz--) {
481 idx = 5*(*vi++);
482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484 v += 25;
485 }
486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488 idiag -= 25;
489 i2 -= 5;
490 }
491 break;
492 case 6:
493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494 PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496 i2 -= 6;
497 idiag -= 36;
498 for (i=m-2; i>=0; i--) {
499 v = aa + 36*(diag[i]+1);
500 vi = aj + diag[i] + 1;
501 nz = ai[i+1] - diag[i] - 1;
502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503 while (nz--) {
504 idx = 6*(*vi++);
505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
506 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508 v += 36;
509 }
510 PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512 idiag -= 36;
513 i2 -= 6;
514 }
515 break;
516 case 7:
517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
518 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519 PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522 i2 -= 7;
523 idiag -= 49;
524 for (i=m-2; i>=0; i--) {
525 v = aa + 49*(diag[i]+1);
526 vi = aj + diag[i] + 1;
527 nz = ai[i+1] - diag[i] - 1;
528 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
529 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530 while (nz--) {
531 idx = 7*(*vi++);
532 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
533 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535 v += 49;
536 }
537 PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540 idiag -= 49;
541 i2 -= 7;
542 }
543 break;
544 default:
545 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
546 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547 i2 -= bs;
548 idiag -= bs2;
549 for (i=m-2; i>=0; i--) {
550 v = aa + bs2*(diag[i]+1);
551 vi = aj + diag[i] + 1;
552 nz = ai[i+1] - diag[i] - 1;
553
554 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
555 /* copy all rows of x that are needed into contiguous space */
556 workt = work;
557 for (j=0; j<nz; j++) {
558 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
559 workt += bs;
560 }
561 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
562 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
563
564 idiag -= bs2;
565 i2 -= bs;
566 }
567 break;
568 }
569 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
570 }
571 its--;
572 }
573 while (its--) {
574 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575 idiag = a->idiag;
576 i2 = 0;
577 switch (bs) {
578 case 1:
579 for (i=0; i<m; i++) {
580 v = aa + ai[i];
581 vi = aj + ai[i];
582 nz = ai[i+1] - ai[i];
583 s[0] = b[i2];
584 for (j=0; j<nz; j++) {
585 xw[0] = x[vi[j]];
586 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587 }
588 PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589 x[i2] += xw[0];
590 idiag += 1;
591 i2 += 1;
592 }
593 break;
594 case 2:
595 for (i=0; i<m; i++) {
596 v = aa + 4*ai[i];
597 vi = aj + ai[i];
598 nz = ai[i+1] - ai[i];
599 s[0] = b[i2]; s[1] = b[i2+1];
600 for (j=0; j<nz; j++) {
601 idx = 2*vi[j];
602 it = 4*j;
603 xw[0] = x[idx]; xw[1] = x[1+idx];
604 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605 }
606 PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607 x[i2] += xw[0]; x[i2+1] += xw[1];
608 idiag += 4;
609 i2 += 2;
610 }
611 break;
612 case 3:
613 for (i=0; i<m; i++) {
614 v = aa + 9*ai[i];
615 vi = aj + ai[i];
616 nz = ai[i+1] - ai[i];
617 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618 while (nz--) {
619 idx = 3*(*vi++);
620 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622 v += 9;
623 }
624 PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626 idiag += 9;
627 i2 += 3;
628 }
629 break;
630 case 4:
631 for (i=0; i<m; i++) {
632 v = aa + 16*ai[i];
633 vi = aj + ai[i];
634 nz = ai[i+1] - ai[i];
635 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636 while (nz--) {
637 idx = 4*(*vi++);
638 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640 v += 16;
641 }
642 PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644 idiag += 16;
645 i2 += 4;
646 }
647 break;
648 case 5:
649 for (i=0; i<m; i++) {
650 v = aa + 25*ai[i];
651 vi = aj + ai[i];
652 nz = ai[i+1] - ai[i];
653 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654 while (nz--) {
655 idx = 5*(*vi++);
656 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658 v += 25;
659 }
660 PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662 idiag += 25;
663 i2 += 5;
664 }
665 break;
666 case 6:
667 for (i=0; i<m; i++) {
668 v = aa + 36*ai[i];
669 vi = aj + ai[i];
670 nz = ai[i+1] - ai[i];
671 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672 while (nz--) {
673 idx = 6*(*vi++);
674 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
675 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677 v += 36;
678 }
679 PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682 idiag += 36;
683 i2 += 6;
684 }
685 break;
686 case 7:
687 for (i=0; i<m; i++) {
688 v = aa + 49*ai[i];
689 vi = aj + ai[i];
690 nz = ai[i+1] - ai[i];
691 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
692 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693 while (nz--) {
694 idx = 7*(*vi++);
695 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
696 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698 v += 49;
699 }
700 PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703 idiag += 49;
704 i2 += 7;
705 }
706 break;
707 default:
708 for (i=0; i<m; i++) {
709 v = aa + bs2*ai[i];
710 vi = aj + ai[i];
711 nz = ai[i+1] - ai[i];
712
713 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
714 /* copy all rows of x that are needed into contiguous space */
715 workt = work;
716 for (j=0; j<nz; j++) {
717 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
718 workt += bs;
719 }
720 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
722
723 idiag += bs2;
724 i2 += bs;
725 }
726 break;
727 }
728 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
729 }
730 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731 idiag = a->idiag+bs2*(a->mbs-1);
732 i2 = bs * (m-1);
733 switch (bs) {
734 case 1:
735 for (i=m-1; i>=0; i--) {
736 v = aa + ai[i];
737 vi = aj + ai[i];
738 nz = ai[i+1] - ai[i];
739 s[0] = b[i2];
740 for (j=0; j<nz; j++) {
741 xw[0] = x[vi[j]];
742 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743 }
744 PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745 x[i2] += xw[0];
746 idiag -= 1;
747 i2 -= 1;
748 }
749 break;
750 case 2:
751 for (i=m-1; i>=0; i--) {
752 v = aa + 4*ai[i];
753 vi = aj + ai[i];
754 nz = ai[i+1] - ai[i];
755 s[0] = b[i2]; s[1] = b[i2+1];
756 for (j=0; j<nz; j++) {
757 idx = 2*vi[j];
758 it = 4*j;
759 xw[0] = x[idx]; xw[1] = x[1+idx];
760 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761 }
762 PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763 x[i2] += xw[0]; x[i2+1] += xw[1];
764 idiag -= 4;
765 i2 -= 2;
766 }
767 break;
768 case 3:
769 for (i=m-1; i>=0; i--) {
770 v = aa + 9*ai[i];
771 vi = aj + ai[i];
772 nz = ai[i+1] - ai[i];
773 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774 while (nz--) {
775 idx = 3*(*vi++);
776 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778 v += 9;
779 }
780 PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782 idiag -= 9;
783 i2 -= 3;
784 }
785 break;
786 case 4:
787 for (i=m-1; i>=0; i--) {
788 v = aa + 16*ai[i];
789 vi = aj + ai[i];
790 nz = ai[i+1] - ai[i];
791 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792 while (nz--) {
793 idx = 4*(*vi++);
794 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796 v += 16;
797 }
798 PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800 idiag -= 16;
801 i2 -= 4;
802 }
803 break;
804 case 5:
805 for (i=m-1; i>=0; i--) {
806 v = aa + 25*ai[i];
807 vi = aj + ai[i];
808 nz = ai[i+1] - ai[i];
809 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810 while (nz--) {
811 idx = 5*(*vi++);
812 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814 v += 25;
815 }
816 PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818 idiag -= 25;
819 i2 -= 5;
820 }
821 break;
822 case 6:
823 for (i=m-1; i>=0; i--) {
824 v = aa + 36*ai[i];
825 vi = aj + ai[i];
826 nz = ai[i+1] - ai[i];
827 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828 while (nz--) {
829 idx = 6*(*vi++);
830 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
831 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833 v += 36;
834 }
835 PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838 idiag -= 36;
839 i2 -= 6;
840 }
841 break;
842 case 7:
843 for (i=m-1; i>=0; i--) {
844 v = aa + 49*ai[i];
845 vi = aj + ai[i];
846 nz = ai[i+1] - ai[i];
847 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
848 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849 while (nz--) {
850 idx = 7*(*vi++);
851 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
852 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854 v += 49;
855 }
856 PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859 idiag -= 49;
860 i2 -= 7;
861 }
862 break;
863 default:
864 for (i=m-1; i>=0; i--) {
865 v = aa + bs2*ai[i];
866 vi = aj + ai[i];
867 nz = ai[i+1] - ai[i];
868
869 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
870 /* copy all rows of x that are needed into contiguous space */
871 workt = work;
872 for (j=0; j<nz; j++) {
873 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
874 workt += bs;
875 }
876 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
878
879 idiag -= bs2;
880 i2 -= bs;
881 }
882 break;
883 }
884 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
885 }
886 }
887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
889 PetscFunctionReturn(0);
890 }
891
892
893 /*
894 Special version for direct calls from Fortran (Used in PETSc-fun3d)
895 */
896 #if defined(PETSC_HAVE_FORTRAN_CAPS)
897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
899 #define matsetvaluesblocked4_ matsetvaluesblocked4
900 #endif
901
matsetvaluesblocked4_(Mat * AA,PetscInt * mm,const PetscInt im[],PetscInt * nn,const PetscInt in[],const PetscScalar v[])902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
903 {
904 Mat A = *AA;
905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
906 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
907 PetscInt *ai =a->i,*ailen=a->ilen;
908 PetscInt *aj =a->j,stepval,lastcol = -1;
909 const PetscScalar *value = v;
910 MatScalar *ap,*aa = a->a,*bap;
911 PetscErrorCode ierr;
912
913 PetscFunctionBegin;
914 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915 stepval = (n-1)*4;
916 for (k=0; k<m; k++) { /* loop over added rows */
917 row = im[k];
918 rp = aj + ai[row];
919 ap = aa + 16*ai[row];
920 nrow = ailen[row];
921 low = 0;
922 high = nrow;
923 for (l=0; l<n; l++) { /* loop over added columns */
924 col = in[l];
925 if (col <= lastcol) low = 0;
926 else high = nrow;
927 lastcol = col;
928 value = v + k*(stepval+4 + l)*4;
929 while (high-low > 7) {
930 t = (low+high)/2;
931 if (rp[t] > col) high = t;
932 else low = t;
933 }
934 for (i=low; i<high; i++) {
935 if (rp[i] > col) break;
936 if (rp[i] == col) {
937 bap = ap + 16*i;
938 for (ii=0; ii<4; ii++,value+=stepval) {
939 for (jj=ii; jj<16; jj+=4) {
940 bap[jj] += *value++;
941 }
942 }
943 goto noinsert2;
944 }
945 }
946 N = nrow++ - 1;
947 high++; /* added new column index thus must search to one higher than before */
948 /* shift up all the later entries in this row */
949 for (ii=N; ii>=i; ii--) {
950 rp[ii+1] = rp[ii];
951 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
952 }
953 if (N >= i) {
954 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
955 }
956 rp[i] = col;
957 bap = ap + 16*i;
958 for (ii=0; ii<4; ii++,value+=stepval) {
959 for (jj=ii; jj<16; jj+=4) {
960 bap[jj] = *value++;
961 }
962 }
963 noinsert2:;
964 low = i;
965 }
966 ailen[row] = nrow;
967 }
968 PetscFunctionReturnVoid();
969 }
970
971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
972 #define matsetvalues4_ MATSETVALUES4
973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974 #define matsetvalues4_ matsetvalues4
975 #endif
976
matsetvalues4_(Mat * AA,PetscInt * mm,PetscInt * im,PetscInt * nn,PetscInt * in,PetscScalar * v)977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978 {
979 Mat A = *AA;
980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
982 PetscInt *ai=a->i,*ailen=a->ilen;
983 PetscInt *aj=a->j,brow,bcol;
984 PetscInt ridx,cidx,lastcol = -1;
985 MatScalar *ap,value,*aa=a->a,*bap;
986 PetscErrorCode ierr;
987
988 PetscFunctionBegin;
989 for (k=0; k<m; k++) { /* loop over added rows */
990 row = im[k]; brow = row/4;
991 rp = aj + ai[brow];
992 ap = aa + 16*ai[brow];
993 nrow = ailen[brow];
994 low = 0;
995 high = nrow;
996 for (l=0; l<n; l++) { /* loop over added columns */
997 col = in[l]; bcol = col/4;
998 ridx = row % 4; cidx = col % 4;
999 value = v[l + k*n];
1000 if (col <= lastcol) low = 0;
1001 else high = nrow;
1002 lastcol = col;
1003 while (high-low > 7) {
1004 t = (low+high)/2;
1005 if (rp[t] > bcol) high = t;
1006 else low = t;
1007 }
1008 for (i=low; i<high; i++) {
1009 if (rp[i] > bcol) break;
1010 if (rp[i] == bcol) {
1011 bap = ap + 16*i + 4*cidx + ridx;
1012 *bap += value;
1013 goto noinsert1;
1014 }
1015 }
1016 N = nrow++ - 1;
1017 high++; /* added new column thus must search to one higher than before */
1018 /* shift up all the later entries in this row */
1019 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1020 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1021 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1022 rp[i] = bcol;
1023 ap[16*i + 4*cidx + ridx] = value;
1024 noinsert1:;
1025 low = i;
1026 }
1027 ailen[brow] = nrow;
1028 }
1029 PetscFunctionReturnVoid();
1030 }
1031
1032 /*
1033 Checks for missing diagonals
1034 */
MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool * missing,PetscInt * d)1035 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1036 {
1037 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1038 PetscErrorCode ierr;
1039 PetscInt *diag,*ii = a->i,i;
1040
1041 PetscFunctionBegin;
1042 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1043 *missing = PETSC_FALSE;
1044 if (A->rmap->n > 0 && !ii) {
1045 *missing = PETSC_TRUE;
1046 if (d) *d = 0;
1047 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1048 } else {
1049 PetscInt n;
1050 n = PetscMin(a->mbs, a->nbs);
1051 diag = a->diag;
1052 for (i=0; i<n; i++) {
1053 if (diag[i] >= ii[i+1]) {
1054 *missing = PETSC_TRUE;
1055 if (d) *d = i;
1056 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1057 break;
1058 }
1059 }
1060 }
1061 PetscFunctionReturn(0);
1062 }
1063
MatMarkDiagonal_SeqBAIJ(Mat A)1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1065 {
1066 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1067 PetscErrorCode ierr;
1068 PetscInt i,j,m = a->mbs;
1069
1070 PetscFunctionBegin;
1071 if (!a->diag) {
1072 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1073 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1074 a->free_diag = PETSC_TRUE;
1075 }
1076 for (i=0; i<m; i++) {
1077 a->diag[i] = a->i[i+1];
1078 for (j=a->i[i]; j<a->i[i+1]; j++) {
1079 if (a->j[j] == i) {
1080 a->diag[i] = j;
1081 break;
1082 }
1083 }
1084 }
1085 PetscFunctionReturn(0);
1086 }
1087
1088
MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * inia[],const PetscInt * inja[],PetscBool * done)1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done)
1090 {
1091 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1092 PetscErrorCode ierr;
1093 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1094 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1095
1096 PetscFunctionBegin;
1097 *nn = n;
1098 if (!ia) PetscFunctionReturn(0);
1099 if (symmetric) {
1100 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1101 nz = tia[n];
1102 } else {
1103 tia = a->i; tja = a->j;
1104 }
1105
1106 if (!blockcompressed && bs > 1) {
1107 (*nn) *= bs;
1108 /* malloc & create the natural set of indices */
1109 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1110 if (n) {
1111 (*ia)[0] = oshift;
1112 for (j=1; j<bs; j++) {
1113 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1114 }
1115 }
1116
1117 for (i=1; i<n; i++) {
1118 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1119 for (j=1; j<bs; j++) {
1120 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1121 }
1122 }
1123 if (n) {
1124 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1125 }
1126
1127 if (inja) {
1128 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1129 cnt = 0;
1130 for (i=0; i<n; i++) {
1131 for (j=0; j<bs; j++) {
1132 for (k=tia[i]; k<tia[i+1]; k++) {
1133 for (l=0; l<bs; l++) {
1134 (*ja)[cnt++] = bs*tja[k] + l;
1135 }
1136 }
1137 }
1138 }
1139 }
1140
1141 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1142 ierr = PetscFree(tia);CHKERRQ(ierr);
1143 ierr = PetscFree(tja);CHKERRQ(ierr);
1144 }
1145 } else if (oshift == 1) {
1146 if (symmetric) {
1147 nz = tia[A->rmap->n/bs];
1148 /* add 1 to i and j indices */
1149 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1150 *ia = tia;
1151 if (ja) {
1152 for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1153 *ja = tja;
1154 }
1155 } else {
1156 nz = a->i[A->rmap->n/bs];
1157 /* malloc space and add 1 to i and j indices */
1158 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1159 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1160 if (ja) {
1161 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1162 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1163 }
1164 }
1165 } else {
1166 *ia = tia;
1167 if (ja) *ja = tja;
1168 }
1169 PetscFunctionReturn(0);
1170 }
1171
MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
1173 {
1174 PetscErrorCode ierr;
1175
1176 PetscFunctionBegin;
1177 if (!ia) PetscFunctionReturn(0);
1178 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1179 ierr = PetscFree(*ia);CHKERRQ(ierr);
1180 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1181 }
1182 PetscFunctionReturn(0);
1183 }
1184
MatDestroy_SeqBAIJ(Mat A)1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1186 {
1187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1188 PetscErrorCode ierr;
1189
1190 PetscFunctionBegin;
1191 #if defined(PETSC_USE_LOG)
1192 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1193 #endif
1194 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1195 ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1196 ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1197 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1198 ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1199 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1200 ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1201 ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1202 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1203 ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1204 ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1205 ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1206 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1207
1208 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1209 ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1210 ierr = PetscFree(A->data);CHKERRQ(ierr);
1211
1212 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1225 #if defined(PETSC_HAVE_HYPRE)
1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1227 #endif
1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1229 PetscFunctionReturn(0);
1230 }
1231
MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1233 {
1234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1235 PetscErrorCode ierr;
1236
1237 PetscFunctionBegin;
1238 switch (op) {
1239 case MAT_ROW_ORIENTED:
1240 a->roworiented = flg;
1241 break;
1242 case MAT_KEEP_NONZERO_PATTERN:
1243 a->keepnonzeropattern = flg;
1244 break;
1245 case MAT_NEW_NONZERO_LOCATIONS:
1246 a->nonew = (flg ? 0 : 1);
1247 break;
1248 case MAT_NEW_NONZERO_LOCATION_ERR:
1249 a->nonew = (flg ? -1 : 0);
1250 break;
1251 case MAT_NEW_NONZERO_ALLOCATION_ERR:
1252 a->nonew = (flg ? -2 : 0);
1253 break;
1254 case MAT_UNUSED_NONZERO_LOCATION_ERR:
1255 a->nounused = (flg ? -1 : 0);
1256 break;
1257 case MAT_NEW_DIAGONALS:
1258 case MAT_IGNORE_OFF_PROC_ENTRIES:
1259 case MAT_USE_HASH_TABLE:
1260 case MAT_SORTED_FULL:
1261 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1262 break;
1263 case MAT_SPD:
1264 case MAT_SYMMETRIC:
1265 case MAT_STRUCTURALLY_SYMMETRIC:
1266 case MAT_HERMITIAN:
1267 case MAT_SYMMETRY_ETERNAL:
1268 case MAT_SUBMAT_SINGLEIS:
1269 case MAT_STRUCTURE_ONLY:
1270 /* These options are handled directly by MatSetOption() */
1271 break;
1272 default:
1273 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1274 }
1275 PetscFunctionReturn(0);
1276 }
1277
1278 /* used for both SeqBAIJ and SeqSBAIJ matrices */
MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v,PetscInt * ai,PetscInt * aj,PetscScalar * aa)1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1280 {
1281 PetscErrorCode ierr;
1282 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1283 MatScalar *aa_i;
1284 PetscScalar *v_i;
1285
1286 PetscFunctionBegin;
1287 bs = A->rmap->bs;
1288 bs2 = bs*bs;
1289 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1290
1291 bn = row/bs; /* Block number */
1292 bp = row % bs; /* Block Position */
1293 M = ai[bn+1] - ai[bn];
1294 *nz = bs*M;
1295
1296 if (v) {
1297 *v = NULL;
1298 if (*nz) {
1299 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1300 for (i=0; i<M; i++) { /* for each block in the block row */
1301 v_i = *v + i*bs;
1302 aa_i = aa + bs2*(ai[bn] + i);
1303 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1304 }
1305 }
1306 }
1307
1308 if (idx) {
1309 *idx = NULL;
1310 if (*nz) {
1311 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1312 for (i=0; i<M; i++) { /* for each block in the block row */
1313 idx_i = *idx + i*bs;
1314 itmp = bs*aj[ai[bn] + i];
1315 for (j=0; j<bs; j++) idx_i[j] = itmp++;
1316 }
1317 }
1318 }
1319 PetscFunctionReturn(0);
1320 }
1321
MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1322 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323 {
1324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1325 PetscErrorCode ierr;
1326
1327 PetscFunctionBegin;
1328 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1329 PetscFunctionReturn(0);
1330 }
1331
MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1332 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334 PetscErrorCode ierr;
1335
1336 PetscFunctionBegin;
1337 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1338 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
1339 PetscFunctionReturn(0);
1340 }
1341
MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat * B)1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1343 {
1344 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at;
1345 Mat C;
1346 PetscErrorCode ierr;
1347 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1348 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr;
1349 MatScalar *ata,*aa=a->a;
1350
1351 PetscFunctionBegin;
1352 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1353 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1354 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1355
1356 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1357 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1358 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1359 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1360
1361 at = (Mat_SeqBAIJ*)C->data;
1362 ati = at->i;
1363 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1364 } else {
1365 C = *B;
1366 at = (Mat_SeqBAIJ*)C->data;
1367 ati = at->i;
1368 }
1369
1370 atj = at->j;
1371 ata = at->a;
1372
1373 /* Copy ati into atfill so we have locations of the next free space in atj */
1374 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1375
1376 /* Walk through A row-wise and mark nonzero entries of A^T. */
1377 for (i=0; i<mbs; i++) {
1378 anzj = ai[i+1] - ai[i];
1379 for (j=0; j<anzj; j++) {
1380 atj[atfill[*aj]] = i;
1381 for (kr=0; kr<bs; kr++) {
1382 for (k=0; k<bs; k++) {
1383 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1384 }
1385 }
1386 atfill[*aj++] += 1;
1387 }
1388 }
1389 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391
1392 /* Clean up temporary space and complete requests. */
1393 ierr = PetscFree(atfill);CHKERRQ(ierr);
1394
1395 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1396 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1397 *B = C;
1398 } else {
1399 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1400 }
1401 PetscFunctionReturn(0);
1402 }
1403
MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool * f)1404 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f)
1405 {
1406 PetscErrorCode ierr;
1407 Mat Btrans;
1408
1409 PetscFunctionBegin;
1410 *f = PETSC_FALSE;
1411 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1412 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1413 ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1414 PetscFunctionReturn(0);
1415 }
1416
1417 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)1418 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1419 {
1420 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data;
1421 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1422 PetscInt *rowlens,*colidxs;
1423 PetscScalar *matvals;
1424 PetscErrorCode ierr;
1425
1426 PetscFunctionBegin;
1427 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1428
1429 M = mat->rmap->N;
1430 N = mat->cmap->N;
1431 m = mat->rmap->n;
1432 bs = mat->rmap->bs;
1433 nz = bs*bs*A->nz;
1434
1435 /* write matrix header */
1436 header[0] = MAT_FILE_CLASSID;
1437 header[1] = M; header[2] = N; header[3] = nz;
1438 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1439
1440 /* store row lengths */
1441 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1442 for (cnt=0, i=0; i<A->mbs; i++)
1443 for (j=0; j<bs; j++)
1444 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1445 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1446 ierr = PetscFree(rowlens);CHKERRQ(ierr);
1447
1448 /* store column indices */
1449 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1450 for (cnt=0, i=0; i<A->mbs; i++)
1451 for (k=0; k<bs; k++)
1452 for (j=A->i[i]; j<A->i[i+1]; j++)
1453 for (l=0; l<bs; l++)
1454 colidxs[cnt++] = bs*A->j[j] + l;
1455 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1456 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1457 ierr = PetscFree(colidxs);CHKERRQ(ierr);
1458
1459 /* store nonzero values */
1460 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1461 for (cnt=0, i=0; i<A->mbs; i++)
1462 for (k=0; k<bs; k++)
1463 for (j=A->i[i]; j<A->i[i+1]; j++)
1464 for (l=0; l<bs; l++)
1465 matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1466 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1467 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1468 ierr = PetscFree(matvals);CHKERRQ(ierr);
1469
1470 /* write block size option to the viewer's .info file */
1471 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1472 PetscFunctionReturn(0);
1473 }
1474
MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)1475 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1476 {
1477 PetscErrorCode ierr;
1478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1479 PetscInt i,bs = A->rmap->bs,k;
1480
1481 PetscFunctionBegin;
1482 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1483 for (i=0; i<a->mbs; i++) {
1484 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1485 for (k=a->i[i]; k<a->i[i+1]; k++) {
1486 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1487 }
1488 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1489 }
1490 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1491 PetscFunctionReturn(0);
1492 }
1493
MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)1494 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1495 {
1496 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1497 PetscErrorCode ierr;
1498 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1499 PetscViewerFormat format;
1500
1501 PetscFunctionBegin;
1502 if (A->structure_only) {
1503 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1504 PetscFunctionReturn(0);
1505 }
1506
1507 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1508 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1509 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
1510 } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1511 const char *matname;
1512 Mat aij;
1513 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1514 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1515 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1516 ierr = MatView(aij,viewer);CHKERRQ(ierr);
1517 ierr = MatDestroy(&aij);CHKERRQ(ierr);
1518 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1519 PetscFunctionReturn(0);
1520 } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1521 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1522 for (i=0; i<a->mbs; i++) {
1523 for (j=0; j<bs; j++) {
1524 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1525 for (k=a->i[i]; k<a->i[i+1]; k++) {
1526 for (l=0; l<bs; l++) {
1527 #if defined(PETSC_USE_COMPLEX)
1528 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1529 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1530 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1531 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1532 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1533 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1534 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536 }
1537 #else
1538 if (a->a[bs2*k + l*bs + j] != 0.0) {
1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1540 }
1541 #endif
1542 }
1543 }
1544 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1545 }
1546 }
1547 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1548 } else {
1549 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1550 for (i=0; i<a->mbs; i++) {
1551 for (j=0; j<bs; j++) {
1552 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1553 for (k=a->i[i]; k<a->i[i+1]; k++) {
1554 for (l=0; l<bs; l++) {
1555 #if defined(PETSC_USE_COMPLEX)
1556 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1557 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1558 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1559 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1560 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1561 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1562 } else {
1563 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564 }
1565 #else
1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1567 #endif
1568 }
1569 }
1570 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1571 }
1572 }
1573 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1574 }
1575 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1576 PetscFunctionReturn(0);
1577 }
1578
1579 #include <petscdraw.h>
MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void * Aa)1580 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1581 {
1582 Mat A = (Mat) Aa;
1583 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1584 PetscErrorCode ierr;
1585 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1586 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1587 MatScalar *aa;
1588 PetscViewer viewer;
1589 PetscViewerFormat format;
1590
1591 PetscFunctionBegin;
1592 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1593 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1594 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1595
1596 /* loop over matrix elements drawing boxes */
1597
1598 if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1599 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1600 /* Blue for negative, Cyan for zero and Red for positive */
1601 color = PETSC_DRAW_BLUE;
1602 for (i=0,row=0; i<mbs; i++,row+=bs) {
1603 for (j=a->i[i]; j<a->i[i+1]; j++) {
1604 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1605 x_l = a->j[j]*bs; x_r = x_l + 1.0;
1606 aa = a->a + j*bs2;
1607 for (k=0; k<bs; k++) {
1608 for (l=0; l<bs; l++) {
1609 if (PetscRealPart(*aa++) >= 0.) continue;
1610 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1611 }
1612 }
1613 }
1614 }
1615 color = PETSC_DRAW_CYAN;
1616 for (i=0,row=0; i<mbs; i++,row+=bs) {
1617 for (j=a->i[i]; j<a->i[i+1]; j++) {
1618 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1619 x_l = a->j[j]*bs; x_r = x_l + 1.0;
1620 aa = a->a + j*bs2;
1621 for (k=0; k<bs; k++) {
1622 for (l=0; l<bs; l++) {
1623 if (PetscRealPart(*aa++) != 0.) continue;
1624 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1625 }
1626 }
1627 }
1628 }
1629 color = PETSC_DRAW_RED;
1630 for (i=0,row=0; i<mbs; i++,row+=bs) {
1631 for (j=a->i[i]; j<a->i[i+1]; j++) {
1632 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1633 x_l = a->j[j]*bs; x_r = x_l + 1.0;
1634 aa = a->a + j*bs2;
1635 for (k=0; k<bs; k++) {
1636 for (l=0; l<bs; l++) {
1637 if (PetscRealPart(*aa++) <= 0.) continue;
1638 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1639 }
1640 }
1641 }
1642 }
1643 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1644 } else {
1645 /* use contour shading to indicate magnitude of values */
1646 /* first determine max of all nonzero values */
1647 PetscReal minv = 0.0, maxv = 0.0;
1648 PetscDraw popup;
1649
1650 for (i=0; i<a->nz*a->bs2; i++) {
1651 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1652 }
1653 if (minv >= maxv) maxv = minv + PETSC_SMALL;
1654 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1655 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1656
1657 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1658 for (i=0,row=0; i<mbs; i++,row+=bs) {
1659 for (j=a->i[i]; j<a->i[i+1]; j++) {
1660 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1661 x_l = a->j[j]*bs; x_r = x_l + 1.0;
1662 aa = a->a + j*bs2;
1663 for (k=0; k<bs; k++) {
1664 for (l=0; l<bs; l++) {
1665 MatScalar v = *aa++;
1666 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1667 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1668 }
1669 }
1670 }
1671 }
1672 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1673 }
1674 PetscFunctionReturn(0);
1675 }
1676
MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)1677 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1678 {
1679 PetscErrorCode ierr;
1680 PetscReal xl,yl,xr,yr,w,h;
1681 PetscDraw draw;
1682 PetscBool isnull;
1683
1684 PetscFunctionBegin;
1685 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1686 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1687 if (isnull) PetscFunctionReturn(0);
1688
1689 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1690 xr += w; yr += h; xl = -w; yl = -h;
1691 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1692 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1693 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1694 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1695 ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1696 PetscFunctionReturn(0);
1697 }
1698
MatView_SeqBAIJ(Mat A,PetscViewer viewer)1699 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1700 {
1701 PetscErrorCode ierr;
1702 PetscBool iascii,isbinary,isdraw;
1703
1704 PetscFunctionBegin;
1705 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1706 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1707 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1708 if (iascii) {
1709 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1710 } else if (isbinary) {
1711 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1712 } else if (isdraw) {
1713 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1714 } else {
1715 Mat B;
1716 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1717 ierr = MatView(B,viewer);CHKERRQ(ierr);
1718 ierr = MatDestroy(&B);CHKERRQ(ierr);
1719 }
1720 PetscFunctionReturn(0);
1721 }
1722
1723
MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])1724 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1725 {
1726 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1727 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1728 PetscInt *ai = a->i,*ailen = a->ilen;
1729 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1730 MatScalar *ap,*aa = a->a;
1731
1732 PetscFunctionBegin;
1733 for (k=0; k<m; k++) { /* loop over rows */
1734 row = im[k]; brow = row/bs;
1735 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1736 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1737 rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1738 nrow = ailen[brow];
1739 for (l=0; l<n; l++) { /* loop over columns */
1740 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1741 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1742 col = in[l];
1743 bcol = col/bs;
1744 cidx = col%bs;
1745 ridx = row%bs;
1746 high = nrow;
1747 low = 0; /* assume unsorted */
1748 while (high-low > 5) {
1749 t = (low+high)/2;
1750 if (rp[t] > bcol) high = t;
1751 else low = t;
1752 }
1753 for (i=low; i<high; i++) {
1754 if (rp[i] > bcol) break;
1755 if (rp[i] == bcol) {
1756 *v++ = ap[bs2*i+bs*cidx+ridx];
1757 goto finished;
1758 }
1759 }
1760 *v++ = 0.0;
1761 finished:;
1762 }
1763 }
1764 PetscFunctionReturn(0);
1765 }
1766
MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)1767 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1768 {
1769 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1770 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1771 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1772 PetscErrorCode ierr;
1773 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1774 PetscBool roworiented=a->roworiented;
1775 const PetscScalar *value = v;
1776 MatScalar *ap=NULL,*aa = a->a,*bap;
1777
1778 PetscFunctionBegin;
1779 if (roworiented) {
1780 stepval = (n-1)*bs;
1781 } else {
1782 stepval = (m-1)*bs;
1783 }
1784 for (k=0; k<m; k++) { /* loop over added rows */
1785 row = im[k];
1786 if (row < 0) continue;
1787 if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1788 rp = aj + ai[row];
1789 if (!A->structure_only) ap = aa + bs2*ai[row];
1790 rmax = imax[row];
1791 nrow = ailen[row];
1792 low = 0;
1793 high = nrow;
1794 for (l=0; l<n; l++) { /* loop over added columns */
1795 if (in[l] < 0) continue;
1796 if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1797 col = in[l];
1798 if (!A->structure_only) {
1799 if (roworiented) {
1800 value = v + (k*(stepval+bs) + l)*bs;
1801 } else {
1802 value = v + (l*(stepval+bs) + k)*bs;
1803 }
1804 }
1805 if (col <= lastcol) low = 0;
1806 else high = nrow;
1807 lastcol = col;
1808 while (high-low > 7) {
1809 t = (low+high)/2;
1810 if (rp[t] > col) high = t;
1811 else low = t;
1812 }
1813 for (i=low; i<high; i++) {
1814 if (rp[i] > col) break;
1815 if (rp[i] == col) {
1816 if (A->structure_only) goto noinsert2;
1817 bap = ap + bs2*i;
1818 if (roworiented) {
1819 if (is == ADD_VALUES) {
1820 for (ii=0; ii<bs; ii++,value+=stepval) {
1821 for (jj=ii; jj<bs2; jj+=bs) {
1822 bap[jj] += *value++;
1823 }
1824 }
1825 } else {
1826 for (ii=0; ii<bs; ii++,value+=stepval) {
1827 for (jj=ii; jj<bs2; jj+=bs) {
1828 bap[jj] = *value++;
1829 }
1830 }
1831 }
1832 } else {
1833 if (is == ADD_VALUES) {
1834 for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1835 for (jj=0; jj<bs; jj++) {
1836 bap[jj] += value[jj];
1837 }
1838 bap += bs;
1839 }
1840 } else {
1841 for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1842 for (jj=0; jj<bs; jj++) {
1843 bap[jj] = value[jj];
1844 }
1845 bap += bs;
1846 }
1847 }
1848 }
1849 goto noinsert2;
1850 }
1851 }
1852 if (nonew == 1) goto noinsert2;
1853 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1854 if (A->structure_only) {
1855 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1856 } else {
1857 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1858 }
1859 N = nrow++ - 1; high++;
1860 /* shift up all the later entries in this row */
1861 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1862 rp[i] = col;
1863 if (!A->structure_only) {
1864 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1865 bap = ap + bs2*i;
1866 if (roworiented) {
1867 for (ii=0; ii<bs; ii++,value+=stepval) {
1868 for (jj=ii; jj<bs2; jj+=bs) {
1869 bap[jj] = *value++;
1870 }
1871 }
1872 } else {
1873 for (ii=0; ii<bs; ii++,value+=stepval) {
1874 for (jj=0; jj<bs; jj++) {
1875 *bap++ = *value++;
1876 }
1877 }
1878 }
1879 }
1880 noinsert2:;
1881 low = i;
1882 }
1883 ailen[row] = nrow;
1884 }
1885 PetscFunctionReturn(0);
1886 }
1887
MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)1888 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1889 {
1890 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1891 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1892 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1893 PetscErrorCode ierr;
1894 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1895 MatScalar *aa = a->a,*ap;
1896 PetscReal ratio=0.6;
1897
1898 PetscFunctionBegin;
1899 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1900
1901 if (m) rmax = ailen[0];
1902 for (i=1; i<mbs; i++) {
1903 /* move each row back by the amount of empty slots (fshift) before it*/
1904 fshift += imax[i-1] - ailen[i-1];
1905 rmax = PetscMax(rmax,ailen[i]);
1906 if (fshift) {
1907 ip = aj + ai[i];
1908 ap = aa + bs2*ai[i];
1909 N = ailen[i];
1910 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1911 if (!A->structure_only) {
1912 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1913 }
1914 }
1915 ai[i] = ai[i-1] + ailen[i-1];
1916 }
1917 if (mbs) {
1918 fshift += imax[mbs-1] - ailen[mbs-1];
1919 ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1920 }
1921
1922 /* reset ilen and imax for each row */
1923 a->nonzerorowcnt = 0;
1924 if (A->structure_only) {
1925 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1926 } else { /* !A->structure_only */
1927 for (i=0; i<mbs; i++) {
1928 ailen[i] = imax[i] = ai[i+1] - ai[i];
1929 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1930 }
1931 }
1932 a->nz = ai[mbs];
1933
1934 /* diagonals may have moved, so kill the diagonal pointers */
1935 a->idiagvalid = PETSC_FALSE;
1936 if (fshift && a->diag) {
1937 ierr = PetscFree(a->diag);CHKERRQ(ierr);
1938 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1939 a->diag = NULL;
1940 }
1941 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1942 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1943 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1944 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1945
1946 A->info.mallocs += a->reallocs;
1947 a->reallocs = 0;
1948 A->info.nz_unneeded = (PetscReal)fshift*bs2;
1949 a->rmax = rmax;
1950
1951 if (!A->structure_only) {
1952 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1953 }
1954 PetscFunctionReturn(0);
1955 }
1956
1957 /*
1958 This function returns an array of flags which indicate the locations of contiguous
1959 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1960 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1961 Assume: sizes should be long enough to hold all the values.
1962 */
MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[],PetscInt * bs_max)1963 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1964 {
1965 PetscInt i,j,k,row;
1966 PetscBool flg;
1967
1968 PetscFunctionBegin;
1969 for (i=0,j=0; i<n; j++) {
1970 row = idx[i];
1971 if (row%bs!=0) { /* Not the begining of a block */
1972 sizes[j] = 1;
1973 i++;
1974 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1975 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1976 i++;
1977 } else { /* Begining of the block, so check if the complete block exists */
1978 flg = PETSC_TRUE;
1979 for (k=1; k<bs; k++) {
1980 if (row+k != idx[i+k]) { /* break in the block */
1981 flg = PETSC_FALSE;
1982 break;
1983 }
1984 }
1985 if (flg) { /* No break in the bs */
1986 sizes[j] = bs;
1987 i += bs;
1988 } else {
1989 sizes[j] = 1;
1990 i++;
1991 }
1992 }
1993 }
1994 *bs_max = j;
1995 PetscFunctionReturn(0);
1996 }
1997
MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)1998 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1999 {
2000 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2001 PetscErrorCode ierr;
2002 PetscInt i,j,k,count,*rows;
2003 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2004 PetscScalar zero = 0.0;
2005 MatScalar *aa;
2006 const PetscScalar *xx;
2007 PetscScalar *bb;
2008
2009 PetscFunctionBegin;
2010 /* fix right hand side if needed */
2011 if (x && b) {
2012 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2013 ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2014 for (i=0; i<is_n; i++) {
2015 bb[is_idx[i]] = diag*xx[is_idx[i]];
2016 }
2017 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2018 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2019 }
2020
2021 /* Make a copy of the IS and sort it */
2022 /* allocate memory for rows,sizes */
2023 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2024
2025 /* copy IS values to rows, and sort them */
2026 for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2027 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2028
2029 if (baij->keepnonzeropattern) {
2030 for (i=0; i<is_n; i++) sizes[i] = 1;
2031 bs_max = is_n;
2032 } else {
2033 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2034 A->nonzerostate++;
2035 }
2036
2037 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2038 row = rows[j];
2039 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2040 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2041 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2042 if (sizes[i] == bs && !baij->keepnonzeropattern) {
2043 if (diag != (PetscScalar)0.0) {
2044 if (baij->ilen[row/bs] > 0) {
2045 baij->ilen[row/bs] = 1;
2046 baij->j[baij->i[row/bs]] = row/bs;
2047
2048 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2049 }
2050 /* Now insert all the diagonal values for this bs */
2051 for (k=0; k<bs; k++) {
2052 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2053 }
2054 } else { /* (diag == 0.0) */
2055 baij->ilen[row/bs] = 0;
2056 } /* end (diag == 0.0) */
2057 } else { /* (sizes[i] != bs) */
2058 if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2059 for (k=0; k<count; k++) {
2060 aa[0] = zero;
2061 aa += bs;
2062 }
2063 if (diag != (PetscScalar)0.0) {
2064 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2065 }
2066 }
2067 }
2068
2069 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2070 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2071 PetscFunctionReturn(0);
2072 }
2073
MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)2074 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2075 {
2076 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2077 PetscErrorCode ierr;
2078 PetscInt i,j,k,count;
2079 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col;
2080 PetscScalar zero = 0.0;
2081 MatScalar *aa;
2082 const PetscScalar *xx;
2083 PetscScalar *bb;
2084 PetscBool *zeroed,vecs = PETSC_FALSE;
2085
2086 PetscFunctionBegin;
2087 /* fix right hand side if needed */
2088 if (x && b) {
2089 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2090 ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2091 vecs = PETSC_TRUE;
2092 }
2093
2094 /* zero the columns */
2095 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2096 for (i=0; i<is_n; i++) {
2097 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2098 zeroed[is_idx[i]] = PETSC_TRUE;
2099 }
2100 for (i=0; i<A->rmap->N; i++) {
2101 if (!zeroed[i]) {
2102 row = i/bs;
2103 for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2104 for (k=0; k<bs; k++) {
2105 col = bs*baij->j[j] + k;
2106 if (zeroed[col]) {
2107 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2108 if (vecs) bb[i] -= aa[0]*xx[col];
2109 aa[0] = 0.0;
2110 }
2111 }
2112 }
2113 } else if (vecs) bb[i] = diag*xx[i];
2114 }
2115 ierr = PetscFree(zeroed);CHKERRQ(ierr);
2116 if (vecs) {
2117 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2118 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2119 }
2120
2121 /* zero the rows */
2122 for (i=0; i<is_n; i++) {
2123 row = is_idx[i];
2124 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2125 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2126 for (k=0; k<count; k++) {
2127 aa[0] = zero;
2128 aa += bs;
2129 }
2130 if (diag != (PetscScalar)0.0) {
2131 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2132 }
2133 }
2134 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135 PetscFunctionReturn(0);
2136 }
2137
MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)2138 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2139 {
2140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2141 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2142 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2143 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2144 PetscErrorCode ierr;
2145 PetscInt ridx,cidx,bs2=a->bs2;
2146 PetscBool roworiented=a->roworiented;
2147 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap;
2148
2149 PetscFunctionBegin;
2150 for (k=0; k<m; k++) { /* loop over added rows */
2151 row = im[k];
2152 brow = row/bs;
2153 if (row < 0) continue;
2154 if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2155 rp = aj + ai[brow];
2156 if (!A->structure_only) ap = aa + bs2*ai[brow];
2157 rmax = imax[brow];
2158 nrow = ailen[brow];
2159 low = 0;
2160 high = nrow;
2161 for (l=0; l<n; l++) { /* loop over added columns */
2162 if (in[l] < 0) continue;
2163 if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2164 col = in[l]; bcol = col/bs;
2165 ridx = row % bs; cidx = col % bs;
2166 if (!A->structure_only) {
2167 if (roworiented) {
2168 value = v[l + k*n];
2169 } else {
2170 value = v[k + l*m];
2171 }
2172 }
2173 if (col <= lastcol) low = 0; else high = nrow;
2174 lastcol = col;
2175 while (high-low > 7) {
2176 t = (low+high)/2;
2177 if (rp[t] > bcol) high = t;
2178 else low = t;
2179 }
2180 for (i=low; i<high; i++) {
2181 if (rp[i] > bcol) break;
2182 if (rp[i] == bcol) {
2183 bap = ap + bs2*i + bs*cidx + ridx;
2184 if (!A->structure_only) {
2185 if (is == ADD_VALUES) *bap += value;
2186 else *bap = value;
2187 }
2188 goto noinsert1;
2189 }
2190 }
2191 if (nonew == 1) goto noinsert1;
2192 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2193 if (A->structure_only) {
2194 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2195 } else {
2196 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2197 }
2198 N = nrow++ - 1; high++;
2199 /* shift up all the later entries in this row */
2200 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2201 rp[i] = bcol;
2202 if (!A->structure_only) {
2203 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2204 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2205 ap[bs2*i + bs*cidx + ridx] = value;
2206 }
2207 a->nz++;
2208 A->nonzerostate++;
2209 noinsert1:;
2210 low = i;
2211 }
2212 ailen[brow] = nrow;
2213 }
2214 PetscFunctionReturn(0);
2215 }
2216
MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo * info)2217 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2218 {
2219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2220 Mat outA;
2221 PetscErrorCode ierr;
2222 PetscBool row_identity,col_identity;
2223
2224 PetscFunctionBegin;
2225 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2226 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2227 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2228 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2229
2230 outA = inA;
2231 inA->factortype = MAT_FACTOR_LU;
2232 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2233 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2234
2235 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2236
2237 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2238 ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2239 a->row = row;
2240 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2241 ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2242 a->col = col;
2243
2244 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2245 ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2246 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2247 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2248
2249 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2250 if (!a->solve_work) {
2251 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2252 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2253 }
2254 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2255 PetscFunctionReturn(0);
2256 }
2257
MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt * indices)2258 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2259 {
2260 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2261 PetscInt i,nz,mbs;
2262
2263 PetscFunctionBegin;
2264 nz = baij->maxnz;
2265 mbs = baij->mbs;
2266 for (i=0; i<nz; i++) {
2267 baij->j[i] = indices[i];
2268 }
2269 baij->nz = nz;
2270 for (i=0; i<mbs; i++) {
2271 baij->ilen[i] = baij->imax[i];
2272 }
2273 PetscFunctionReturn(0);
2274 }
2275
2276 /*@
2277 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2278 in the matrix.
2279
2280 Input Parameters:
2281 + mat - the SeqBAIJ matrix
2282 - indices - the column indices
2283
2284 Level: advanced
2285
2286 Notes:
2287 This can be called if you have precomputed the nonzero structure of the
2288 matrix and want to provide it to the matrix object to improve the performance
2289 of the MatSetValues() operation.
2290
2291 You MUST have set the correct numbers of nonzeros per row in the call to
2292 MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2293
2294 MUST be called before any calls to MatSetValues();
2295
2296 @*/
MatSeqBAIJSetColumnIndices(Mat mat,PetscInt * indices)2297 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2298 {
2299 PetscErrorCode ierr;
2300
2301 PetscFunctionBegin;
2302 PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2303 PetscValidPointer(indices,2);
2304 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2305 PetscFunctionReturn(0);
2306 }
2307
MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])2308 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2309 {
2310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2311 PetscErrorCode ierr;
2312 PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2313 PetscReal atmp;
2314 PetscScalar *x,zero = 0.0;
2315 MatScalar *aa;
2316 PetscInt ncols,brow,krow,kcol;
2317
2318 PetscFunctionBegin;
2319 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2320 bs = A->rmap->bs;
2321 aa = a->a;
2322 ai = a->i;
2323 aj = a->j;
2324 mbs = a->mbs;
2325
2326 ierr = VecSet(v,zero);CHKERRQ(ierr);
2327 ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2328 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2329 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2330 for (i=0; i<mbs; i++) {
2331 ncols = ai[1] - ai[0]; ai++;
2332 brow = bs*i;
2333 for (j=0; j<ncols; j++) {
2334 for (kcol=0; kcol<bs; kcol++) {
2335 for (krow=0; krow<bs; krow++) {
2336 atmp = PetscAbsScalar(*aa);aa++;
2337 row = brow + krow; /* row index */
2338 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2339 }
2340 }
2341 aj++;
2342 }
2343 }
2344 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2345 PetscFunctionReturn(0);
2346 }
2347
MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)2348 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2349 {
2350 PetscErrorCode ierr;
2351
2352 PetscFunctionBegin;
2353 /* If the two matrices have the same copy implementation, use fast copy. */
2354 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2355 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2356 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2357 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2358
2359 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2360 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2361 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2362 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2363 } else {
2364 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2365 }
2366 PetscFunctionReturn(0);
2367 }
2368
MatSetUp_SeqBAIJ(Mat A)2369 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2370 {
2371 PetscErrorCode ierr;
2372
2373 PetscFunctionBegin;
2374 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2375 PetscFunctionReturn(0);
2376 }
2377
MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar * array[])2378 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2379 {
2380 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2381
2382 PetscFunctionBegin;
2383 *array = a->a;
2384 PetscFunctionReturn(0);
2385 }
2386
MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar * array[])2387 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2388 {
2389 PetscFunctionBegin;
2390 *array = NULL;
2391 PetscFunctionReturn(0);
2392 }
2393
MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt * nnz)2394 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2395 {
2396 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2397 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
2398 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
2399 PetscErrorCode ierr;
2400
2401 PetscFunctionBegin;
2402 /* Set the number of nonzeros in the new matrix */
2403 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2404 PetscFunctionReturn(0);
2405 }
2406
MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)2407 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2408 {
2409 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2410 PetscErrorCode ierr;
2411 PetscInt bs=Y->rmap->bs,bs2=bs*bs;
2412 PetscBLASInt one=1;
2413
2414 PetscFunctionBegin;
2415 if (str == SAME_NONZERO_PATTERN) {
2416 PetscScalar alpha = a;
2417 PetscBLASInt bnz;
2418 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2419 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2420 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2421 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2422 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2423 } else {
2424 Mat B;
2425 PetscInt *nnz;
2426 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2427 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2428 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2429 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2430 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2431 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2432 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2433 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2434 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2435 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2436 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2437 ierr = PetscFree(nnz);CHKERRQ(ierr);
2438 }
2439 PetscFunctionReturn(0);
2440 }
2441
MatRealPart_SeqBAIJ(Mat A)2442 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2443 {
2444 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2445 PetscInt i,nz = a->bs2*a->i[a->mbs];
2446 MatScalar *aa = a->a;
2447
2448 PetscFunctionBegin;
2449 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2450 PetscFunctionReturn(0);
2451 }
2452
MatImaginaryPart_SeqBAIJ(Mat A)2453 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2454 {
2455 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2456 PetscInt i,nz = a->bs2*a->i[a->mbs];
2457 MatScalar *aa = a->a;
2458
2459 PetscFunctionBegin;
2460 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2461 PetscFunctionReturn(0);
2462 }
2463
2464 /*
2465 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2466 */
MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2467 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2468 {
2469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2470 PetscErrorCode ierr;
2471 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2472 PetscInt nz = a->i[m],row,*jj,mr,col;
2473
2474 PetscFunctionBegin;
2475 *nn = n;
2476 if (!ia) PetscFunctionReturn(0);
2477 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2478 else {
2479 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2480 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2481 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2482 jj = a->j;
2483 for (i=0; i<nz; i++) {
2484 collengths[jj[i]]++;
2485 }
2486 cia[0] = oshift;
2487 for (i=0; i<n; i++) {
2488 cia[i+1] = cia[i] + collengths[i];
2489 }
2490 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2491 jj = a->j;
2492 for (row=0; row<m; row++) {
2493 mr = a->i[row+1] - a->i[row];
2494 for (i=0; i<mr; i++) {
2495 col = *jj++;
2496
2497 cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2498 }
2499 }
2500 ierr = PetscFree(collengths);CHKERRQ(ierr);
2501 *ia = cia; *ja = cja;
2502 }
2503 PetscFunctionReturn(0);
2504 }
2505
MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * n,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2506 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
2507 {
2508 PetscErrorCode ierr;
2509
2510 PetscFunctionBegin;
2511 if (!ia) PetscFunctionReturn(0);
2512 ierr = PetscFree(*ia);CHKERRQ(ierr);
2513 ierr = PetscFree(*ja);CHKERRQ(ierr);
2514 PetscFunctionReturn(0);
2515 }
2516
2517 /*
2518 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2519 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2520 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2521 */
MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscInt * spidx[],PetscBool * done)2522 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2523 {
2524 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2525 PetscErrorCode ierr;
2526 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2527 PetscInt nz = a->i[m],row,*jj,mr,col;
2528 PetscInt *cspidx;
2529
2530 PetscFunctionBegin;
2531 *nn = n;
2532 if (!ia) PetscFunctionReturn(0);
2533
2534 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2535 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2536 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2537 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2538 jj = a->j;
2539 for (i=0; i<nz; i++) {
2540 collengths[jj[i]]++;
2541 }
2542 cia[0] = oshift;
2543 for (i=0; i<n; i++) {
2544 cia[i+1] = cia[i] + collengths[i];
2545 }
2546 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2547 jj = a->j;
2548 for (row=0; row<m; row++) {
2549 mr = a->i[row+1] - a->i[row];
2550 for (i=0; i<mr; i++) {
2551 col = *jj++;
2552 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2553 cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2554 }
2555 }
2556 ierr = PetscFree(collengths);CHKERRQ(ierr);
2557 *ia = cia;
2558 *ja = cja;
2559 *spidx = cspidx;
2560 PetscFunctionReturn(0);
2561 }
2562
MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * n,const PetscInt * ia[],const PetscInt * ja[],PetscInt * spidx[],PetscBool * done)2563 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done)
2564 {
2565 PetscErrorCode ierr;
2566
2567 PetscFunctionBegin;
2568 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2569 ierr = PetscFree(*spidx);CHKERRQ(ierr);
2570 PetscFunctionReturn(0);
2571 }
2572
MatShift_SeqBAIJ(Mat Y,PetscScalar a)2573 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2574 {
2575 PetscErrorCode ierr;
2576 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data;
2577
2578 PetscFunctionBegin;
2579 if (!Y->preallocated || !aij->nz) {
2580 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2581 }
2582 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2583 PetscFunctionReturn(0);
2584 }
2585
2586 /* -------------------------------------------------------------------*/
2587 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2588 MatGetRow_SeqBAIJ,
2589 MatRestoreRow_SeqBAIJ,
2590 MatMult_SeqBAIJ_N,
2591 /* 4*/ MatMultAdd_SeqBAIJ_N,
2592 MatMultTranspose_SeqBAIJ,
2593 MatMultTransposeAdd_SeqBAIJ,
2594 NULL,
2595 NULL,
2596 NULL,
2597 /* 10*/ NULL,
2598 MatLUFactor_SeqBAIJ,
2599 NULL,
2600 NULL,
2601 MatTranspose_SeqBAIJ,
2602 /* 15*/ MatGetInfo_SeqBAIJ,
2603 MatEqual_SeqBAIJ,
2604 MatGetDiagonal_SeqBAIJ,
2605 MatDiagonalScale_SeqBAIJ,
2606 MatNorm_SeqBAIJ,
2607 /* 20*/ NULL,
2608 MatAssemblyEnd_SeqBAIJ,
2609 MatSetOption_SeqBAIJ,
2610 MatZeroEntries_SeqBAIJ,
2611 /* 24*/ MatZeroRows_SeqBAIJ,
2612 NULL,
2613 NULL,
2614 NULL,
2615 NULL,
2616 /* 29*/ MatSetUp_SeqBAIJ,
2617 NULL,
2618 NULL,
2619 NULL,
2620 NULL,
2621 /* 34*/ MatDuplicate_SeqBAIJ,
2622 NULL,
2623 NULL,
2624 MatILUFactor_SeqBAIJ,
2625 NULL,
2626 /* 39*/ MatAXPY_SeqBAIJ,
2627 MatCreateSubMatrices_SeqBAIJ,
2628 MatIncreaseOverlap_SeqBAIJ,
2629 MatGetValues_SeqBAIJ,
2630 MatCopy_SeqBAIJ,
2631 /* 44*/ NULL,
2632 MatScale_SeqBAIJ,
2633 MatShift_SeqBAIJ,
2634 NULL,
2635 MatZeroRowsColumns_SeqBAIJ,
2636 /* 49*/ NULL,
2637 MatGetRowIJ_SeqBAIJ,
2638 MatRestoreRowIJ_SeqBAIJ,
2639 MatGetColumnIJ_SeqBAIJ,
2640 MatRestoreColumnIJ_SeqBAIJ,
2641 /* 54*/ MatFDColoringCreate_SeqXAIJ,
2642 NULL,
2643 NULL,
2644 NULL,
2645 MatSetValuesBlocked_SeqBAIJ,
2646 /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2647 MatDestroy_SeqBAIJ,
2648 MatView_SeqBAIJ,
2649 NULL,
2650 NULL,
2651 /* 64*/ NULL,
2652 NULL,
2653 NULL,
2654 NULL,
2655 NULL,
2656 /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2657 NULL,
2658 MatConvert_Basic,
2659 NULL,
2660 NULL,
2661 /* 74*/ NULL,
2662 MatFDColoringApply_BAIJ,
2663 NULL,
2664 NULL,
2665 NULL,
2666 /* 79*/ NULL,
2667 NULL,
2668 NULL,
2669 NULL,
2670 MatLoad_SeqBAIJ,
2671 /* 84*/ NULL,
2672 NULL,
2673 NULL,
2674 NULL,
2675 NULL,
2676 /* 89*/ NULL,
2677 NULL,
2678 NULL,
2679 NULL,
2680 NULL,
2681 /* 94*/ NULL,
2682 NULL,
2683 NULL,
2684 NULL,
2685 NULL,
2686 /* 99*/ NULL,
2687 NULL,
2688 NULL,
2689 NULL,
2690 NULL,
2691 /*104*/ NULL,
2692 MatRealPart_SeqBAIJ,
2693 MatImaginaryPart_SeqBAIJ,
2694 NULL,
2695 NULL,
2696 /*109*/ NULL,
2697 NULL,
2698 NULL,
2699 NULL,
2700 MatMissingDiagonal_SeqBAIJ,
2701 /*114*/ NULL,
2702 NULL,
2703 NULL,
2704 NULL,
2705 NULL,
2706 /*119*/ NULL,
2707 NULL,
2708 MatMultHermitianTranspose_SeqBAIJ,
2709 MatMultHermitianTransposeAdd_SeqBAIJ,
2710 NULL,
2711 /*124*/ NULL,
2712 NULL,
2713 MatInvertBlockDiagonal_SeqBAIJ,
2714 NULL,
2715 NULL,
2716 /*129*/ NULL,
2717 NULL,
2718 NULL,
2719 NULL,
2720 NULL,
2721 /*134*/ NULL,
2722 NULL,
2723 NULL,
2724 NULL,
2725 NULL,
2726 /*139*/ MatSetBlockSizes_Default,
2727 NULL,
2728 NULL,
2729 MatFDColoringSetUp_SeqXAIJ,
2730 NULL,
2731 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2732 MatDestroySubMatrices_SeqBAIJ
2733 };
2734
MatStoreValues_SeqBAIJ(Mat mat)2735 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2736 {
2737 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2738 PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2739 PetscErrorCode ierr;
2740
2741 PetscFunctionBegin;
2742 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2743
2744 /* allocate space for values if not already there */
2745 if (!aij->saved_values) {
2746 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2747 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2748 }
2749
2750 /* copy values over */
2751 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2752 PetscFunctionReturn(0);
2753 }
2754
MatRetrieveValues_SeqBAIJ(Mat mat)2755 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2756 {
2757 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data;
2758 PetscErrorCode ierr;
2759 PetscInt nz = aij->i[aij->mbs]*aij->bs2;
2760
2761 PetscFunctionBegin;
2762 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2763 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2764
2765 /* copy values over */
2766 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2767 PetscFunctionReturn(0);
2768 }
2769
2770 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2771 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2772
MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt * nnz)2773 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2774 {
2775 Mat_SeqBAIJ *b;
2776 PetscErrorCode ierr;
2777 PetscInt i,mbs,nbs,bs2;
2778 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2779
2780 PetscFunctionBegin;
2781 if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2782 if (nz == MAT_SKIP_ALLOCATION) {
2783 skipallocation = PETSC_TRUE;
2784 nz = 0;
2785 }
2786
2787 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2788 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2789 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2790 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2791
2792 B->preallocated = PETSC_TRUE;
2793
2794 mbs = B->rmap->n/bs;
2795 nbs = B->cmap->n/bs;
2796 bs2 = bs*bs;
2797
2798 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2799
2800 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2801 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2802 if (nnz) {
2803 for (i=0; i<mbs; i++) {
2804 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2805 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2806 }
2807 }
2808
2809 b = (Mat_SeqBAIJ*)B->data;
2810 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2811 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2812 ierr = PetscOptionsEnd();CHKERRQ(ierr);
2813
2814 if (!flg) {
2815 switch (bs) {
2816 case 1:
2817 B->ops->mult = MatMult_SeqBAIJ_1;
2818 B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2819 break;
2820 case 2:
2821 B->ops->mult = MatMult_SeqBAIJ_2;
2822 B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2823 break;
2824 case 3:
2825 B->ops->mult = MatMult_SeqBAIJ_3;
2826 B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2827 break;
2828 case 4:
2829 B->ops->mult = MatMult_SeqBAIJ_4;
2830 B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2831 break;
2832 case 5:
2833 B->ops->mult = MatMult_SeqBAIJ_5;
2834 B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2835 break;
2836 case 6:
2837 B->ops->mult = MatMult_SeqBAIJ_6;
2838 B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2839 break;
2840 case 7:
2841 B->ops->mult = MatMult_SeqBAIJ_7;
2842 B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2843 break;
2844 case 9:
2845 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2846 B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
2847 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2848 #else
2849 B->ops->mult = MatMult_SeqBAIJ_N;
2850 B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2851 #endif
2852 break;
2853 case 11:
2854 B->ops->mult = MatMult_SeqBAIJ_11;
2855 B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2856 break;
2857 case 15:
2858 B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2859 B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2860 break;
2861 default:
2862 B->ops->mult = MatMult_SeqBAIJ_N;
2863 B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2864 break;
2865 }
2866 }
2867 B->ops->sor = MatSOR_SeqBAIJ;
2868 b->mbs = mbs;
2869 b->nbs = nbs;
2870 if (!skipallocation) {
2871 if (!b->imax) {
2872 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2873 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2874
2875 b->free_imax_ilen = PETSC_TRUE;
2876 }
2877 /* b->ilen will count nonzeros in each block row so far. */
2878 for (i=0; i<mbs; i++) b->ilen[i] = 0;
2879 if (!nnz) {
2880 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2881 else if (nz < 0) nz = 1;
2882 nz = PetscMin(nz,nbs);
2883 for (i=0; i<mbs; i++) b->imax[i] = nz;
2884 nz = nz*mbs;
2885 } else {
2886 PetscInt64 nz64 = 0;
2887 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2888 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
2889 }
2890
2891 /* allocate the matrix space */
2892 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2893 if (B->structure_only) {
2894 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2895 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2896 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2897 } else {
2898 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2899 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2900 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2901 }
2902 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2903
2904 if (B->structure_only) {
2905 b->singlemalloc = PETSC_FALSE;
2906 b->free_a = PETSC_FALSE;
2907 } else {
2908 b->singlemalloc = PETSC_TRUE;
2909 b->free_a = PETSC_TRUE;
2910 }
2911 b->free_ij = PETSC_TRUE;
2912
2913 b->i[0] = 0;
2914 for (i=1; i<mbs+1; i++) {
2915 b->i[i] = b->i[i-1] + b->imax[i-1];
2916 }
2917
2918 } else {
2919 b->free_a = PETSC_FALSE;
2920 b->free_ij = PETSC_FALSE;
2921 }
2922
2923 b->bs2 = bs2;
2924 b->mbs = mbs;
2925 b->nz = 0;
2926 b->maxnz = nz;
2927 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2928 B->was_assembled = PETSC_FALSE;
2929 B->assembled = PETSC_FALSE;
2930 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2931 PetscFunctionReturn(0);
2932 }
2933
MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])2934 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2935 {
2936 PetscInt i,m,nz,nz_max=0,*nnz;
2937 PetscScalar *values=NULL;
2938 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2939 PetscErrorCode ierr;
2940
2941 PetscFunctionBegin;
2942 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2943 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2944 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2945 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2946 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2947 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2948 m = B->rmap->n/bs;
2949
2950 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2951 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2952 for (i=0; i<m; i++) {
2953 nz = ii[i+1]- ii[i];
2954 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2955 nz_max = PetscMax(nz_max, nz);
2956 nnz[i] = nz;
2957 }
2958 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2959 ierr = PetscFree(nnz);CHKERRQ(ierr);
2960
2961 values = (PetscScalar*)V;
2962 if (!values) {
2963 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2964 }
2965 for (i=0; i<m; i++) {
2966 PetscInt ncols = ii[i+1] - ii[i];
2967 const PetscInt *icols = jj + ii[i];
2968 if (bs == 1 || !roworiented) {
2969 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2970 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2971 } else {
2972 PetscInt j;
2973 for (j=0; j<ncols; j++) {
2974 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2975 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2976 }
2977 }
2978 }
2979 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2980 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2981 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2982 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2983 PetscFunctionReturn(0);
2984 }
2985
2986 /*@C
2987 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
2988
2989 Not Collective
2990
2991 Input Parameter:
2992 . mat - a MATSEQBAIJ matrix
2993
2994 Output Parameter:
2995 . array - pointer to the data
2996
2997 Level: intermediate
2998
2999 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3000 @*/
MatSeqBAIJGetArray(Mat A,PetscScalar ** array)3001 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3002 {
3003 PetscErrorCode ierr;
3004
3005 PetscFunctionBegin;
3006 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3007 PetscFunctionReturn(0);
3008 }
3009
3010 /*@C
3011 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3012
3013 Not Collective
3014
3015 Input Parameters:
3016 + mat - a MATSEQBAIJ matrix
3017 - array - pointer to the data
3018
3019 Level: intermediate
3020
3021 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3022 @*/
MatSeqBAIJRestoreArray(Mat A,PetscScalar ** array)3023 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3024 {
3025 PetscErrorCode ierr;
3026
3027 PetscFunctionBegin;
3028 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3029 PetscFunctionReturn(0);
3030 }
3031
3032 /*MC
3033 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3034 block sparse compressed row format.
3035
3036 Options Database Keys:
3037 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3038
3039 Level: beginner
3040
3041 Notes:
3042 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3043 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3044
3045 .seealso: MatCreateSeqBAIJ()
3046 M*/
3047
3048 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3049
MatCreate_SeqBAIJ(Mat B)3050 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3051 {
3052 PetscErrorCode ierr;
3053 PetscMPIInt size;
3054 Mat_SeqBAIJ *b;
3055
3056 PetscFunctionBegin;
3057 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3058 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3059
3060 ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
3061 B->data = (void*)b;
3062 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3063
3064 b->row = NULL;
3065 b->col = NULL;
3066 b->icol = NULL;
3067 b->reallocs = 0;
3068 b->saved_values = NULL;
3069
3070 b->roworiented = PETSC_TRUE;
3071 b->nonew = 0;
3072 b->diag = NULL;
3073 B->spptr = NULL;
3074 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2;
3075 b->keepnonzeropattern = PETSC_FALSE;
3076
3077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3088 #if defined(PETSC_HAVE_HYPRE)
3089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3090 #endif
3091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3092 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3093 PetscFunctionReturn(0);
3094 }
3095
MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)3096 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3097 {
3098 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3099 PetscErrorCode ierr;
3100 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3101
3102 PetscFunctionBegin;
3103 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3104
3105 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3106 c->imax = a->imax;
3107 c->ilen = a->ilen;
3108 c->free_imax_ilen = PETSC_FALSE;
3109 } else {
3110 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3111 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3112 for (i=0; i<mbs; i++) {
3113 c->imax[i] = a->imax[i];
3114 c->ilen[i] = a->ilen[i];
3115 }
3116 c->free_imax_ilen = PETSC_TRUE;
3117 }
3118
3119 /* allocate the matrix space */
3120 if (mallocmatspace) {
3121 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3122 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3123 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3124
3125 c->i = a->i;
3126 c->j = a->j;
3127 c->singlemalloc = PETSC_FALSE;
3128 c->free_a = PETSC_TRUE;
3129 c->free_ij = PETSC_FALSE;
3130 c->parent = A;
3131 C->preallocated = PETSC_TRUE;
3132 C->assembled = PETSC_TRUE;
3133
3134 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3135 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3136 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3137 } else {
3138 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3139 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3140
3141 c->singlemalloc = PETSC_TRUE;
3142 c->free_a = PETSC_TRUE;
3143 c->free_ij = PETSC_TRUE;
3144
3145 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3146 if (mbs > 0) {
3147 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3148 if (cpvalues == MAT_COPY_VALUES) {
3149 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3150 } else {
3151 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3152 }
3153 }
3154 C->preallocated = PETSC_TRUE;
3155 C->assembled = PETSC_TRUE;
3156 }
3157 }
3158
3159 c->roworiented = a->roworiented;
3160 c->nonew = a->nonew;
3161
3162 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3163 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3164
3165 c->bs2 = a->bs2;
3166 c->mbs = a->mbs;
3167 c->nbs = a->nbs;
3168
3169 if (a->diag) {
3170 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3171 c->diag = a->diag;
3172 c->free_diag = PETSC_FALSE;
3173 } else {
3174 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3175 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3176 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3177 c->free_diag = PETSC_TRUE;
3178 }
3179 } else c->diag = NULL;
3180
3181 c->nz = a->nz;
3182 c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3183 c->solve_work = NULL;
3184 c->mult_work = NULL;
3185 c->sor_workt = NULL;
3186 c->sor_work = NULL;
3187
3188 c->compressedrow.use = a->compressedrow.use;
3189 c->compressedrow.nrows = a->compressedrow.nrows;
3190 if (a->compressedrow.use) {
3191 i = a->compressedrow.nrows;
3192 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3193 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3194 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3195 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3196 } else {
3197 c->compressedrow.use = PETSC_FALSE;
3198 c->compressedrow.i = NULL;
3199 c->compressedrow.rindex = NULL;
3200 }
3201 C->nonzerostate = A->nonzerostate;
3202
3203 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3204 PetscFunctionReturn(0);
3205 }
3206
MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat * B)3207 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3208 {
3209 PetscErrorCode ierr;
3210
3211 PetscFunctionBegin;
3212 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3213 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3214 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3215 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3216 PetscFunctionReturn(0);
3217 }
3218
3219 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)3220 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3221 {
3222 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3223 PetscInt *rowidxs,*colidxs;
3224 PetscScalar *matvals;
3225 PetscErrorCode ierr;
3226
3227 PetscFunctionBegin;
3228 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3229
3230 /* read matrix header */
3231 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3232 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3233 M = header[1]; N = header[2]; nz = header[3];
3234 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3235 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3236 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3237
3238 /* set block sizes from the viewer's .info file */
3239 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3240 /* set local and global sizes if not set already */
3241 if (mat->rmap->n < 0) mat->rmap->n = M;
3242 if (mat->cmap->n < 0) mat->cmap->n = N;
3243 if (mat->rmap->N < 0) mat->rmap->N = M;
3244 if (mat->cmap->N < 0) mat->cmap->N = N;
3245 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3246 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3247
3248 /* check if the matrix sizes are correct */
3249 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3250 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3251 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3252 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3253 mbs = m/bs; nbs = n/bs;
3254
3255 /* read in row lengths, column indices and nonzero values */
3256 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3257 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3258 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3259 sum = rowidxs[m];
3260 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3261
3262 /* read in column indices and nonzero values */
3263 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3264 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3265 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3266
3267 { /* preallocate matrix storage */
3268 PetscBT bt; /* helper bit set to count nonzeros */
3269 PetscInt *nnz;
3270 PetscBool sbaij;
3271
3272 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3273 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3274 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3275 for (i=0; i<mbs; i++) {
3276 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3277 for (k=0; k<bs; k++) {
3278 PetscInt row = bs*i + k;
3279 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3280 PetscInt col = colidxs[j];
3281 if (!sbaij || col >= row)
3282 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3283 }
3284 }
3285 }
3286 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3287 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3288 ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3289 ierr = PetscFree(nnz);CHKERRQ(ierr);
3290 }
3291
3292 /* store matrix values */
3293 for (i=0; i<m; i++) {
3294 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3295 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3296 }
3297
3298 ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3299 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3300 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3302 PetscFunctionReturn(0);
3303 }
3304
MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)3305 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3306 {
3307 PetscErrorCode ierr;
3308 PetscBool isbinary;
3309
3310 PetscFunctionBegin;
3311 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3312 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3313 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3314 PetscFunctionReturn(0);
3315 }
3316
3317 /*@C
3318 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3319 compressed row) format. For good matrix assembly performance the
3320 user should preallocate the matrix storage by setting the parameter nz
3321 (or the array nnz). By setting these parameters accurately, performance
3322 during matrix assembly can be increased by more than a factor of 50.
3323
3324 Collective
3325
3326 Input Parameters:
3327 + comm - MPI communicator, set to PETSC_COMM_SELF
3328 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3329 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3330 . m - number of rows
3331 . n - number of columns
3332 . nz - number of nonzero blocks per block row (same for all rows)
3333 - nnz - array containing the number of nonzero blocks in the various block rows
3334 (possibly different for each block row) or NULL
3335
3336 Output Parameter:
3337 . A - the matrix
3338
3339 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3340 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3341 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3342
3343 Options Database Keys:
3344 + -mat_no_unroll - uses code that does not unroll the loops in the
3345 block calculations (much slower)
3346 - -mat_block_size - size of the blocks to use
3347
3348 Level: intermediate
3349
3350 Notes:
3351 The number of rows and columns must be divisible by blocksize.
3352
3353 If the nnz parameter is given then the nz parameter is ignored
3354
3355 A nonzero block is any block that as 1 or more nonzeros in it
3356
3357 The block AIJ format is fully compatible with standard Fortran 77
3358 storage. That is, the stored row and column indices can begin at
3359 either one (as in Fortran) or zero. See the users' manual for details.
3360
3361 Specify the preallocated storage with either nz or nnz (not both).
3362 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3363 allocation. See Users-Manual: ch_mat for details.
3364 matrices.
3365
3366 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3367 @*/
MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)3368 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3369 {
3370 PetscErrorCode ierr;
3371
3372 PetscFunctionBegin;
3373 ierr = MatCreate(comm,A);CHKERRQ(ierr);
3374 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3375 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3376 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3377 PetscFunctionReturn(0);
3378 }
3379
3380 /*@C
3381 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3382 per row in the matrix. For good matrix assembly performance the
3383 user should preallocate the matrix storage by setting the parameter nz
3384 (or the array nnz). By setting these parameters accurately, performance
3385 during matrix assembly can be increased by more than a factor of 50.
3386
3387 Collective
3388
3389 Input Parameters:
3390 + B - the matrix
3391 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3392 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3393 . nz - number of block nonzeros per block row (same for all rows)
3394 - nnz - array containing the number of block nonzeros in the various block rows
3395 (possibly different for each block row) or NULL
3396
3397 Options Database Keys:
3398 + -mat_no_unroll - uses code that does not unroll the loops in the
3399 block calculations (much slower)
3400 - -mat_block_size - size of the blocks to use
3401
3402 Level: intermediate
3403
3404 Notes:
3405 If the nnz parameter is given then the nz parameter is ignored
3406
3407 You can call MatGetInfo() to get information on how effective the preallocation was;
3408 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3409 You can also run with the option -info and look for messages with the string
3410 malloc in them to see if additional memory allocation was needed.
3411
3412 The block AIJ format is fully compatible with standard Fortran 77
3413 storage. That is, the stored row and column indices can begin at
3414 either one (as in Fortran) or zero. See the users' manual for details.
3415
3416 Specify the preallocated storage with either nz or nnz (not both).
3417 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3418 allocation. See Users-Manual: ch_mat for details.
3419
3420 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3421 @*/
MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])3422 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3423 {
3424 PetscErrorCode ierr;
3425
3426 PetscFunctionBegin;
3427 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3428 PetscValidType(B,1);
3429 PetscValidLogicalCollectiveInt(B,bs,2);
3430 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3431 PetscFunctionReturn(0);
3432 }
3433
3434 /*@C
3435 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3436
3437 Collective
3438
3439 Input Parameters:
3440 + B - the matrix
3441 . i - the indices into j for the start of each local row (starts with zero)
3442 . j - the column indices for each local row (starts with zero) these must be sorted for each row
3443 - v - optional values in the matrix
3444
3445 Level: advanced
3446
3447 Notes:
3448 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
3449 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3450 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3451 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3452 block column and the second index is over columns within a block.
3453
3454 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3455
3456 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3457 @*/
MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])3458 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3459 {
3460 PetscErrorCode ierr;
3461
3462 PetscFunctionBegin;
3463 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3464 PetscValidType(B,1);
3465 PetscValidLogicalCollectiveInt(B,bs,2);
3466 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3467 PetscFunctionReturn(0);
3468 }
3469
3470
3471 /*@
3472 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3473
3474 Collective
3475
3476 Input Parameters:
3477 + comm - must be an MPI communicator of size 1
3478 . bs - size of block
3479 . m - number of rows
3480 . n - number of columns
3481 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3482 . j - column indices
3483 - a - matrix values
3484
3485 Output Parameter:
3486 . mat - the matrix
3487
3488 Level: advanced
3489
3490 Notes:
3491 The i, j, and a arrays are not copied by this routine, the user must free these arrays
3492 once the matrix is destroyed
3493
3494 You cannot set new nonzero locations into this matrix, that will generate an error.
3495
3496 The i and j indices are 0 based
3497
3498 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3499
3500 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3501 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3502 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3503 with column-major ordering within blocks.
3504
3505 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3506
3507 @*/
MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat * mat)3508 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3509 {
3510 PetscErrorCode ierr;
3511 PetscInt ii;
3512 Mat_SeqBAIJ *baij;
3513
3514 PetscFunctionBegin;
3515 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3516 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3517
3518 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3519 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3520 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3521 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3522 baij = (Mat_SeqBAIJ*)(*mat)->data;
3523 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3524 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3525
3526 baij->i = i;
3527 baij->j = j;
3528 baij->a = a;
3529
3530 baij->singlemalloc = PETSC_FALSE;
3531 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3532 baij->free_a = PETSC_FALSE;
3533 baij->free_ij = PETSC_FALSE;
3534
3535 for (ii=0; ii<m; ii++) {
3536 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3537 if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3538 }
3539 if (PetscDefined(USE_DEBUG)) {
3540 for (ii=0; ii<baij->i[m]; ii++) {
3541 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3542 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3543 }
3544 }
3545
3546 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3547 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3548 PetscFunctionReturn(0);
3549 }
3550
MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)3551 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3552 {
3553 PetscErrorCode ierr;
3554 PetscMPIInt size;
3555
3556 PetscFunctionBegin;
3557 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3558 if (size == 1 && scall == MAT_REUSE_MATRIX) {
3559 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3560 } else {
3561 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3562 }
3563 PetscFunctionReturn(0);
3564 }
3565