1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar ** values)20 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
21 {
22   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
23   PetscErrorCode ierr;
24   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
25   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
26   PetscReal      shift = 0.0;
27   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
28 
29   PetscFunctionBegin;
30   allowzeropivot = PetscNot(A->erroriffailure);
31 
32   if (a->idiagvalid) {
33     if (values) *values = a->idiag;
34     PetscFunctionReturn(0);
35   }
36   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
37   diag_offset = a->diag;
38   if (!a->idiag) {
39     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
40     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
41   }
42   diag  = a->idiag;
43   if (values) *values = a->idiag;
44   /* factor and invert each block */
45   switch (bs) {
46   case 1:
47     for (i=0; i<mbs; i++) {
48       odiag    = v + 1*diag_offset[i];
49       diag[0]  = odiag[0];
50 
51       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
52         if (allowzeropivot) {
53           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
54           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
55           A->factorerror_zeropivot_row   = i;
56           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
57         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
58       }
59 
60       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
61       diag    += 1;
62     }
63     break;
64   case 2:
65     for (i=0; i<mbs; i++) {
66       odiag    = v + 4*diag_offset[i];
67       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
69       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
70       diag    += 4;
71     }
72     break;
73   case 3:
74     for (i=0; i<mbs; i++) {
75       odiag    = v + 9*diag_offset[i];
76       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78       diag[8]  = odiag[8];
79       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
80       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
81       diag    += 9;
82     }
83     break;
84   case 4:
85     for (i=0; i<mbs; i++) {
86       odiag  = v + 16*diag_offset[i];
87       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
88       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
89       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
90       diag  += 16;
91     }
92     break;
93   case 5:
94     for (i=0; i<mbs; i++) {
95       odiag  = v + 25*diag_offset[i];
96       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
97       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
98       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99       diag  += 25;
100     }
101     break;
102   case 6:
103     for (i=0; i<mbs; i++) {
104       odiag  = v + 36*diag_offset[i];
105       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
107       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
108       diag  += 36;
109     }
110     break;
111   case 7:
112     for (i=0; i<mbs; i++) {
113       odiag  = v + 49*diag_offset[i];
114       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
115       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117       diag  += 49;
118     }
119     break;
120   default:
121     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
122     for (i=0; i<mbs; i++) {
123       odiag  = v + bs2*diag_offset[i];
124       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
125       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
126       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127       diag  += bs2;
128     }
129     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 
MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)135 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
136 {
137   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
138   PetscScalar       *x,*work,*w,*workt,*t;
139   const MatScalar   *v,*aa = a->a, *idiag;
140   const PetscScalar *b,*xb;
141   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
142   PetscErrorCode    ierr;
143   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
144   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
145 
146   PetscFunctionBegin;
147   its = its*lits;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
150   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
151   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
152   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
153 
154   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
155 
156   if (!m) PetscFunctionReturn(0);
157   diag  = a->diag;
158   idiag = a->idiag;
159   k    = PetscMax(A->rmap->n,A->cmap->n);
160   if (!a->mult_work) {
161     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
162   }
163   if (!a->sor_workt) {
164     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
165   }
166   if (!a->sor_work) {
167     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
168   }
169   work = a->mult_work;
170   t    = a->sor_workt;
171   w    = a->sor_work;
172 
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178       switch (bs) {
179       case 1:
180         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181         t[0] = b[0];
182         i2     = 1;
183         idiag += 1;
184         for (i=1; i<m; i++) {
185           v  = aa + ai[i];
186           vi = aj + ai[i];
187           nz = diag[i] - ai[i];
188           s[0] = b[i2];
189           for (j=0; j<nz; j++) {
190             xw[0] = x[vi[j]];
191             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192           }
193           t[i2] = s[0];
194           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195           x[i2]  = xw[0];
196           idiag += 1;
197           i2    += 1;
198         }
199         break;
200       case 2:
201         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202         t[0] = b[0]; t[1] = b[1];
203         i2     = 2;
204         idiag += 4;
205         for (i=1; i<m; i++) {
206           v  = aa + 4*ai[i];
207           vi = aj + ai[i];
208           nz = diag[i] - ai[i];
209           s[0] = b[i2]; s[1] = b[i2+1];
210           for (j=0; j<nz; j++) {
211             idx = 2*vi[j];
212             it  = 4*j;
213             xw[0] = x[idx]; xw[1] = x[1+idx];
214             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215           }
216           t[i2] = s[0]; t[i2+1] = s[1];
217           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218           x[i2]   = xw[0]; x[i2+1] = xw[1];
219           idiag  += 4;
220           i2     += 2;
221         }
222         break;
223       case 3:
224         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226         i2     = 3;
227         idiag += 9;
228         for (i=1; i<m; i++) {
229           v  = aa + 9*ai[i];
230           vi = aj + ai[i];
231           nz = diag[i] - ai[i];
232           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233           while (nz--) {
234             idx = 3*(*vi++);
235             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237             v  += 9;
238           }
239           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242           idiag  += 9;
243           i2     += 3;
244         }
245         break;
246       case 4:
247         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249         i2     = 4;
250         idiag += 16;
251         for (i=1; i<m; i++) {
252           v  = aa + 16*ai[i];
253           vi = aj + ai[i];
254           nz = diag[i] - ai[i];
255           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256           while (nz--) {
257             idx = 4*(*vi++);
258             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260             v  += 16;
261           }
262           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265           idiag  += 16;
266           i2     += 4;
267         }
268         break;
269       case 5:
270         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272         i2     = 5;
273         idiag += 25;
274         for (i=1; i<m; i++) {
275           v  = aa + 25*ai[i];
276           vi = aj + ai[i];
277           nz = diag[i] - ai[i];
278           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279           while (nz--) {
280             idx = 5*(*vi++);
281             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283             v  += 25;
284           }
285           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288           idiag  += 25;
289           i2     += 5;
290         }
291         break;
292       case 6:
293         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295         i2     = 6;
296         idiag += 36;
297         for (i=1; i<m; i++) {
298           v  = aa + 36*ai[i];
299           vi = aj + ai[i];
300           nz = diag[i] - ai[i];
301           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302           while (nz--) {
303             idx = 6*(*vi++);
304             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
305             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307             v  += 36;
308           }
309           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313           idiag  += 36;
314           i2     += 6;
315         }
316         break;
317       case 7:
318         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321         i2     = 7;
322         idiag += 49;
323         for (i=1; i<m; i++) {
324           v  = aa + 49*ai[i];
325           vi = aj + ai[i];
326           nz = diag[i] - ai[i];
327           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
328           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329           while (nz--) {
330             idx = 7*(*vi++);
331             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
332             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334             v  += 49;
335           }
336           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341           idiag  += 49;
342           i2     += 7;
343         }
344         break;
345       default:
346         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
348         i2     = bs;
349         idiag += bs2;
350         for (i=1; i<m; i++) {
351           v  = aa + bs2*ai[i];
352           vi = aj + ai[i];
353           nz = diag[i] - ai[i];
354 
355           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
356           /* copy all rows of x that are needed into contiguous space */
357           workt = work;
358           for (j=0; j<nz; j++) {
359             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
360             workt += bs;
361           }
362           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
364           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
365 
366           idiag += bs2;
367           i2    += bs;
368         }
369         break;
370       }
371       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
373       xb = t;
374     }
375     else xb = b;
376     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377       idiag = a->idiag+bs2*(a->mbs-1);
378       i2 = bs * (m-1);
379       switch (bs) {
380       case 1:
381         s[0]  = xb[i2];
382         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383         x[i2] = xw[0];
384         i2   -= 1;
385         for (i=m-2; i>=0; i--) {
386           v  = aa + (diag[i]+1);
387           vi = aj + diag[i] + 1;
388           nz = ai[i+1] - diag[i] - 1;
389           s[0] = xb[i2];
390           for (j=0; j<nz; j++) {
391             xw[0] = x[vi[j]];
392             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393           }
394           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395           x[i2]  = xw[0];
396           idiag -= 1;
397           i2    -= 1;
398         }
399         break;
400       case 2:
401         s[0]  = xb[i2]; s[1] = xb[i2+1];
402         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403         x[i2] = xw[0]; x[i2+1] = xw[1];
404         i2    -= 2;
405         idiag -= 4;
406         for (i=m-2; i>=0; i--) {
407           v  = aa + 4*(diag[i] + 1);
408           vi = aj + diag[i] + 1;
409           nz = ai[i+1] - diag[i] - 1;
410           s[0] = xb[i2]; s[1] = xb[i2+1];
411           for (j=0; j<nz; j++) {
412             idx = 2*vi[j];
413             it  = 4*j;
414             xw[0] = x[idx]; xw[1] = x[1+idx];
415             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416           }
417           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418           x[i2]   = xw[0]; x[i2+1] = xw[1];
419           idiag  -= 4;
420           i2     -= 2;
421         }
422         break;
423       case 3:
424         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427         i2    -= 3;
428         idiag -= 9;
429         for (i=m-2; i>=0; i--) {
430           v  = aa + 9*(diag[i]+1);
431           vi = aj + diag[i] + 1;
432           nz = ai[i+1] - diag[i] - 1;
433           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434           while (nz--) {
435             idx = 3*(*vi++);
436             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438             v  += 9;
439           }
440           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442           idiag  -= 9;
443           i2     -= 3;
444         }
445         break;
446       case 4:
447         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450         i2    -= 4;
451         idiag -= 16;
452         for (i=m-2; i>=0; i--) {
453           v  = aa + 16*(diag[i]+1);
454           vi = aj + diag[i] + 1;
455           nz = ai[i+1] - diag[i] - 1;
456           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457           while (nz--) {
458             idx = 4*(*vi++);
459             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461             v  += 16;
462           }
463           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465           idiag  -= 16;
466           i2     -= 4;
467         }
468         break;
469       case 5:
470         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473         i2    -= 5;
474         idiag -= 25;
475         for (i=m-2; i>=0; i--) {
476           v  = aa + 25*(diag[i]+1);
477           vi = aj + diag[i] + 1;
478           nz = ai[i+1] - diag[i] - 1;
479           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480           while (nz--) {
481             idx = 5*(*vi++);
482             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484             v  += 25;
485           }
486           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488           idiag  -= 25;
489           i2     -= 5;
490         }
491         break;
492       case 6:
493         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496         i2    -= 6;
497         idiag -= 36;
498         for (i=m-2; i>=0; i--) {
499           v  = aa + 36*(diag[i]+1);
500           vi = aj + diag[i] + 1;
501           nz = ai[i+1] - diag[i] - 1;
502           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503           while (nz--) {
504             idx = 6*(*vi++);
505             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
506             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508             v  += 36;
509           }
510           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512           idiag  -= 36;
513           i2     -= 6;
514         }
515         break;
516       case 7:
517         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
518         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522         i2    -= 7;
523         idiag -= 49;
524         for (i=m-2; i>=0; i--) {
525           v  = aa + 49*(diag[i]+1);
526           vi = aj + diag[i] + 1;
527           nz = ai[i+1] - diag[i] - 1;
528           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
529           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530           while (nz--) {
531             idx = 7*(*vi++);
532             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
533             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535             v  += 49;
536           }
537           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540           idiag  -= 49;
541           i2     -= 7;
542         }
543         break;
544       default:
545         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
546         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547         i2    -= bs;
548         idiag -= bs2;
549         for (i=m-2; i>=0; i--) {
550           v  = aa + bs2*(diag[i]+1);
551           vi = aj + diag[i] + 1;
552           nz = ai[i+1] - diag[i] - 1;
553 
554           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
555           /* copy all rows of x that are needed into contiguous space */
556           workt = work;
557           for (j=0; j<nz; j++) {
558             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
559             workt += bs;
560           }
561           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
562           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
563 
564           idiag -= bs2;
565           i2    -= bs;
566         }
567         break;
568       }
569       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
570     }
571     its--;
572   }
573   while (its--) {
574     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575       idiag = a->idiag;
576       i2 = 0;
577       switch (bs) {
578       case 1:
579         for (i=0; i<m; i++) {
580           v  = aa + ai[i];
581           vi = aj + ai[i];
582           nz = ai[i+1] - ai[i];
583           s[0] = b[i2];
584           for (j=0; j<nz; j++) {
585             xw[0] = x[vi[j]];
586             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587           }
588           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589           x[i2] += xw[0];
590           idiag += 1;
591           i2    += 1;
592         }
593         break;
594       case 2:
595         for (i=0; i<m; i++) {
596           v  = aa + 4*ai[i];
597           vi = aj + ai[i];
598           nz = ai[i+1] - ai[i];
599           s[0] = b[i2]; s[1] = b[i2+1];
600           for (j=0; j<nz; j++) {
601             idx = 2*vi[j];
602             it  = 4*j;
603             xw[0] = x[idx]; xw[1] = x[1+idx];
604             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605           }
606           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607           x[i2]  += xw[0]; x[i2+1] += xw[1];
608           idiag  += 4;
609           i2     += 2;
610         }
611         break;
612       case 3:
613         for (i=0; i<m; i++) {
614           v  = aa + 9*ai[i];
615           vi = aj + ai[i];
616           nz = ai[i+1] - ai[i];
617           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618           while (nz--) {
619             idx = 3*(*vi++);
620             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622             v  += 9;
623           }
624           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626           idiag  += 9;
627           i2     += 3;
628         }
629         break;
630       case 4:
631         for (i=0; i<m; i++) {
632           v  = aa + 16*ai[i];
633           vi = aj + ai[i];
634           nz = ai[i+1] - ai[i];
635           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636           while (nz--) {
637             idx = 4*(*vi++);
638             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640             v  += 16;
641           }
642           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644           idiag  += 16;
645           i2     += 4;
646         }
647         break;
648       case 5:
649         for (i=0; i<m; i++) {
650           v  = aa + 25*ai[i];
651           vi = aj + ai[i];
652           nz = ai[i+1] - ai[i];
653           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654           while (nz--) {
655             idx = 5*(*vi++);
656             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658             v  += 25;
659           }
660           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662           idiag  += 25;
663           i2     += 5;
664         }
665         break;
666       case 6:
667         for (i=0; i<m; i++) {
668           v  = aa + 36*ai[i];
669           vi = aj + ai[i];
670           nz = ai[i+1] - ai[i];
671           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672           while (nz--) {
673             idx = 6*(*vi++);
674             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
675             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677             v  += 36;
678           }
679           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682           idiag  += 36;
683           i2     += 6;
684         }
685         break;
686       case 7:
687         for (i=0; i<m; i++) {
688           v  = aa + 49*ai[i];
689           vi = aj + ai[i];
690           nz = ai[i+1] - ai[i];
691           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
692           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693           while (nz--) {
694             idx = 7*(*vi++);
695             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
696             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698             v  += 49;
699           }
700           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703           idiag  += 49;
704           i2     += 7;
705         }
706         break;
707       default:
708         for (i=0; i<m; i++) {
709           v  = aa + bs2*ai[i];
710           vi = aj + ai[i];
711           nz = ai[i+1] - ai[i];
712 
713           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
714           /* copy all rows of x that are needed into contiguous space */
715           workt = work;
716           for (j=0; j<nz; j++) {
717             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
718             workt += bs;
719           }
720           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
722 
723           idiag += bs2;
724           i2    += bs;
725         }
726         break;
727       }
728       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
729     }
730     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731       idiag = a->idiag+bs2*(a->mbs-1);
732       i2 = bs * (m-1);
733       switch (bs) {
734       case 1:
735         for (i=m-1; i>=0; i--) {
736           v  = aa + ai[i];
737           vi = aj + ai[i];
738           nz = ai[i+1] - ai[i];
739           s[0] = b[i2];
740           for (j=0; j<nz; j++) {
741             xw[0] = x[vi[j]];
742             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743           }
744           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745           x[i2] += xw[0];
746           idiag -= 1;
747           i2    -= 1;
748         }
749         break;
750       case 2:
751         for (i=m-1; i>=0; i--) {
752           v  = aa + 4*ai[i];
753           vi = aj + ai[i];
754           nz = ai[i+1] - ai[i];
755           s[0] = b[i2]; s[1] = b[i2+1];
756           for (j=0; j<nz; j++) {
757             idx = 2*vi[j];
758             it  = 4*j;
759             xw[0] = x[idx]; xw[1] = x[1+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761           }
762           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763           x[i2]  += xw[0]; x[i2+1] += xw[1];
764           idiag  -= 4;
765           i2     -= 2;
766         }
767         break;
768       case 3:
769         for (i=m-1; i>=0; i--) {
770           v  = aa + 9*ai[i];
771           vi = aj + ai[i];
772           nz = ai[i+1] - ai[i];
773           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774           while (nz--) {
775             idx = 3*(*vi++);
776             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778             v  += 9;
779           }
780           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782           idiag  -= 9;
783           i2     -= 3;
784         }
785         break;
786       case 4:
787         for (i=m-1; i>=0; i--) {
788           v  = aa + 16*ai[i];
789           vi = aj + ai[i];
790           nz = ai[i+1] - ai[i];
791           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792           while (nz--) {
793             idx = 4*(*vi++);
794             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796             v  += 16;
797           }
798           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800           idiag  -= 16;
801           i2     -= 4;
802         }
803         break;
804       case 5:
805         for (i=m-1; i>=0; i--) {
806           v  = aa + 25*ai[i];
807           vi = aj + ai[i];
808           nz = ai[i+1] - ai[i];
809           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810           while (nz--) {
811             idx = 5*(*vi++);
812             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814             v  += 25;
815           }
816           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818           idiag  -= 25;
819           i2     -= 5;
820         }
821         break;
822       case 6:
823         for (i=m-1; i>=0; i--) {
824           v  = aa + 36*ai[i];
825           vi = aj + ai[i];
826           nz = ai[i+1] - ai[i];
827           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828           while (nz--) {
829             idx = 6*(*vi++);
830             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
831             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833             v  += 36;
834           }
835           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838           idiag  -= 36;
839           i2     -= 6;
840         }
841         break;
842       case 7:
843         for (i=m-1; i>=0; i--) {
844           v  = aa + 49*ai[i];
845           vi = aj + ai[i];
846           nz = ai[i+1] - ai[i];
847           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
848           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849           while (nz--) {
850             idx = 7*(*vi++);
851             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
852             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854             v  += 49;
855           }
856           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859           idiag  -= 49;
860           i2     -= 7;
861         }
862         break;
863       default:
864         for (i=m-1; i>=0; i--) {
865           v  = aa + bs2*ai[i];
866           vi = aj + ai[i];
867           nz = ai[i+1] - ai[i];
868 
869           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
870           /* copy all rows of x that are needed into contiguous space */
871           workt = work;
872           for (j=0; j<nz; j++) {
873             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
874             workt += bs;
875           }
876           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
878 
879           idiag -= bs2;
880           i2    -= bs;
881         }
882         break;
883       }
884       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
885     }
886   }
887   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 
893 /*
894     Special version for direct calls from Fortran (Used in PETSc-fun3d)
895 */
896 #if defined(PETSC_HAVE_FORTRAN_CAPS)
897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
899 #define matsetvaluesblocked4_ matsetvaluesblocked4
900 #endif
901 
matsetvaluesblocked4_(Mat * AA,PetscInt * mm,const PetscInt im[],PetscInt * nn,const PetscInt in[],const PetscScalar v[])902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
903 {
904   Mat               A  = *AA;
905   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
906   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
907   PetscInt          *ai    =a->i,*ailen=a->ilen;
908   PetscInt          *aj    =a->j,stepval,lastcol = -1;
909   const PetscScalar *value = v;
910   MatScalar         *ap,*aa = a->a,*bap;
911   PetscErrorCode    ierr;
912 
913   PetscFunctionBegin;
914   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915   stepval = (n-1)*4;
916   for (k=0; k<m; k++) { /* loop over added rows */
917     row  = im[k];
918     rp   = aj + ai[row];
919     ap   = aa + 16*ai[row];
920     nrow = ailen[row];
921     low  = 0;
922     high = nrow;
923     for (l=0; l<n; l++) { /* loop over added columns */
924       col = in[l];
925       if (col <= lastcol)  low = 0;
926       else                high = nrow;
927       lastcol = col;
928       value   = v + k*(stepval+4 + l)*4;
929       while (high-low > 7) {
930         t = (low+high)/2;
931         if (rp[t] > col) high = t;
932         else             low  = t;
933       }
934       for (i=low; i<high; i++) {
935         if (rp[i] > col) break;
936         if (rp[i] == col) {
937           bap = ap +  16*i;
938           for (ii=0; ii<4; ii++,value+=stepval) {
939             for (jj=ii; jj<16; jj+=4) {
940               bap[jj] += *value++;
941             }
942           }
943           goto noinsert2;
944         }
945       }
946       N = nrow++ - 1;
947       high++; /* added new column index thus must search to one higher than before */
948       /* shift up all the later entries in this row */
949       for (ii=N; ii>=i; ii--) {
950         rp[ii+1] = rp[ii];
951         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
952       }
953       if (N >= i) {
954         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
955       }
956       rp[i] = col;
957       bap   = ap +  16*i;
958       for (ii=0; ii<4; ii++,value+=stepval) {
959         for (jj=ii; jj<16; jj+=4) {
960           bap[jj] = *value++;
961         }
962       }
963       noinsert2:;
964       low = i;
965     }
966     ailen[row] = nrow;
967   }
968   PetscFunctionReturnVoid();
969 }
970 
971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
972 #define matsetvalues4_ MATSETVALUES4
973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974 #define matsetvalues4_ matsetvalues4
975 #endif
976 
matsetvalues4_(Mat * AA,PetscInt * mm,PetscInt * im,PetscInt * nn,PetscInt * in,PetscScalar * v)977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978 {
979   Mat         A  = *AA;
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
982   PetscInt    *ai=a->i,*ailen=a->ilen;
983   PetscInt    *aj=a->j,brow,bcol;
984   PetscInt    ridx,cidx,lastcol = -1;
985   MatScalar   *ap,value,*aa=a->a,*bap;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   for (k=0; k<m; k++) { /* loop over added rows */
990     row  = im[k]; brow = row/4;
991     rp   = aj + ai[brow];
992     ap   = aa + 16*ai[brow];
993     nrow = ailen[brow];
994     low  = 0;
995     high = nrow;
996     for (l=0; l<n; l++) { /* loop over added columns */
997       col   = in[l]; bcol = col/4;
998       ridx  = row % 4; cidx = col % 4;
999       value = v[l + k*n];
1000       if (col <= lastcol)  low = 0;
1001       else                high = nrow;
1002       lastcol = col;
1003       while (high-low > 7) {
1004         t = (low+high)/2;
1005         if (rp[t] > bcol) high = t;
1006         else              low  = t;
1007       }
1008       for (i=low; i<high; i++) {
1009         if (rp[i] > bcol) break;
1010         if (rp[i] == bcol) {
1011           bap   = ap +  16*i + 4*cidx + ridx;
1012           *bap += value;
1013           goto noinsert1;
1014         }
1015       }
1016       N = nrow++ - 1;
1017       high++; /* added new column thus must search to one higher than before */
1018       /* shift up all the later entries in this row */
1019       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1020       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1021       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1022       rp[i]                    = bcol;
1023       ap[16*i + 4*cidx + ridx] = value;
1024 noinsert1:;
1025       low = i;
1026     }
1027     ailen[brow] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 /*
1033      Checks for missing diagonals
1034 */
MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool * missing,PetscInt * d)1035 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1036 {
1037   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1038   PetscErrorCode ierr;
1039   PetscInt       *diag,*ii = a->i,i;
1040 
1041   PetscFunctionBegin;
1042   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1043   *missing = PETSC_FALSE;
1044   if (A->rmap->n > 0 && !ii) {
1045     *missing = PETSC_TRUE;
1046     if (d) *d = 0;
1047     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1048   } else {
1049     PetscInt n;
1050     n = PetscMin(a->mbs, a->nbs);
1051     diag = a->diag;
1052     for (i=0; i<n; i++) {
1053       if (diag[i] >= ii[i+1]) {
1054         *missing = PETSC_TRUE;
1055         if (d) *d = i;
1056         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1057         break;
1058       }
1059     }
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
MatMarkDiagonal_SeqBAIJ(Mat A)1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1065 {
1066   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1067   PetscErrorCode ierr;
1068   PetscInt       i,j,m = a->mbs;
1069 
1070   PetscFunctionBegin;
1071   if (!a->diag) {
1072     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1073     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1074     a->free_diag = PETSC_TRUE;
1075   }
1076   for (i=0; i<m; i++) {
1077     a->diag[i] = a->i[i+1];
1078     for (j=a->i[i]; j<a->i[i+1]; j++) {
1079       if (a->j[j] == i) {
1080         a->diag[i] = j;
1081         break;
1082       }
1083     }
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 
MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * inia[],const PetscInt * inja[],PetscBool * done)1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1090 {
1091   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1092   PetscErrorCode ierr;
1093   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1094   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1095 
1096   PetscFunctionBegin;
1097   *nn = n;
1098   if (!ia) PetscFunctionReturn(0);
1099   if (symmetric) {
1100     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1101     nz   = tia[n];
1102   } else {
1103     tia = a->i; tja = a->j;
1104   }
1105 
1106   if (!blockcompressed && bs > 1) {
1107     (*nn) *= bs;
1108     /* malloc & create the natural set of indices */
1109     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1110     if (n) {
1111       (*ia)[0] = oshift;
1112       for (j=1; j<bs; j++) {
1113         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1114       }
1115     }
1116 
1117     for (i=1; i<n; i++) {
1118       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1119       for (j=1; j<bs; j++) {
1120         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1121       }
1122     }
1123     if (n) {
1124       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1125     }
1126 
1127     if (inja) {
1128       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1129       cnt = 0;
1130       for (i=0; i<n; i++) {
1131         for (j=0; j<bs; j++) {
1132           for (k=tia[i]; k<tia[i+1]; k++) {
1133             for (l=0; l<bs; l++) {
1134               (*ja)[cnt++] = bs*tja[k] + l;
1135             }
1136           }
1137         }
1138       }
1139     }
1140 
1141     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1142       ierr = PetscFree(tia);CHKERRQ(ierr);
1143       ierr = PetscFree(tja);CHKERRQ(ierr);
1144     }
1145   } else if (oshift == 1) {
1146     if (symmetric) {
1147       nz = tia[A->rmap->n/bs];
1148       /*  add 1 to i and j indices */
1149       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1150       *ia = tia;
1151       if (ja) {
1152         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1153         *ja = tja;
1154       }
1155     } else {
1156       nz = a->i[A->rmap->n/bs];
1157       /* malloc space and  add 1 to i and j indices */
1158       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1159       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1160       if (ja) {
1161         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1162         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1163       }
1164     }
1165   } else {
1166     *ia = tia;
1167     if (ja) *ja = tja;
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1173 {
1174   PetscErrorCode ierr;
1175 
1176   PetscFunctionBegin;
1177   if (!ia) PetscFunctionReturn(0);
1178   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1179     ierr = PetscFree(*ia);CHKERRQ(ierr);
1180     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
MatDestroy_SeqBAIJ(Mat A)1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1186 {
1187   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1188   PetscErrorCode ierr;
1189 
1190   PetscFunctionBegin;
1191 #if defined(PETSC_USE_LOG)
1192   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1193 #endif
1194   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1195   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1197   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1198   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1199   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1200   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1201   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1205   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1206   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1207 
1208   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1209   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1210   ierr = PetscFree(A->data);CHKERRQ(ierr);
1211 
1212   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1225 #if defined(PETSC_HAVE_HYPRE)
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1227 #endif
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1233 {
1234   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   switch (op) {
1239   case MAT_ROW_ORIENTED:
1240     a->roworiented = flg;
1241     break;
1242   case MAT_KEEP_NONZERO_PATTERN:
1243     a->keepnonzeropattern = flg;
1244     break;
1245   case MAT_NEW_NONZERO_LOCATIONS:
1246     a->nonew = (flg ? 0 : 1);
1247     break;
1248   case MAT_NEW_NONZERO_LOCATION_ERR:
1249     a->nonew = (flg ? -1 : 0);
1250     break;
1251   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1252     a->nonew = (flg ? -2 : 0);
1253     break;
1254   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1255     a->nounused = (flg ? -1 : 0);
1256     break;
1257   case MAT_NEW_DIAGONALS:
1258   case MAT_IGNORE_OFF_PROC_ENTRIES:
1259   case MAT_USE_HASH_TABLE:
1260   case MAT_SORTED_FULL:
1261     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1262     break;
1263   case MAT_SPD:
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268   case MAT_SUBMAT_SINGLEIS:
1269   case MAT_STRUCTURE_ONLY:
1270     /* These options are handled directly by MatSetOption() */
1271     break;
1272   default:
1273     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 /* used for both SeqBAIJ and SeqSBAIJ matrices */
MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v,PetscInt * ai,PetscInt * aj,PetscScalar * aa)1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1280 {
1281   PetscErrorCode ierr;
1282   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1283   MatScalar      *aa_i;
1284   PetscScalar    *v_i;
1285 
1286   PetscFunctionBegin;
1287   bs  = A->rmap->bs;
1288   bs2 = bs*bs;
1289   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1290 
1291   bn  = row/bs;   /* Block number */
1292   bp  = row % bs; /* Block Position */
1293   M   = ai[bn+1] - ai[bn];
1294   *nz = bs*M;
1295 
1296   if (v) {
1297     *v = NULL;
1298     if (*nz) {
1299       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1300       for (i=0; i<M; i++) { /* for each block in the block row */
1301         v_i  = *v + i*bs;
1302         aa_i = aa + bs2*(ai[bn] + i);
1303         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1304       }
1305     }
1306   }
1307 
1308   if (idx) {
1309     *idx = NULL;
1310     if (*nz) {
1311       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1312       for (i=0; i<M; i++) { /* for each block in the block row */
1313         idx_i = *idx + i*bs;
1314         itmp  = bs*aj[ai[bn] + i];
1315         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1322 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323 {
1324   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1332 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1338   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1339   PetscFunctionReturn(0);
1340 }
1341 
MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat * B)1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1343 {
1344   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1345   Mat            C;
1346   PetscErrorCode ierr;
1347   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1348   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1349   MatScalar      *ata,*aa=a->a;
1350 
1351   PetscFunctionBegin;
1352   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1353   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1354     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1355 
1356     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1357     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1358     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1359     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1360 
1361     at  = (Mat_SeqBAIJ*)C->data;
1362     ati = at->i;
1363     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1364   } else {
1365     C = *B;
1366     at = (Mat_SeqBAIJ*)C->data;
1367     ati = at->i;
1368   }
1369 
1370   atj = at->j;
1371   ata = at->a;
1372 
1373   /* Copy ati into atfill so we have locations of the next free space in atj */
1374   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1375 
1376   /* Walk through A row-wise and mark nonzero entries of A^T. */
1377   for (i=0; i<mbs; i++) {
1378     anzj = ai[i+1] - ai[i];
1379     for (j=0; j<anzj; j++) {
1380       atj[atfill[*aj]] = i;
1381       for (kr=0; kr<bs; kr++) {
1382         for (k=0; k<bs; k++) {
1383           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1384         }
1385       }
1386       atfill[*aj++] += 1;
1387     }
1388   }
1389   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391 
1392   /* Clean up temporary space and complete requests. */
1393   ierr = PetscFree(atfill);CHKERRQ(ierr);
1394 
1395   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1396     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1397     *B = C;
1398   } else {
1399     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool * f)1404 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1405 {
1406   PetscErrorCode ierr;
1407   Mat            Btrans;
1408 
1409   PetscFunctionBegin;
1410   *f   = PETSC_FALSE;
1411   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1412   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1413   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)1418 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1419 {
1420   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1421   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1422   PetscInt       *rowlens,*colidxs;
1423   PetscScalar    *matvals;
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1428 
1429   M  = mat->rmap->N;
1430   N  = mat->cmap->N;
1431   m  = mat->rmap->n;
1432   bs = mat->rmap->bs;
1433   nz = bs*bs*A->nz;
1434 
1435   /* write matrix header */
1436   header[0] = MAT_FILE_CLASSID;
1437   header[1] = M; header[2] = N; header[3] = nz;
1438   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1439 
1440   /* store row lengths */
1441   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1442   for (cnt=0, i=0; i<A->mbs; i++)
1443     for (j=0; j<bs; j++)
1444       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1445   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1446   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1447 
1448   /* store column indices  */
1449   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1450   for (cnt=0, i=0; i<A->mbs; i++)
1451     for (k=0; k<bs; k++)
1452       for (j=A->i[i]; j<A->i[i+1]; j++)
1453         for (l=0; l<bs; l++)
1454           colidxs[cnt++] = bs*A->j[j] + l;
1455   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1456   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1457   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1458 
1459   /* store nonzero values */
1460   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1461   for (cnt=0, i=0; i<A->mbs; i++)
1462     for (k=0; k<bs; k++)
1463       for (j=A->i[i]; j<A->i[i+1]; j++)
1464         for (l=0; l<bs; l++)
1465           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1466   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1467   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1468   ierr = PetscFree(matvals);CHKERRQ(ierr);
1469 
1470   /* write block size option to the viewer's .info file */
1471   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)1475 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1476 {
1477   PetscErrorCode ierr;
1478   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1479   PetscInt       i,bs = A->rmap->bs,k;
1480 
1481   PetscFunctionBegin;
1482   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1483   for (i=0; i<a->mbs; i++) {
1484     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1485     for (k=a->i[i]; k<a->i[i+1]; k++) {
1486       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1487     }
1488     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1489   }
1490   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)1494 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1495 {
1496   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1497   PetscErrorCode    ierr;
1498   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1499   PetscViewerFormat format;
1500 
1501   PetscFunctionBegin;
1502   if (A->structure_only) {
1503     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1504     PetscFunctionReturn(0);
1505   }
1506 
1507   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1508   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1509     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1510   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1511     const char *matname;
1512     Mat        aij;
1513     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1514     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1515     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1516     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1517     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1518   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1519       PetscFunctionReturn(0);
1520   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1521     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1522     for (i=0; i<a->mbs; i++) {
1523       for (j=0; j<bs; j++) {
1524         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1525         for (k=a->i[i]; k<a->i[i+1]; k++) {
1526           for (l=0; l<bs; l++) {
1527 #if defined(PETSC_USE_COMPLEX)
1528             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1529               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1530                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1531             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1532               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1533                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1534             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             }
1537 #else
1538             if (a->a[bs2*k + l*bs + j] != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1540             }
1541 #endif
1542           }
1543         }
1544         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1545       }
1546     }
1547     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1548   } else {
1549     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1550     for (i=0; i<a->mbs; i++) {
1551       for (j=0; j<bs; j++) {
1552         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1553         for (k=a->i[i]; k<a->i[i+1]; k++) {
1554           for (l=0; l<bs; l++) {
1555 #if defined(PETSC_USE_COMPLEX)
1556             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1557               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1558                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1559             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1560               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1561                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1562             } else {
1563               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564             }
1565 #else
1566             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1567 #endif
1568           }
1569         }
1570         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1571       }
1572     }
1573     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1574   }
1575   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #include <petscdraw.h>
MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void * Aa)1580 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1581 {
1582   Mat               A = (Mat) Aa;
1583   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1584   PetscErrorCode    ierr;
1585   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1586   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1587   MatScalar         *aa;
1588   PetscViewer       viewer;
1589   PetscViewerFormat format;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1593   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1594   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1595 
1596   /* loop over matrix elements drawing boxes */
1597 
1598   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1599     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1600     /* Blue for negative, Cyan for zero and  Red for positive */
1601     color = PETSC_DRAW_BLUE;
1602     for (i=0,row=0; i<mbs; i++,row+=bs) {
1603       for (j=a->i[i]; j<a->i[i+1]; j++) {
1604         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1605         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1606         aa  = a->a + j*bs2;
1607         for (k=0; k<bs; k++) {
1608           for (l=0; l<bs; l++) {
1609             if (PetscRealPart(*aa++) >=  0.) continue;
1610             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1611           }
1612         }
1613       }
1614     }
1615     color = PETSC_DRAW_CYAN;
1616     for (i=0,row=0; i<mbs; i++,row+=bs) {
1617       for (j=a->i[i]; j<a->i[i+1]; j++) {
1618         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1619         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1620         aa  = a->a + j*bs2;
1621         for (k=0; k<bs; k++) {
1622           for (l=0; l<bs; l++) {
1623             if (PetscRealPart(*aa++) != 0.) continue;
1624             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1625           }
1626         }
1627       }
1628     }
1629     color = PETSC_DRAW_RED;
1630     for (i=0,row=0; i<mbs; i++,row+=bs) {
1631       for (j=a->i[i]; j<a->i[i+1]; j++) {
1632         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1633         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1634         aa  = a->a + j*bs2;
1635         for (k=0; k<bs; k++) {
1636           for (l=0; l<bs; l++) {
1637             if (PetscRealPart(*aa++) <= 0.) continue;
1638             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1639           }
1640         }
1641       }
1642     }
1643     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1644   } else {
1645     /* use contour shading to indicate magnitude of values */
1646     /* first determine max of all nonzero values */
1647     PetscReal minv = 0.0, maxv = 0.0;
1648     PetscDraw popup;
1649 
1650     for (i=0; i<a->nz*a->bs2; i++) {
1651       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1652     }
1653     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1654     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1655     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1656 
1657     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1658     for (i=0,row=0; i<mbs; i++,row+=bs) {
1659       for (j=a->i[i]; j<a->i[i+1]; j++) {
1660         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1661         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1662         aa  = a->a + j*bs2;
1663         for (k=0; k<bs; k++) {
1664           for (l=0; l<bs; l++) {
1665             MatScalar v = *aa++;
1666             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1667             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1668           }
1669         }
1670       }
1671     }
1672     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1673   }
1674   PetscFunctionReturn(0);
1675 }
1676 
MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)1677 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1678 {
1679   PetscErrorCode ierr;
1680   PetscReal      xl,yl,xr,yr,w,h;
1681   PetscDraw      draw;
1682   PetscBool      isnull;
1683 
1684   PetscFunctionBegin;
1685   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1686   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1687   if (isnull) PetscFunctionReturn(0);
1688 
1689   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1690   xr  += w;          yr += h;        xl = -w;     yl = -h;
1691   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1692   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1693   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1694   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1695   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 
MatView_SeqBAIJ(Mat A,PetscViewer viewer)1699 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1700 {
1701   PetscErrorCode ierr;
1702   PetscBool      iascii,isbinary,isdraw;
1703 
1704   PetscFunctionBegin;
1705   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1706   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1707   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1708   if (iascii) {
1709     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1710   } else if (isbinary) {
1711     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1712   } else if (isdraw) {
1713     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1714   } else {
1715     Mat B;
1716     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1717     ierr = MatView(B,viewer);CHKERRQ(ierr);
1718     ierr = MatDestroy(&B);CHKERRQ(ierr);
1719   }
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 
MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])1724 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1725 {
1726   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1727   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1728   PetscInt    *ai = a->i,*ailen = a->ilen;
1729   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1730   MatScalar   *ap,*aa = a->a;
1731 
1732   PetscFunctionBegin;
1733   for (k=0; k<m; k++) { /* loop over rows */
1734     row = im[k]; brow = row/bs;
1735     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1736     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1737     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1738     nrow = ailen[brow];
1739     for (l=0; l<n; l++) { /* loop over columns */
1740       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1741       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1742       col  = in[l];
1743       bcol = col/bs;
1744       cidx = col%bs;
1745       ridx = row%bs;
1746       high = nrow;
1747       low  = 0; /* assume unsorted */
1748       while (high-low > 5) {
1749         t = (low+high)/2;
1750         if (rp[t] > bcol) high = t;
1751         else             low  = t;
1752       }
1753       for (i=low; i<high; i++) {
1754         if (rp[i] > bcol) break;
1755         if (rp[i] == bcol) {
1756           *v++ = ap[bs2*i+bs*cidx+ridx];
1757           goto finished;
1758         }
1759       }
1760       *v++ = 0.0;
1761 finished:;
1762     }
1763   }
1764   PetscFunctionReturn(0);
1765 }
1766 
MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)1767 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1768 {
1769   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1770   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1771   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1772   PetscErrorCode    ierr;
1773   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1774   PetscBool         roworiented=a->roworiented;
1775   const PetscScalar *value     = v;
1776   MatScalar         *ap=NULL,*aa = a->a,*bap;
1777 
1778   PetscFunctionBegin;
1779   if (roworiented) {
1780     stepval = (n-1)*bs;
1781   } else {
1782     stepval = (m-1)*bs;
1783   }
1784   for (k=0; k<m; k++) { /* loop over added rows */
1785     row = im[k];
1786     if (row < 0) continue;
1787     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1788     rp   = aj + ai[row];
1789     if (!A->structure_only) ap = aa + bs2*ai[row];
1790     rmax = imax[row];
1791     nrow = ailen[row];
1792     low  = 0;
1793     high = nrow;
1794     for (l=0; l<n; l++) { /* loop over added columns */
1795       if (in[l] < 0) continue;
1796       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1797       col = in[l];
1798       if (!A->structure_only) {
1799         if (roworiented) {
1800           value = v + (k*(stepval+bs) + l)*bs;
1801         } else {
1802           value = v + (l*(stepval+bs) + k)*bs;
1803         }
1804       }
1805       if (col <= lastcol) low = 0;
1806       else high = nrow;
1807       lastcol = col;
1808       while (high-low > 7) {
1809         t = (low+high)/2;
1810         if (rp[t] > col) high = t;
1811         else             low  = t;
1812       }
1813       for (i=low; i<high; i++) {
1814         if (rp[i] > col) break;
1815         if (rp[i] == col) {
1816           if (A->structure_only) goto noinsert2;
1817           bap = ap +  bs2*i;
1818           if (roworiented) {
1819             if (is == ADD_VALUES) {
1820               for (ii=0; ii<bs; ii++,value+=stepval) {
1821                 for (jj=ii; jj<bs2; jj+=bs) {
1822                   bap[jj] += *value++;
1823                 }
1824               }
1825             } else {
1826               for (ii=0; ii<bs; ii++,value+=stepval) {
1827                 for (jj=ii; jj<bs2; jj+=bs) {
1828                   bap[jj] = *value++;
1829                 }
1830               }
1831             }
1832           } else {
1833             if (is == ADD_VALUES) {
1834               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1835                 for (jj=0; jj<bs; jj++) {
1836                   bap[jj] += value[jj];
1837                 }
1838                 bap += bs;
1839               }
1840             } else {
1841               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1842                 for (jj=0; jj<bs; jj++) {
1843                   bap[jj]  = value[jj];
1844                 }
1845                 bap += bs;
1846               }
1847             }
1848           }
1849           goto noinsert2;
1850         }
1851       }
1852       if (nonew == 1) goto noinsert2;
1853       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1854       if (A->structure_only) {
1855         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1856       } else {
1857         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1858       }
1859       N = nrow++ - 1; high++;
1860       /* shift up all the later entries in this row */
1861       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1862       rp[i] = col;
1863       if (!A->structure_only) {
1864         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1865         bap   = ap +  bs2*i;
1866         if (roworiented) {
1867           for (ii=0; ii<bs; ii++,value+=stepval) {
1868             for (jj=ii; jj<bs2; jj+=bs) {
1869               bap[jj] = *value++;
1870             }
1871           }
1872         } else {
1873           for (ii=0; ii<bs; ii++,value+=stepval) {
1874             for (jj=0; jj<bs; jj++) {
1875               *bap++ = *value++;
1876             }
1877           }
1878         }
1879       }
1880 noinsert2:;
1881       low = i;
1882     }
1883     ailen[row] = nrow;
1884   }
1885   PetscFunctionReturn(0);
1886 }
1887 
MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)1888 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1889 {
1890   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1891   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1892   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1893   PetscErrorCode ierr;
1894   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1895   MatScalar      *aa  = a->a,*ap;
1896   PetscReal      ratio=0.6;
1897 
1898   PetscFunctionBegin;
1899   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1900 
1901   if (m) rmax = ailen[0];
1902   for (i=1; i<mbs; i++) {
1903     /* move each row back by the amount of empty slots (fshift) before it*/
1904     fshift += imax[i-1] - ailen[i-1];
1905     rmax    = PetscMax(rmax,ailen[i]);
1906     if (fshift) {
1907       ip = aj + ai[i];
1908       ap = aa + bs2*ai[i];
1909       N  = ailen[i];
1910       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1911       if (!A->structure_only) {
1912         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1913       }
1914     }
1915     ai[i] = ai[i-1] + ailen[i-1];
1916   }
1917   if (mbs) {
1918     fshift += imax[mbs-1] - ailen[mbs-1];
1919     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1920   }
1921 
1922   /* reset ilen and imax for each row */
1923   a->nonzerorowcnt = 0;
1924   if (A->structure_only) {
1925     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1926   } else { /* !A->structure_only */
1927     for (i=0; i<mbs; i++) {
1928       ailen[i] = imax[i] = ai[i+1] - ai[i];
1929       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1930     }
1931   }
1932   a->nz = ai[mbs];
1933 
1934   /* diagonals may have moved, so kill the diagonal pointers */
1935   a->idiagvalid = PETSC_FALSE;
1936   if (fshift && a->diag) {
1937     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1938     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1939     a->diag = NULL;
1940   }
1941   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1942   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1943   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1944   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1945 
1946   A->info.mallocs    += a->reallocs;
1947   a->reallocs         = 0;
1948   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1949   a->rmax             = rmax;
1950 
1951   if (!A->structure_only) {
1952     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 /*
1958    This function returns an array of flags which indicate the locations of contiguous
1959    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1960    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1961    Assume: sizes should be long enough to hold all the values.
1962 */
MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[],PetscInt * bs_max)1963 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1964 {
1965   PetscInt  i,j,k,row;
1966   PetscBool flg;
1967 
1968   PetscFunctionBegin;
1969   for (i=0,j=0; i<n; j++) {
1970     row = idx[i];
1971     if (row%bs!=0) { /* Not the begining of a block */
1972       sizes[j] = 1;
1973       i++;
1974     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1975       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1976       i++;
1977     } else { /* Begining of the block, so check if the complete block exists */
1978       flg = PETSC_TRUE;
1979       for (k=1; k<bs; k++) {
1980         if (row+k != idx[i+k]) { /* break in the block */
1981           flg = PETSC_FALSE;
1982           break;
1983         }
1984       }
1985       if (flg) { /* No break in the bs */
1986         sizes[j] = bs;
1987         i       += bs;
1988       } else {
1989         sizes[j] = 1;
1990         i++;
1991       }
1992     }
1993   }
1994   *bs_max = j;
1995   PetscFunctionReturn(0);
1996 }
1997 
MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)1998 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1999 {
2000   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2001   PetscErrorCode    ierr;
2002   PetscInt          i,j,k,count,*rows;
2003   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2004   PetscScalar       zero = 0.0;
2005   MatScalar         *aa;
2006   const PetscScalar *xx;
2007   PetscScalar       *bb;
2008 
2009   PetscFunctionBegin;
2010   /* fix right hand side if needed */
2011   if (x && b) {
2012     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2013     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2014     for (i=0; i<is_n; i++) {
2015       bb[is_idx[i]] = diag*xx[is_idx[i]];
2016     }
2017     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2018     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2019   }
2020 
2021   /* Make a copy of the IS and  sort it */
2022   /* allocate memory for rows,sizes */
2023   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2024 
2025   /* copy IS values to rows, and sort them */
2026   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2027   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2028 
2029   if (baij->keepnonzeropattern) {
2030     for (i=0; i<is_n; i++) sizes[i] = 1;
2031     bs_max          = is_n;
2032   } else {
2033     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2034     A->nonzerostate++;
2035   }
2036 
2037   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2038     row = rows[j];
2039     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2040     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2041     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2042     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2043       if (diag != (PetscScalar)0.0) {
2044         if (baij->ilen[row/bs] > 0) {
2045           baij->ilen[row/bs]       = 1;
2046           baij->j[baij->i[row/bs]] = row/bs;
2047 
2048           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2049         }
2050         /* Now insert all the diagonal values for this bs */
2051         for (k=0; k<bs; k++) {
2052           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2053         }
2054       } else { /* (diag == 0.0) */
2055         baij->ilen[row/bs] = 0;
2056       } /* end (diag == 0.0) */
2057     } else { /* (sizes[i] != bs) */
2058       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2059       for (k=0; k<count; k++) {
2060         aa[0] =  zero;
2061         aa   += bs;
2062       }
2063       if (diag != (PetscScalar)0.0) {
2064         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2065       }
2066     }
2067   }
2068 
2069   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2070   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)2074 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2075 {
2076   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2077   PetscErrorCode    ierr;
2078   PetscInt          i,j,k,count;
2079   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2080   PetscScalar       zero = 0.0;
2081   MatScalar         *aa;
2082   const PetscScalar *xx;
2083   PetscScalar       *bb;
2084   PetscBool         *zeroed,vecs = PETSC_FALSE;
2085 
2086   PetscFunctionBegin;
2087   /* fix right hand side if needed */
2088   if (x && b) {
2089     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2090     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2091     vecs = PETSC_TRUE;
2092   }
2093 
2094   /* zero the columns */
2095   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2096   for (i=0; i<is_n; i++) {
2097     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2098     zeroed[is_idx[i]] = PETSC_TRUE;
2099   }
2100   for (i=0; i<A->rmap->N; i++) {
2101     if (!zeroed[i]) {
2102       row = i/bs;
2103       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2104         for (k=0; k<bs; k++) {
2105           col = bs*baij->j[j] + k;
2106           if (zeroed[col]) {
2107             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2108             if (vecs) bb[i] -= aa[0]*xx[col];
2109             aa[0] = 0.0;
2110           }
2111         }
2112       }
2113     } else if (vecs) bb[i] = diag*xx[i];
2114   }
2115   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2116   if (vecs) {
2117     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2118     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2119   }
2120 
2121   /* zero the rows */
2122   for (i=0; i<is_n; i++) {
2123     row   = is_idx[i];
2124     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2125     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2126     for (k=0; k<count; k++) {
2127       aa[0] =  zero;
2128       aa   += bs;
2129     }
2130     if (diag != (PetscScalar)0.0) {
2131       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2132     }
2133   }
2134   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   PetscFunctionReturn(0);
2136 }
2137 
MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)2138 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2139 {
2140   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2141   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2142   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2143   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2144   PetscErrorCode ierr;
2145   PetscInt       ridx,cidx,bs2=a->bs2;
2146   PetscBool      roworiented=a->roworiented;
2147   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2148 
2149   PetscFunctionBegin;
2150   for (k=0; k<m; k++) { /* loop over added rows */
2151     row  = im[k];
2152     brow = row/bs;
2153     if (row < 0) continue;
2154     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2155     rp   = aj + ai[brow];
2156     if (!A->structure_only) ap = aa + bs2*ai[brow];
2157     rmax = imax[brow];
2158     nrow = ailen[brow];
2159     low  = 0;
2160     high = nrow;
2161     for (l=0; l<n; l++) { /* loop over added columns */
2162       if (in[l] < 0) continue;
2163       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2164       col  = in[l]; bcol = col/bs;
2165       ridx = row % bs; cidx = col % bs;
2166       if (!A->structure_only) {
2167         if (roworiented) {
2168           value = v[l + k*n];
2169         } else {
2170           value = v[k + l*m];
2171         }
2172       }
2173       if (col <= lastcol) low = 0; else high = nrow;
2174       lastcol = col;
2175       while (high-low > 7) {
2176         t = (low+high)/2;
2177         if (rp[t] > bcol) high = t;
2178         else              low  = t;
2179       }
2180       for (i=low; i<high; i++) {
2181         if (rp[i] > bcol) break;
2182         if (rp[i] == bcol) {
2183           bap = ap +  bs2*i + bs*cidx + ridx;
2184           if (!A->structure_only) {
2185             if (is == ADD_VALUES) *bap += value;
2186             else                  *bap  = value;
2187           }
2188           goto noinsert1;
2189         }
2190       }
2191       if (nonew == 1) goto noinsert1;
2192       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2193       if (A->structure_only) {
2194         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2195       } else {
2196         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2197       }
2198       N = nrow++ - 1; high++;
2199       /* shift up all the later entries in this row */
2200       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2201       rp[i] = bcol;
2202       if (!A->structure_only) {
2203         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2204         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2205         ap[bs2*i + bs*cidx + ridx] = value;
2206       }
2207       a->nz++;
2208       A->nonzerostate++;
2209 noinsert1:;
2210       low = i;
2211     }
2212     ailen[brow] = nrow;
2213   }
2214   PetscFunctionReturn(0);
2215 }
2216 
MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo * info)2217 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2218 {
2219   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2220   Mat            outA;
2221   PetscErrorCode ierr;
2222   PetscBool      row_identity,col_identity;
2223 
2224   PetscFunctionBegin;
2225   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2226   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2227   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2228   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2229 
2230   outA            = inA;
2231   inA->factortype = MAT_FACTOR_LU;
2232   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2233   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2234 
2235   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2236 
2237   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2238   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2239   a->row = row;
2240   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2241   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2242   a->col = col;
2243 
2244   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2245   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2246   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2247   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2248 
2249   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2250   if (!a->solve_work) {
2251     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2252     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2253   }
2254   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2255   PetscFunctionReturn(0);
2256 }
2257 
MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt * indices)2258 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2259 {
2260   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2261   PetscInt    i,nz,mbs;
2262 
2263   PetscFunctionBegin;
2264   nz  = baij->maxnz;
2265   mbs = baij->mbs;
2266   for (i=0; i<nz; i++) {
2267     baij->j[i] = indices[i];
2268   }
2269   baij->nz = nz;
2270   for (i=0; i<mbs; i++) {
2271     baij->ilen[i] = baij->imax[i];
2272   }
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 /*@
2277     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2278        in the matrix.
2279 
2280   Input Parameters:
2281 +  mat - the SeqBAIJ matrix
2282 -  indices - the column indices
2283 
2284   Level: advanced
2285 
2286   Notes:
2287     This can be called if you have precomputed the nonzero structure of the
2288   matrix and want to provide it to the matrix object to improve the performance
2289   of the MatSetValues() operation.
2290 
2291     You MUST have set the correct numbers of nonzeros per row in the call to
2292   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2293 
2294     MUST be called before any calls to MatSetValues();
2295 
2296 @*/
MatSeqBAIJSetColumnIndices(Mat mat,PetscInt * indices)2297 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2298 {
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2303   PetscValidPointer(indices,2);
2304   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])2308 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2309 {
2310   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2311   PetscErrorCode ierr;
2312   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2313   PetscReal      atmp;
2314   PetscScalar    *x,zero = 0.0;
2315   MatScalar      *aa;
2316   PetscInt       ncols,brow,krow,kcol;
2317 
2318   PetscFunctionBegin;
2319   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2320   bs  = A->rmap->bs;
2321   aa  = a->a;
2322   ai  = a->i;
2323   aj  = a->j;
2324   mbs = a->mbs;
2325 
2326   ierr = VecSet(v,zero);CHKERRQ(ierr);
2327   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2328   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2329   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2330   for (i=0; i<mbs; i++) {
2331     ncols = ai[1] - ai[0]; ai++;
2332     brow  = bs*i;
2333     for (j=0; j<ncols; j++) {
2334       for (kcol=0; kcol<bs; kcol++) {
2335         for (krow=0; krow<bs; krow++) {
2336           atmp = PetscAbsScalar(*aa);aa++;
2337           row  = brow + krow;   /* row index */
2338           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2339         }
2340       }
2341       aj++;
2342     }
2343   }
2344   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 
MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)2348 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2349 {
2350   PetscErrorCode ierr;
2351 
2352   PetscFunctionBegin;
2353   /* If the two matrices have the same copy implementation, use fast copy. */
2354   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2355     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2356     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2357     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2358 
2359     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2360     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2361     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2362     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2363   } else {
2364     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2365   }
2366   PetscFunctionReturn(0);
2367 }
2368 
MatSetUp_SeqBAIJ(Mat A)2369 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2370 {
2371   PetscErrorCode ierr;
2372 
2373   PetscFunctionBegin;
2374   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar * array[])2378 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2379 {
2380   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2381 
2382   PetscFunctionBegin;
2383   *array = a->a;
2384   PetscFunctionReturn(0);
2385 }
2386 
MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar * array[])2387 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2388 {
2389   PetscFunctionBegin;
2390   *array = NULL;
2391   PetscFunctionReturn(0);
2392 }
2393 
MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt * nnz)2394 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2395 {
2396   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2397   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2398   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   /* Set the number of nonzeros in the new matrix */
2403   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2404   PetscFunctionReturn(0);
2405 }
2406 
MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)2407 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2408 {
2409   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2410   PetscErrorCode ierr;
2411   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2412   PetscBLASInt   one=1;
2413 
2414   PetscFunctionBegin;
2415   if (str == SAME_NONZERO_PATTERN) {
2416     PetscScalar  alpha = a;
2417     PetscBLASInt bnz;
2418     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2419     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2420     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2421   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2422     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2423   } else {
2424     Mat      B;
2425     PetscInt *nnz;
2426     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2427     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2428     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2429     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2430     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2431     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2432     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2433     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2434     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2435     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2436     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2437     ierr = PetscFree(nnz);CHKERRQ(ierr);
2438   }
2439   PetscFunctionReturn(0);
2440 }
2441 
MatRealPart_SeqBAIJ(Mat A)2442 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2443 {
2444   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2445   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2446   MatScalar   *aa = a->a;
2447 
2448   PetscFunctionBegin;
2449   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2450   PetscFunctionReturn(0);
2451 }
2452 
MatImaginaryPart_SeqBAIJ(Mat A)2453 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2454 {
2455   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2456   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2457   MatScalar   *aa = a->a;
2458 
2459   PetscFunctionBegin;
2460   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 /*
2465     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2466 */
MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2467 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2468 {
2469   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2470   PetscErrorCode ierr;
2471   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2472   PetscInt       nz = a->i[m],row,*jj,mr,col;
2473 
2474   PetscFunctionBegin;
2475   *nn = n;
2476   if (!ia) PetscFunctionReturn(0);
2477   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2478   else {
2479     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2480     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2481     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2482     jj   = a->j;
2483     for (i=0; i<nz; i++) {
2484       collengths[jj[i]]++;
2485     }
2486     cia[0] = oshift;
2487     for (i=0; i<n; i++) {
2488       cia[i+1] = cia[i] + collengths[i];
2489     }
2490     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2491     jj   = a->j;
2492     for (row=0; row<m; row++) {
2493       mr = a->i[row+1] - a->i[row];
2494       for (i=0; i<mr; i++) {
2495         col = *jj++;
2496 
2497         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2498       }
2499     }
2500     ierr = PetscFree(collengths);CHKERRQ(ierr);
2501     *ia  = cia; *ja = cja;
2502   }
2503   PetscFunctionReturn(0);
2504 }
2505 
MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * n,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2506 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2507 {
2508   PetscErrorCode ierr;
2509 
2510   PetscFunctionBegin;
2511   if (!ia) PetscFunctionReturn(0);
2512   ierr = PetscFree(*ia);CHKERRQ(ierr);
2513   ierr = PetscFree(*ja);CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 /*
2518  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2519  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2520  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2521  */
MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscInt * spidx[],PetscBool * done)2522 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2523 {
2524   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2525   PetscErrorCode ierr;
2526   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2527   PetscInt       nz = a->i[m],row,*jj,mr,col;
2528   PetscInt       *cspidx;
2529 
2530   PetscFunctionBegin;
2531   *nn = n;
2532   if (!ia) PetscFunctionReturn(0);
2533 
2534   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2535   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2536   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2537   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2538   jj   = a->j;
2539   for (i=0; i<nz; i++) {
2540     collengths[jj[i]]++;
2541   }
2542   cia[0] = oshift;
2543   for (i=0; i<n; i++) {
2544     cia[i+1] = cia[i] + collengths[i];
2545   }
2546   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2547   jj   = a->j;
2548   for (row=0; row<m; row++) {
2549     mr = a->i[row+1] - a->i[row];
2550     for (i=0; i<mr; i++) {
2551       col = *jj++;
2552       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2553       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2554     }
2555   }
2556   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2557   *ia    = cia;
2558   *ja    = cja;
2559   *spidx = cspidx;
2560   PetscFunctionReturn(0);
2561 }
2562 
MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * n,const PetscInt * ia[],const PetscInt * ja[],PetscInt * spidx[],PetscBool * done)2563 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2564 {
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2569   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2570   PetscFunctionReturn(0);
2571 }
2572 
MatShift_SeqBAIJ(Mat Y,PetscScalar a)2573 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2574 {
2575   PetscErrorCode ierr;
2576   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2577 
2578   PetscFunctionBegin;
2579   if (!Y->preallocated || !aij->nz) {
2580     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2581   }
2582   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 /* -------------------------------------------------------------------*/
2587 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2588                                        MatGetRow_SeqBAIJ,
2589                                        MatRestoreRow_SeqBAIJ,
2590                                        MatMult_SeqBAIJ_N,
2591                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2592                                        MatMultTranspose_SeqBAIJ,
2593                                        MatMultTransposeAdd_SeqBAIJ,
2594                                        NULL,
2595                                        NULL,
2596                                        NULL,
2597                                /* 10*/ NULL,
2598                                        MatLUFactor_SeqBAIJ,
2599                                        NULL,
2600                                        NULL,
2601                                        MatTranspose_SeqBAIJ,
2602                                /* 15*/ MatGetInfo_SeqBAIJ,
2603                                        MatEqual_SeqBAIJ,
2604                                        MatGetDiagonal_SeqBAIJ,
2605                                        MatDiagonalScale_SeqBAIJ,
2606                                        MatNorm_SeqBAIJ,
2607                                /* 20*/ NULL,
2608                                        MatAssemblyEnd_SeqBAIJ,
2609                                        MatSetOption_SeqBAIJ,
2610                                        MatZeroEntries_SeqBAIJ,
2611                                /* 24*/ MatZeroRows_SeqBAIJ,
2612                                        NULL,
2613                                        NULL,
2614                                        NULL,
2615                                        NULL,
2616                                /* 29*/ MatSetUp_SeqBAIJ,
2617                                        NULL,
2618                                        NULL,
2619                                        NULL,
2620                                        NULL,
2621                                /* 34*/ MatDuplicate_SeqBAIJ,
2622                                        NULL,
2623                                        NULL,
2624                                        MatILUFactor_SeqBAIJ,
2625                                        NULL,
2626                                /* 39*/ MatAXPY_SeqBAIJ,
2627                                        MatCreateSubMatrices_SeqBAIJ,
2628                                        MatIncreaseOverlap_SeqBAIJ,
2629                                        MatGetValues_SeqBAIJ,
2630                                        MatCopy_SeqBAIJ,
2631                                /* 44*/ NULL,
2632                                        MatScale_SeqBAIJ,
2633                                        MatShift_SeqBAIJ,
2634                                        NULL,
2635                                        MatZeroRowsColumns_SeqBAIJ,
2636                                /* 49*/ NULL,
2637                                        MatGetRowIJ_SeqBAIJ,
2638                                        MatRestoreRowIJ_SeqBAIJ,
2639                                        MatGetColumnIJ_SeqBAIJ,
2640                                        MatRestoreColumnIJ_SeqBAIJ,
2641                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2642                                        NULL,
2643                                        NULL,
2644                                        NULL,
2645                                        MatSetValuesBlocked_SeqBAIJ,
2646                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2647                                        MatDestroy_SeqBAIJ,
2648                                        MatView_SeqBAIJ,
2649                                        NULL,
2650                                        NULL,
2651                                /* 64*/ NULL,
2652                                        NULL,
2653                                        NULL,
2654                                        NULL,
2655                                        NULL,
2656                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2657                                        NULL,
2658                                        MatConvert_Basic,
2659                                        NULL,
2660                                        NULL,
2661                                /* 74*/ NULL,
2662                                        MatFDColoringApply_BAIJ,
2663                                        NULL,
2664                                        NULL,
2665                                        NULL,
2666                                /* 79*/ NULL,
2667                                        NULL,
2668                                        NULL,
2669                                        NULL,
2670                                        MatLoad_SeqBAIJ,
2671                                /* 84*/ NULL,
2672                                        NULL,
2673                                        NULL,
2674                                        NULL,
2675                                        NULL,
2676                                /* 89*/ NULL,
2677                                        NULL,
2678                                        NULL,
2679                                        NULL,
2680                                        NULL,
2681                                /* 94*/ NULL,
2682                                        NULL,
2683                                        NULL,
2684                                        NULL,
2685                                        NULL,
2686                                /* 99*/ NULL,
2687                                        NULL,
2688                                        NULL,
2689                                        NULL,
2690                                        NULL,
2691                                /*104*/ NULL,
2692                                        MatRealPart_SeqBAIJ,
2693                                        MatImaginaryPart_SeqBAIJ,
2694                                        NULL,
2695                                        NULL,
2696                                /*109*/ NULL,
2697                                        NULL,
2698                                        NULL,
2699                                        NULL,
2700                                        MatMissingDiagonal_SeqBAIJ,
2701                                /*114*/ NULL,
2702                                        NULL,
2703                                        NULL,
2704                                        NULL,
2705                                        NULL,
2706                                /*119*/ NULL,
2707                                        NULL,
2708                                        MatMultHermitianTranspose_SeqBAIJ,
2709                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2710                                        NULL,
2711                                /*124*/ NULL,
2712                                        NULL,
2713                                        MatInvertBlockDiagonal_SeqBAIJ,
2714                                        NULL,
2715                                        NULL,
2716                                /*129*/ NULL,
2717                                        NULL,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                /*134*/ NULL,
2722                                        NULL,
2723                                        NULL,
2724                                        NULL,
2725                                        NULL,
2726                                /*139*/ MatSetBlockSizes_Default,
2727                                        NULL,
2728                                        NULL,
2729                                        MatFDColoringSetUp_SeqXAIJ,
2730                                        NULL,
2731                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2732                                        MatDestroySubMatrices_SeqBAIJ
2733 };
2734 
MatStoreValues_SeqBAIJ(Mat mat)2735 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2736 {
2737   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2738   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2739   PetscErrorCode ierr;
2740 
2741   PetscFunctionBegin;
2742   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2743 
2744   /* allocate space for values if not already there */
2745   if (!aij->saved_values) {
2746     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2747     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2748   }
2749 
2750   /* copy values over */
2751   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
2754 
MatRetrieveValues_SeqBAIJ(Mat mat)2755 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2756 {
2757   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2758   PetscErrorCode ierr;
2759   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2760 
2761   PetscFunctionBegin;
2762   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2763   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2764 
2765   /* copy values over */
2766   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2767   PetscFunctionReturn(0);
2768 }
2769 
2770 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2771 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2772 
MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt * nnz)2773 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2774 {
2775   Mat_SeqBAIJ    *b;
2776   PetscErrorCode ierr;
2777   PetscInt       i,mbs,nbs,bs2;
2778   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2779 
2780   PetscFunctionBegin;
2781   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2782   if (nz == MAT_SKIP_ALLOCATION) {
2783     skipallocation = PETSC_TRUE;
2784     nz             = 0;
2785   }
2786 
2787   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2788   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2789   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2790   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2791 
2792   B->preallocated = PETSC_TRUE;
2793 
2794   mbs = B->rmap->n/bs;
2795   nbs = B->cmap->n/bs;
2796   bs2 = bs*bs;
2797 
2798   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2799 
2800   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2801   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2802   if (nnz) {
2803     for (i=0; i<mbs; i++) {
2804       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2805       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2806     }
2807   }
2808 
2809   b    = (Mat_SeqBAIJ*)B->data;
2810   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2811   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2812   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2813 
2814   if (!flg) {
2815     switch (bs) {
2816     case 1:
2817       B->ops->mult    = MatMult_SeqBAIJ_1;
2818       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2819       break;
2820     case 2:
2821       B->ops->mult    = MatMult_SeqBAIJ_2;
2822       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2823       break;
2824     case 3:
2825       B->ops->mult    = MatMult_SeqBAIJ_3;
2826       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2827       break;
2828     case 4:
2829       B->ops->mult    = MatMult_SeqBAIJ_4;
2830       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2831       break;
2832     case 5:
2833       B->ops->mult    = MatMult_SeqBAIJ_5;
2834       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2835       break;
2836     case 6:
2837       B->ops->mult    = MatMult_SeqBAIJ_6;
2838       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2839       break;
2840     case 7:
2841       B->ops->mult    = MatMult_SeqBAIJ_7;
2842       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2843       break;
2844     case 9:
2845 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2846       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2847       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2848 #else
2849       B->ops->mult    = MatMult_SeqBAIJ_N;
2850       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2851 #endif
2852       break;
2853     case 11:
2854       B->ops->mult    = MatMult_SeqBAIJ_11;
2855       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2856       break;
2857     case 15:
2858       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2859       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2860       break;
2861     default:
2862       B->ops->mult    = MatMult_SeqBAIJ_N;
2863       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2864       break;
2865     }
2866   }
2867   B->ops->sor = MatSOR_SeqBAIJ;
2868   b->mbs = mbs;
2869   b->nbs = nbs;
2870   if (!skipallocation) {
2871     if (!b->imax) {
2872       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2873       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2874 
2875       b->free_imax_ilen = PETSC_TRUE;
2876     }
2877     /* b->ilen will count nonzeros in each block row so far. */
2878     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2879     if (!nnz) {
2880       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2881       else if (nz < 0) nz = 1;
2882       nz = PetscMin(nz,nbs);
2883       for (i=0; i<mbs; i++) b->imax[i] = nz;
2884       nz = nz*mbs;
2885     } else {
2886       PetscInt64 nz64 = 0;
2887       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2888       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
2889     }
2890 
2891     /* allocate the matrix space */
2892     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2893     if (B->structure_only) {
2894       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2895       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2896       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2897     } else {
2898       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2899       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2900       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2901     }
2902     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2903 
2904     if (B->structure_only) {
2905       b->singlemalloc = PETSC_FALSE;
2906       b->free_a       = PETSC_FALSE;
2907     } else {
2908       b->singlemalloc = PETSC_TRUE;
2909       b->free_a       = PETSC_TRUE;
2910     }
2911     b->free_ij = PETSC_TRUE;
2912 
2913     b->i[0] = 0;
2914     for (i=1; i<mbs+1; i++) {
2915       b->i[i] = b->i[i-1] + b->imax[i-1];
2916     }
2917 
2918   } else {
2919     b->free_a  = PETSC_FALSE;
2920     b->free_ij = PETSC_FALSE;
2921   }
2922 
2923   b->bs2              = bs2;
2924   b->mbs              = mbs;
2925   b->nz               = 0;
2926   b->maxnz            = nz;
2927   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2928   B->was_assembled    = PETSC_FALSE;
2929   B->assembled        = PETSC_FALSE;
2930   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2931   PetscFunctionReturn(0);
2932 }
2933 
MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])2934 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2935 {
2936   PetscInt       i,m,nz,nz_max=0,*nnz;
2937   PetscScalar    *values=NULL;
2938   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2939   PetscErrorCode ierr;
2940 
2941   PetscFunctionBegin;
2942   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2943   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2944   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2945   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2946   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2947   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2948   m    = B->rmap->n/bs;
2949 
2950   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2951   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2952   for (i=0; i<m; i++) {
2953     nz = ii[i+1]- ii[i];
2954     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2955     nz_max = PetscMax(nz_max, nz);
2956     nnz[i] = nz;
2957   }
2958   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2959   ierr = PetscFree(nnz);CHKERRQ(ierr);
2960 
2961   values = (PetscScalar*)V;
2962   if (!values) {
2963     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2964   }
2965   for (i=0; i<m; i++) {
2966     PetscInt          ncols  = ii[i+1] - ii[i];
2967     const PetscInt    *icols = jj + ii[i];
2968     if (bs == 1 || !roworiented) {
2969       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2970       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2971     } else {
2972       PetscInt j;
2973       for (j=0; j<ncols; j++) {
2974         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2975         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2976       }
2977     }
2978   }
2979   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2980   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2981   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2982   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2983   PetscFunctionReturn(0);
2984 }
2985 
2986 /*@C
2987    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
2988 
2989    Not Collective
2990 
2991    Input Parameter:
2992 .  mat - a MATSEQBAIJ matrix
2993 
2994    Output Parameter:
2995 .   array - pointer to the data
2996 
2997    Level: intermediate
2998 
2999 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3000 @*/
MatSeqBAIJGetArray(Mat A,PetscScalar ** array)3001 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3002 {
3003   PetscErrorCode ierr;
3004 
3005   PetscFunctionBegin;
3006   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 /*@C
3011    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3012 
3013    Not Collective
3014 
3015    Input Parameters:
3016 +  mat - a MATSEQBAIJ matrix
3017 -  array - pointer to the data
3018 
3019    Level: intermediate
3020 
3021 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3022 @*/
MatSeqBAIJRestoreArray(Mat A,PetscScalar ** array)3023 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3024 {
3025   PetscErrorCode ierr;
3026 
3027   PetscFunctionBegin;
3028   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3029   PetscFunctionReturn(0);
3030 }
3031 
3032 /*MC
3033    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3034    block sparse compressed row format.
3035 
3036    Options Database Keys:
3037 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3038 
3039    Level: beginner
3040 
3041    Notes:
3042     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3043     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3044 
3045 .seealso: MatCreateSeqBAIJ()
3046 M*/
3047 
3048 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3049 
MatCreate_SeqBAIJ(Mat B)3050 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3051 {
3052   PetscErrorCode ierr;
3053   PetscMPIInt    size;
3054   Mat_SeqBAIJ    *b;
3055 
3056   PetscFunctionBegin;
3057   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3058   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3059 
3060   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3061   B->data = (void*)b;
3062   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3063 
3064   b->row          = NULL;
3065   b->col          = NULL;
3066   b->icol         = NULL;
3067   b->reallocs     = 0;
3068   b->saved_values = NULL;
3069 
3070   b->roworiented        = PETSC_TRUE;
3071   b->nonew              = 0;
3072   b->diag               = NULL;
3073   B->spptr              = NULL;
3074   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3075   b->keepnonzeropattern = PETSC_FALSE;
3076 
3077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3088 #if defined(PETSC_HAVE_HYPRE)
3089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3090 #endif
3091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3092   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3093   PetscFunctionReturn(0);
3094 }
3095 
MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)3096 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3097 {
3098   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3099   PetscErrorCode ierr;
3100   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3101 
3102   PetscFunctionBegin;
3103   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3104 
3105   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3106     c->imax           = a->imax;
3107     c->ilen           = a->ilen;
3108     c->free_imax_ilen = PETSC_FALSE;
3109   } else {
3110     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3111     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3112     for (i=0; i<mbs; i++) {
3113       c->imax[i] = a->imax[i];
3114       c->ilen[i] = a->ilen[i];
3115     }
3116     c->free_imax_ilen = PETSC_TRUE;
3117   }
3118 
3119   /* allocate the matrix space */
3120   if (mallocmatspace) {
3121     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3122       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3123       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3124 
3125       c->i            = a->i;
3126       c->j            = a->j;
3127       c->singlemalloc = PETSC_FALSE;
3128       c->free_a       = PETSC_TRUE;
3129       c->free_ij      = PETSC_FALSE;
3130       c->parent       = A;
3131       C->preallocated = PETSC_TRUE;
3132       C->assembled    = PETSC_TRUE;
3133 
3134       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3135       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3136       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3137     } else {
3138       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3139       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3140 
3141       c->singlemalloc = PETSC_TRUE;
3142       c->free_a       = PETSC_TRUE;
3143       c->free_ij      = PETSC_TRUE;
3144 
3145       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3146       if (mbs > 0) {
3147         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3148         if (cpvalues == MAT_COPY_VALUES) {
3149           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3150         } else {
3151           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3152         }
3153       }
3154       C->preallocated = PETSC_TRUE;
3155       C->assembled    = PETSC_TRUE;
3156     }
3157   }
3158 
3159   c->roworiented = a->roworiented;
3160   c->nonew       = a->nonew;
3161 
3162   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3163   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3164 
3165   c->bs2         = a->bs2;
3166   c->mbs         = a->mbs;
3167   c->nbs         = a->nbs;
3168 
3169   if (a->diag) {
3170     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3171       c->diag      = a->diag;
3172       c->free_diag = PETSC_FALSE;
3173     } else {
3174       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3175       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3176       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3177       c->free_diag = PETSC_TRUE;
3178     }
3179   } else c->diag = NULL;
3180 
3181   c->nz         = a->nz;
3182   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3183   c->solve_work = NULL;
3184   c->mult_work  = NULL;
3185   c->sor_workt  = NULL;
3186   c->sor_work   = NULL;
3187 
3188   c->compressedrow.use   = a->compressedrow.use;
3189   c->compressedrow.nrows = a->compressedrow.nrows;
3190   if (a->compressedrow.use) {
3191     i    = a->compressedrow.nrows;
3192     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3193     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3194     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3195     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3196   } else {
3197     c->compressedrow.use    = PETSC_FALSE;
3198     c->compressedrow.i      = NULL;
3199     c->compressedrow.rindex = NULL;
3200   }
3201   C->nonzerostate = A->nonzerostate;
3202 
3203   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3204   PetscFunctionReturn(0);
3205 }
3206 
MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat * B)3207 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3208 {
3209   PetscErrorCode ierr;
3210 
3211   PetscFunctionBegin;
3212   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3213   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3214   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3215   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)3220 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3221 {
3222   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3223   PetscInt       *rowidxs,*colidxs;
3224   PetscScalar    *matvals;
3225   PetscErrorCode ierr;
3226 
3227   PetscFunctionBegin;
3228   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3229 
3230   /* read matrix header */
3231   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3232   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3233   M = header[1]; N = header[2]; nz = header[3];
3234   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3235   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3236   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3237 
3238   /* set block sizes from the viewer's .info file */
3239   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3240   /* set local and global sizes if not set already */
3241   if (mat->rmap->n < 0) mat->rmap->n = M;
3242   if (mat->cmap->n < 0) mat->cmap->n = N;
3243   if (mat->rmap->N < 0) mat->rmap->N = M;
3244   if (mat->cmap->N < 0) mat->cmap->N = N;
3245   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3246   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3247 
3248   /* check if the matrix sizes are correct */
3249   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3250   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3251   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3252   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3253   mbs = m/bs; nbs = n/bs;
3254 
3255   /* read in row lengths, column indices and nonzero values */
3256   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3257   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3258   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3259   sum = rowidxs[m];
3260   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3261 
3262   /* read in column indices and nonzero values */
3263   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3264   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3265   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3266 
3267   { /* preallocate matrix storage */
3268     PetscBT   bt; /* helper bit set to count nonzeros */
3269     PetscInt  *nnz;
3270     PetscBool sbaij;
3271 
3272     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3273     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3274     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3275     for (i=0; i<mbs; i++) {
3276       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3277       for (k=0; k<bs; k++) {
3278         PetscInt row = bs*i + k;
3279         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3280           PetscInt col = colidxs[j];
3281           if (!sbaij || col >= row)
3282             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3283         }
3284       }
3285     }
3286     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3287     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3288     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3289     ierr = PetscFree(nnz);CHKERRQ(ierr);
3290   }
3291 
3292   /* store matrix values */
3293   for (i=0; i<m; i++) {
3294     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3295     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3296   }
3297 
3298   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3299   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3300   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3302   PetscFunctionReturn(0);
3303 }
3304 
MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)3305 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3306 {
3307   PetscErrorCode ierr;
3308   PetscBool      isbinary;
3309 
3310   PetscFunctionBegin;
3311   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3312   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3313   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 /*@C
3318    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3319    compressed row) format.  For good matrix assembly performance the
3320    user should preallocate the matrix storage by setting the parameter nz
3321    (or the array nnz).  By setting these parameters accurately, performance
3322    during matrix assembly can be increased by more than a factor of 50.
3323 
3324    Collective
3325 
3326    Input Parameters:
3327 +  comm - MPI communicator, set to PETSC_COMM_SELF
3328 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3329           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3330 .  m - number of rows
3331 .  n - number of columns
3332 .  nz - number of nonzero blocks  per block row (same for all rows)
3333 -  nnz - array containing the number of nonzero blocks in the various block rows
3334          (possibly different for each block row) or NULL
3335 
3336    Output Parameter:
3337 .  A - the matrix
3338 
3339    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3340    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3341    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3342 
3343    Options Database Keys:
3344 +   -mat_no_unroll - uses code that does not unroll the loops in the
3345                      block calculations (much slower)
3346 -    -mat_block_size - size of the blocks to use
3347 
3348    Level: intermediate
3349 
3350    Notes:
3351    The number of rows and columns must be divisible by blocksize.
3352 
3353    If the nnz parameter is given then the nz parameter is ignored
3354 
3355    A nonzero block is any block that as 1 or more nonzeros in it
3356 
3357    The block AIJ format is fully compatible with standard Fortran 77
3358    storage.  That is, the stored row and column indices can begin at
3359    either one (as in Fortran) or zero.  See the users' manual for details.
3360 
3361    Specify the preallocated storage with either nz or nnz (not both).
3362    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3363    allocation.  See Users-Manual: ch_mat for details.
3364    matrices.
3365 
3366 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3367 @*/
MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)3368 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3369 {
3370   PetscErrorCode ierr;
3371 
3372   PetscFunctionBegin;
3373   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3374   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3375   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3376   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3377   PetscFunctionReturn(0);
3378 }
3379 
3380 /*@C
3381    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3382    per row in the matrix. For good matrix assembly performance the
3383    user should preallocate the matrix storage by setting the parameter nz
3384    (or the array nnz).  By setting these parameters accurately, performance
3385    during matrix assembly can be increased by more than a factor of 50.
3386 
3387    Collective
3388 
3389    Input Parameters:
3390 +  B - the matrix
3391 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3392           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3393 .  nz - number of block nonzeros per block row (same for all rows)
3394 -  nnz - array containing the number of block nonzeros in the various block rows
3395          (possibly different for each block row) or NULL
3396 
3397    Options Database Keys:
3398 +   -mat_no_unroll - uses code that does not unroll the loops in the
3399                      block calculations (much slower)
3400 -   -mat_block_size - size of the blocks to use
3401 
3402    Level: intermediate
3403 
3404    Notes:
3405    If the nnz parameter is given then the nz parameter is ignored
3406 
3407    You can call MatGetInfo() to get information on how effective the preallocation was;
3408    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3409    You can also run with the option -info and look for messages with the string
3410    malloc in them to see if additional memory allocation was needed.
3411 
3412    The block AIJ format is fully compatible with standard Fortran 77
3413    storage.  That is, the stored row and column indices can begin at
3414    either one (as in Fortran) or zero.  See the users' manual for details.
3415 
3416    Specify the preallocated storage with either nz or nnz (not both).
3417    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3418    allocation.  See Users-Manual: ch_mat for details.
3419 
3420 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3421 @*/
MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])3422 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3423 {
3424   PetscErrorCode ierr;
3425 
3426   PetscFunctionBegin;
3427   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3428   PetscValidType(B,1);
3429   PetscValidLogicalCollectiveInt(B,bs,2);
3430   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3431   PetscFunctionReturn(0);
3432 }
3433 
3434 /*@C
3435    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3436 
3437    Collective
3438 
3439    Input Parameters:
3440 +  B - the matrix
3441 .  i - the indices into j for the start of each local row (starts with zero)
3442 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3443 -  v - optional values in the matrix
3444 
3445    Level: advanced
3446 
3447    Notes:
3448    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3449    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3450    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3451    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3452    block column and the second index is over columns within a block.
3453 
3454    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3455 
3456 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3457 @*/
MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])3458 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3459 {
3460   PetscErrorCode ierr;
3461 
3462   PetscFunctionBegin;
3463   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3464   PetscValidType(B,1);
3465   PetscValidLogicalCollectiveInt(B,bs,2);
3466   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3467   PetscFunctionReturn(0);
3468 }
3469 
3470 
3471 /*@
3472      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3473 
3474      Collective
3475 
3476    Input Parameters:
3477 +  comm - must be an MPI communicator of size 1
3478 .  bs - size of block
3479 .  m - number of rows
3480 .  n - number of columns
3481 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3482 .  j - column indices
3483 -  a - matrix values
3484 
3485    Output Parameter:
3486 .  mat - the matrix
3487 
3488    Level: advanced
3489 
3490    Notes:
3491        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3492     once the matrix is destroyed
3493 
3494        You cannot set new nonzero locations into this matrix, that will generate an error.
3495 
3496        The i and j indices are 0 based
3497 
3498        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3499 
3500       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3501       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3502       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3503       with column-major ordering within blocks.
3504 
3505 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3506 
3507 @*/
MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat * mat)3508 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3509 {
3510   PetscErrorCode ierr;
3511   PetscInt       ii;
3512   Mat_SeqBAIJ    *baij;
3513 
3514   PetscFunctionBegin;
3515   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3516   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3517 
3518   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3519   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3520   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3521   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3522   baij = (Mat_SeqBAIJ*)(*mat)->data;
3523   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3524   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3525 
3526   baij->i = i;
3527   baij->j = j;
3528   baij->a = a;
3529 
3530   baij->singlemalloc = PETSC_FALSE;
3531   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3532   baij->free_a       = PETSC_FALSE;
3533   baij->free_ij      = PETSC_FALSE;
3534 
3535   for (ii=0; ii<m; ii++) {
3536     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3537     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3538   }
3539   if (PetscDefined(USE_DEBUG)) {
3540     for (ii=0; ii<baij->i[m]; ii++) {
3541       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3542       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3543     }
3544   }
3545 
3546   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3547   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3548   PetscFunctionReturn(0);
3549 }
3550 
MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)3551 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3552 {
3553   PetscErrorCode ierr;
3554   PetscMPIInt    size;
3555 
3556   PetscFunctionBegin;
3557   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3558   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3559     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3560   } else {
3561     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3562   }
3563   PetscFunctionReturn(0);
3564 }
3565