1 
2 /*
3   Defines the basic matrix operations for the SELL matrix storage format.
4 */
5 #include <../src/mat/impls/sell/seq/sell.h>  /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blocktranspose.h>
8 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
9 
10   #include <immintrin.h>
11 
12   #if !defined(_MM_SCALE_8)
13   #define _MM_SCALE_8    8
14   #endif
15 
16   #if defined(__AVX512F__)
17   /* these do not work
18    vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
19    vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
20   */
21     #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
22     /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
23     vec_idx  = _mm256_loadu_si256((__m256i const*)acolidx); \
24     vec_vals = _mm512_loadu_pd(aval); \
25     vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
26     vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y)
27   #elif defined(__AVX2__) && defined(__FMA__)
28     #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
29     vec_vals = _mm256_loadu_pd(aval); \
30     vec_idx  = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \
31     vec_x    = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \
32     vec_y    = _mm256_fmadd_pd(vec_x,vec_vals,vec_y)
33   #endif
34 #endif  /* PETSC_HAVE_IMMINTRIN_H */
35 
36 /*@C
37  MatSeqSELLSetPreallocation - For good matrix assembly performance
38  the user should preallocate the matrix storage by setting the parameter nz
39  (or the array nnz).  By setting these parameters accurately, performance
40  during matrix assembly can be increased significantly.
41 
42  Collective
43 
44  Input Parameters:
45  +  B - The matrix
46  .  nz - number of nonzeros per row (same for all rows)
47  -  nnz - array containing the number of nonzeros in the various rows
48  (possibly different for each row) or NULL
49 
50  Notes:
51  If nnz is given then nz is ignored.
52 
53  Specify the preallocated storage with either nz or nnz (not both).
54  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
55  allocation.  For large problems you MUST preallocate memory or you
56  will get TERRIBLE performance, see the users' manual chapter on matrices.
57 
58  You can call MatGetInfo() to get information on how effective the preallocation was;
59  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
60  You can also run with the option -info and look for messages with the string
61  malloc in them to see if additional memory allocation was needed.
62 
63  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
64  entries or columns indices.
65 
66  The maximum number of nonzeos in any row should be as accurate as possible.
67  If it is underestimated, you will get bad performance due to reallocation
68  (MatSeqXSELLReallocateSELL).
69 
70  Level: intermediate
71 
72  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
73 
74  @*/
MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])75 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
76 {
77   PetscErrorCode ierr;
78 
79   PetscFunctionBegin;
80   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
81   PetscValidType(B,1);
82   ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])86 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
87 {
88   Mat_SeqSELL    *b;
89   PetscInt       i,j,totalslices;
90   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
95   if (maxallocrow == MAT_SKIP_ALLOCATION) {
96     skipallocation = PETSC_TRUE;
97     maxallocrow    = 0;
98   }
99 
100   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
101   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
102 
103   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
104   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
105   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
106   if (rlen) {
107     for (i=0; i<B->rmap->n; i++) {
108       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
109       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
110     }
111   }
112 
113   B->preallocated = PETSC_TRUE;
114 
115   b = (Mat_SeqSELL*)B->data;
116 
117   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
118   b->totalslices = totalslices;
119   if (!skipallocation) {
120     if (B->rmap->n & 0x07) {ierr = PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);CHKERRQ(ierr);}
121 
122     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
123       ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr);
124       ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
125     }
126     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
127       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
128       else if (maxallocrow < 0) maxallocrow = 1;
129       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
130     } else {
131       maxallocrow = 0;
132       b->sliidx[0] = 0;
133       for (i=1; i<totalslices; i++) {
134         b->sliidx[i] = 0;
135         for (j=0;j<8;j++) {
136           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
137         }
138         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
139         ierr = PetscIntSumError(b->sliidx[i-1],8*b->sliidx[i],&b->sliidx[i]);CHKERRQ(ierr);
140       }
141       /* last slice */
142       b->sliidx[totalslices] = 0;
143       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
144       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
145       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
146     }
147 
148     /* allocate space for val, colidx, rlen */
149     /* FIXME: should B's old memory be unlogged? */
150     ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr);
151     /* FIXME: assuming an element of the bit array takes 8 bits */
152     ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr);
153     ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
154     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
155     ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr);
156     ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr);
157 
158     b->singlemalloc = PETSC_TRUE;
159     b->free_val     = PETSC_TRUE;
160     b->free_colidx  = PETSC_TRUE;
161   } else {
162     b->free_val    = PETSC_FALSE;
163     b->free_colidx = PETSC_FALSE;
164   }
165 
166   b->nz               = 0;
167   b->maxallocrow      = maxallocrow;
168   b->rlenmax          = maxallocrow;
169   b->maxallocmat      = b->sliidx[totalslices];
170   B->info.nz_unneeded = (double)b->maxallocmat;
171   if (realalloc) {
172     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
173   }
174   PetscFunctionReturn(0);
175 }
176 
MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)177 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
178 {
179   Mat_SeqSELL *a = (Mat_SeqSELL*)A->data;
180   PetscInt    shift;
181 
182   PetscFunctionBegin;
183   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
184   if (nz) *nz = a->rlen[row];
185   shift = a->sliidx[row>>3]+(row&0x07);
186   if (!a->getrowcols) {
187     PetscErrorCode ierr;
188 
189     ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr);
190   }
191   if (idx) {
192     PetscInt j;
193     for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j];
194     *idx = a->getrowcols;
195   }
196   if (v) {
197     PetscInt j;
198     for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j];
199     *v = a->getrowvals;
200   }
201   PetscFunctionReturn(0);
202 }
203 
MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)204 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
205 {
206   PetscFunctionBegin;
207   PetscFunctionReturn(0);
208 }
209 
MatConvert_SeqSELL_SeqAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)210 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
211 {
212   Mat            B;
213   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
214   PetscInt       i;
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   if (reuse == MAT_REUSE_MATRIX) {
219     B    = *newmat;
220     ierr = MatZeroEntries(B);CHKERRQ(ierr);
221   } else {
222     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
223     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
224     ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
225     ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr);
226   }
227 
228   for (i=0; i<A->rmap->n; i++) {
229     PetscInt    nz = 0,*cols = NULL;
230     PetscScalar *vals = NULL;
231 
232     ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
233     ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
234     ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
235   }
236 
237   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
238   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239   B->rmap->bs = A->rmap->bs;
240 
241   if (reuse == MAT_INPLACE_MATRIX) {
242     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
243   } else {
244     *newmat = B;
245   }
246   PetscFunctionReturn(0);
247 }
248 
249 #include <../src/mat/impls/aij/seq/aij.h>
250 
MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)251 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
252 {
253   Mat               B;
254   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
255   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
256   const PetscInt    *cols;
257   const PetscScalar *vals;
258   PetscErrorCode    ierr;
259 
260   PetscFunctionBegin;
261 
262   if (reuse == MAT_REUSE_MATRIX) {
263     B = *newmat;
264   } else {
265     /* Can we just use ilen? */
266     ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr);
267     for (i=0; i<m; i++) {
268       rowlengths[i] = ai[i+1] - ai[i];
269     }
270 
271     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
272     ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr);
273     ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr);
274     ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr);
275     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
276   }
277 
278   for (row=0; row<m; row++) {
279     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
280     ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
281     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
282   }
283   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
284   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
285   B->rmap->bs = A->rmap->bs;
286 
287   if (reuse == MAT_INPLACE_MATRIX) {
288     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
289   } else {
290     *newmat = B;
291   }
292   PetscFunctionReturn(0);
293 }
294 
MatMult_SeqSELL(Mat A,Vec xx,Vec yy)295 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
296 {
297   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
298   PetscScalar       *y;
299   const PetscScalar *x;
300   const MatScalar   *aval=a->val;
301   PetscInt          totalslices=a->totalslices;
302   const PetscInt    *acolidx=a->colidx;
303   PetscInt          i,j;
304   PetscErrorCode    ierr;
305 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
306   __m512d           vec_x,vec_y,vec_vals;
307   __m256i           vec_idx;
308   __mmask8          mask;
309   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
310   __m256i           vec_idx2,vec_idx3,vec_idx4;
311 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
312   __m128i           vec_idx;
313   __m256d           vec_x,vec_y,vec_y2,vec_vals;
314   MatScalar         yval;
315   PetscInt          r,rows_left,row,nnz_in_row;
316 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
317   __m128d           vec_x_tmp;
318   __m256d           vec_x,vec_y,vec_y2,vec_vals;
319   MatScalar         yval;
320   PetscInt          r,rows_left,row,nnz_in_row;
321 #else
322   PetscScalar       sum[8];
323 #endif
324 
325 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
326 #pragma disjoint(*x,*y,*aval)
327 #endif
328 
329   PetscFunctionBegin;
330   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
331   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
332 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
333   for (i=0; i<totalslices; i++) { /* loop over slices */
334     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
335     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
336 
337     vec_y  = _mm512_setzero_pd();
338     vec_y2 = _mm512_setzero_pd();
339     vec_y3 = _mm512_setzero_pd();
340     vec_y4 = _mm512_setzero_pd();
341 
342     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
343     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
344     case 3:
345       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
346       acolidx += 8; aval += 8;
347       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
348       acolidx += 8; aval += 8;
349       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
350       acolidx += 8; aval += 8;
351       j += 3;
352       break;
353     case 2:
354       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
355       acolidx += 8; aval += 8;
356       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
357       acolidx += 8; aval += 8;
358       j += 2;
359       break;
360     case 1:
361       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
362       acolidx += 8; aval += 8;
363       j += 1;
364       break;
365     }
366     #pragma novector
367     for (; j<(a->sliidx[i+1]>>3); j+=4) {
368       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
369       acolidx += 8; aval += 8;
370       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
371       acolidx += 8; aval += 8;
372       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
373       acolidx += 8; aval += 8;
374       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
375       acolidx += 8; aval += 8;
376     }
377 
378     vec_y = _mm512_add_pd(vec_y,vec_y2);
379     vec_y = _mm512_add_pd(vec_y,vec_y3);
380     vec_y = _mm512_add_pd(vec_y,vec_y4);
381     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
382       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
383       _mm512_mask_storeu_pd(&y[8*i],mask,vec_y);
384     } else {
385       _mm512_storeu_pd(&y[8*i],vec_y);
386     }
387   }
388 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
389   for (i=0; i<totalslices; i++) { /* loop over full slices */
390     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
391     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
392 
393     /* last slice may have padding rows. Don't use vectorization. */
394     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
395       rows_left = A->rmap->n - 8*i;
396       for (r=0; r<rows_left; ++r) {
397         yval = (MatScalar)0;
398         row = 8*i + r;
399         nnz_in_row = a->rlen[row];
400         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
401         y[row] = yval;
402       }
403       break;
404     }
405 
406     vec_y  = _mm256_setzero_pd();
407     vec_y2 = _mm256_setzero_pd();
408 
409     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
410     #pragma novector
411     #pragma unroll(2)
412     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
413       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
414       aval += 4; acolidx += 4;
415       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2);
416       aval += 4; acolidx += 4;
417     }
418 
419     _mm256_storeu_pd(y+i*8,vec_y);
420     _mm256_storeu_pd(y+i*8+4,vec_y2);
421   }
422 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
423   for (i=0; i<totalslices; i++) { /* loop over full slices */
424     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
425     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
426 
427     vec_y  = _mm256_setzero_pd();
428     vec_y2 = _mm256_setzero_pd();
429 
430     /* last slice may have padding rows. Don't use vectorization. */
431     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
432       rows_left = A->rmap->n - 8*i;
433       for (r=0; r<rows_left; ++r) {
434         yval = (MatScalar)0;
435         row = 8*i + r;
436         nnz_in_row = a->rlen[row];
437         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
438         y[row] = yval;
439       }
440       break;
441     }
442 
443     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
444     #pragma novector
445     #pragma unroll(2)
446     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
447       vec_vals  = _mm256_loadu_pd(aval);
448       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
449       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
450       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
451       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
452       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
453       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
454       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
455       aval     += 4;
456 
457       vec_vals  = _mm256_loadu_pd(aval);
458       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
459       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
460       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
461       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
462       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
463       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
464       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
465       aval     += 4;
466     }
467 
468     _mm256_storeu_pd(y + i*8,     vec_y);
469     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
470   }
471 #else
472   for (i=0; i<totalslices; i++) { /* loop over slices */
473     for (j=0; j<8; j++) sum[j] = 0.0;
474     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
475       sum[0] += aval[j] * x[acolidx[j]];
476       sum[1] += aval[j+1] * x[acolidx[j+1]];
477       sum[2] += aval[j+2] * x[acolidx[j+2]];
478       sum[3] += aval[j+3] * x[acolidx[j+3]];
479       sum[4] += aval[j+4] * x[acolidx[j+4]];
480       sum[5] += aval[j+5] * x[acolidx[j+5]];
481       sum[6] += aval[j+6] * x[acolidx[j+6]];
482       sum[7] += aval[j+7] * x[acolidx[j+7]];
483     }
484     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
485       for (j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
486     } else {
487       for (j=0; j<8; j++) y[8*i+j] = sum[j];
488     }
489   }
490 #endif
491 
492   ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */
493   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
494   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)499 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
500 {
501   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
502   PetscScalar       *y,*z;
503   const PetscScalar *x;
504   const MatScalar   *aval=a->val;
505   PetscInt          totalslices=a->totalslices;
506   const PetscInt    *acolidx=a->colidx;
507   PetscInt          i,j;
508   PetscErrorCode    ierr;
509 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
510   __m512d           vec_x,vec_y,vec_vals;
511   __m256i           vec_idx;
512   __mmask8          mask;
513   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
514   __m256i           vec_idx2,vec_idx3,vec_idx4;
515 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
516   __m128d           vec_x_tmp;
517   __m256d           vec_x,vec_y,vec_y2,vec_vals;
518   MatScalar         yval;
519   PetscInt          r,row,nnz_in_row;
520 #else
521   PetscScalar       sum[8];
522 #endif
523 
524 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
525 #pragma disjoint(*x,*y,*aval)
526 #endif
527 
528   PetscFunctionBegin;
529   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
530   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
531 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
532   for (i=0; i<totalslices; i++) { /* loop over slices */
533     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
534     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
535 
536     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
537       mask   = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
538       vec_y  = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]);
539     } else {
540       vec_y  = _mm512_loadu_pd(&y[8*i]);
541     }
542     vec_y2 = _mm512_setzero_pd();
543     vec_y3 = _mm512_setzero_pd();
544     vec_y4 = _mm512_setzero_pd();
545 
546     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
547     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
548     case 3:
549       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
550       acolidx += 8; aval += 8;
551       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
552       acolidx += 8; aval += 8;
553       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
554       acolidx += 8; aval += 8;
555       j += 3;
556       break;
557     case 2:
558       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
559       acolidx += 8; aval += 8;
560       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
561       acolidx += 8; aval += 8;
562       j += 2;
563       break;
564     case 1:
565       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
566       acolidx += 8; aval += 8;
567       j += 1;
568       break;
569     }
570     #pragma novector
571     for (; j<(a->sliidx[i+1]>>3); j+=4) {
572       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
573       acolidx += 8; aval += 8;
574       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
575       acolidx += 8; aval += 8;
576       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
577       acolidx += 8; aval += 8;
578       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
579       acolidx += 8; aval += 8;
580     }
581 
582     vec_y = _mm512_add_pd(vec_y,vec_y2);
583     vec_y = _mm512_add_pd(vec_y,vec_y3);
584     vec_y = _mm512_add_pd(vec_y,vec_y4);
585     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
586       _mm512_mask_storeu_pd(&z[8*i],mask,vec_y);
587     } else {
588       _mm512_storeu_pd(&z[8*i],vec_y);
589     }
590   }
591 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
592   for (i=0; i<totalslices; i++) { /* loop over full slices */
593     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
594     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
595 
596     /* last slice may have padding rows. Don't use vectorization. */
597     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
598       for (r=0; r<(A->rmap->n & 0x07); ++r) {
599         row        = 8*i + r;
600         yval       = (MatScalar)0.0;
601         nnz_in_row = a->rlen[row];
602         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
603         z[row] = y[row] + yval;
604       }
605       break;
606     }
607 
608     vec_y  = _mm256_loadu_pd(y+8*i);
609     vec_y2 = _mm256_loadu_pd(y+8*i+4);
610 
611     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
612     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
613       vec_vals  = _mm256_loadu_pd(aval);
614       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
615       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
616       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
617       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
618       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
619       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
620       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
621       aval     += 4;
622 
623       vec_vals  = _mm256_loadu_pd(aval);
624       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
625       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
626       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
627       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
628       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
629       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
630       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
631       aval     += 4;
632     }
633 
634     _mm256_storeu_pd(z+i*8,vec_y);
635     _mm256_storeu_pd(z+i*8+4,vec_y2);
636   }
637 #else
638   for (i=0; i<totalslices; i++) { /* loop over slices */
639     for (j=0; j<8; j++) sum[j] = 0.0;
640     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
641       sum[0] += aval[j] * x[acolidx[j]];
642       sum[1] += aval[j+1] * x[acolidx[j+1]];
643       sum[2] += aval[j+2] * x[acolidx[j+2]];
644       sum[3] += aval[j+3] * x[acolidx[j+3]];
645       sum[4] += aval[j+4] * x[acolidx[j+4]];
646       sum[5] += aval[j+5] * x[acolidx[j+5]];
647       sum[6] += aval[j+6] * x[acolidx[j+6]];
648       sum[7] += aval[j+7] * x[acolidx[j+7]];
649     }
650     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
651       for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
652     } else {
653       for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
654     }
655   }
656 #endif
657 
658   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
659   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
660   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
661   PetscFunctionReturn(0);
662 }
663 
MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)664 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
665 {
666   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
667   PetscScalar       *y;
668   const PetscScalar *x;
669   const MatScalar   *aval=a->val;
670   const PetscInt    *acolidx=a->colidx;
671   PetscInt          i,j,r,row,nnz_in_row,totalslices=a->totalslices;
672   PetscErrorCode    ierr;
673 
674 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
675 #pragma disjoint(*x,*y,*aval)
676 #endif
677 
678   PetscFunctionBegin;
679   if (A->symmetric) {
680     ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr);
681     PetscFunctionReturn(0);
682   }
683   if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); }
684   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
685   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
686   for (i=0; i<a->totalslices; i++) { /* loop over slices */
687     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
688       for (r=0; r<(A->rmap->n & 0x07); ++r) {
689         row        = 8*i + r;
690         nnz_in_row = a->rlen[row];
691         for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
692       }
693       break;
694     }
695     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
696       y[acolidx[j]]   += aval[j] * x[8*i];
697       y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
698       y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
699       y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
700       y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
701       y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
702       y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
703       y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
704     }
705   }
706   ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr);
707   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
708   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)712 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
713 {
714   PetscErrorCode ierr;
715 
716   PetscFunctionBegin;
717   if (A->symmetric) {
718     ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr);
719   } else {
720     ierr = VecSet(yy,0.0);CHKERRQ(ierr);
721     ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr);
722   }
723   PetscFunctionReturn(0);
724 }
725 
726 /*
727      Checks for missing diagonals
728 */
MatMissingDiagonal_SeqSELL(Mat A,PetscBool * missing,PetscInt * d)729 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
730 {
731   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
732   PetscInt       *diag,i;
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736   *missing = PETSC_FALSE;
737   if (A->rmap->n > 0 && !(a->colidx)) {
738     *missing = PETSC_TRUE;
739     if (d) *d = 0;
740     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
741   } else {
742     diag = a->diag;
743     for (i=0; i<A->rmap->n; i++) {
744       if (diag[i] == -1) {
745         *missing = PETSC_TRUE;
746         if (d) *d = i;
747         ierr = PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);CHKERRQ(ierr);
748         break;
749       }
750     }
751   }
752   PetscFunctionReturn(0);
753 }
754 
MatMarkDiagonal_SeqSELL(Mat A)755 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
756 {
757   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
758   PetscInt       i,j,m=A->rmap->n,shift;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   if (!a->diag) {
763     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
764     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
765     a->free_diag = PETSC_TRUE;
766   }
767   for (i=0; i<m; i++) { /* loop over rows */
768     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
769     a->diag[i] = -1;
770     for (j=0; j<a->rlen[i]; j++) {
771       if (a->colidx[shift+j*8] == i) {
772         a->diag[i] = shift+j*8;
773         break;
774       }
775     }
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 /*
781   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
782 */
MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)783 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
784 {
785   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
786   PetscInt       i,*diag,m = A->rmap->n;
787   MatScalar      *val = a->val;
788   PetscScalar    *idiag,*mdiag;
789   PetscErrorCode ierr;
790 
791   PetscFunctionBegin;
792   if (a->idiagvalid) PetscFunctionReturn(0);
793   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
794   diag = a->diag;
795   if (!a->idiag) {
796     ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr);
797     ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr);
798     val  = a->val;
799   }
800   mdiag = a->mdiag;
801   idiag = a->idiag;
802 
803   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
804     for (i=0; i<m; i++) {
805       mdiag[i] = val[diag[i]];
806       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
807         if (PetscRealPart(fshift)) {
808           ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr);
809           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
810           A->factorerror_zeropivot_value = 0.0;
811           A->factorerror_zeropivot_row   = i;
812         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
813       }
814       idiag[i] = 1.0/val[diag[i]];
815     }
816     ierr = PetscLogFlops(m);CHKERRQ(ierr);
817   } else {
818     for (i=0; i<m; i++) {
819       mdiag[i] = val[diag[i]];
820       idiag[i] = omega/(fshift + val[diag[i]]);
821     }
822     ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr);
823   }
824   a->idiagvalid = PETSC_TRUE;
825   PetscFunctionReturn(0);
826 }
827 
MatZeroEntries_SeqSELL(Mat A)828 PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
829 {
830   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
831   PetscErrorCode ierr;
832 
833   PetscFunctionBegin;
834   ierr = PetscArrayzero(a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr);
835   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
836   PetscFunctionReturn(0);
837 }
838 
MatDestroy_SeqSELL(Mat A)839 PetscErrorCode MatDestroy_SeqSELL(Mat A)
840 {
841   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
842   PetscErrorCode ierr;
843 
844   PetscFunctionBegin;
845 #if defined(PETSC_USE_LOG)
846   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
847 #endif
848   ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr);
849   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
850   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
851   ierr = PetscFree(a->diag);CHKERRQ(ierr);
852   ierr = PetscFree(a->rlen);CHKERRQ(ierr);
853   ierr = PetscFree(a->sliidx);CHKERRQ(ierr);
854   ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr);
855   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
856   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
857   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
858   ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr);
859 
860   ierr = PetscFree(A->data);CHKERRQ(ierr);
861 
862   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
865   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)869 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
870 {
871   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
872   PetscErrorCode ierr;
873 
874   PetscFunctionBegin;
875   switch (op) {
876   case MAT_ROW_ORIENTED:
877     a->roworiented = flg;
878     break;
879   case MAT_KEEP_NONZERO_PATTERN:
880     a->keepnonzeropattern = flg;
881     break;
882   case MAT_NEW_NONZERO_LOCATIONS:
883     a->nonew = (flg ? 0 : 1);
884     break;
885   case MAT_NEW_NONZERO_LOCATION_ERR:
886     a->nonew = (flg ? -1 : 0);
887     break;
888   case MAT_NEW_NONZERO_ALLOCATION_ERR:
889     a->nonew = (flg ? -2 : 0);
890     break;
891   case MAT_UNUSED_NONZERO_LOCATION_ERR:
892     a->nounused = (flg ? -1 : 0);
893     break;
894   case MAT_NEW_DIAGONALS:
895   case MAT_IGNORE_OFF_PROC_ENTRIES:
896   case MAT_USE_HASH_TABLE:
897   case MAT_SORTED_FULL:
898     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
899     break;
900   case MAT_SPD:
901   case MAT_SYMMETRIC:
902   case MAT_STRUCTURALLY_SYMMETRIC:
903   case MAT_HERMITIAN:
904   case MAT_SYMMETRY_ETERNAL:
905     /* These options are handled directly by MatSetOption() */
906     break;
907   default:
908     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
909   }
910   PetscFunctionReturn(0);
911 }
912 
MatGetDiagonal_SeqSELL(Mat A,Vec v)913 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
914 {
915   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
916   PetscInt       i,j,n,shift;
917   PetscScalar    *x,zero=0.0;
918   PetscErrorCode ierr;
919 
920   PetscFunctionBegin;
921   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
922   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
923 
924   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
925     PetscInt *diag=a->diag;
926     ierr = VecGetArray(v,&x);CHKERRQ(ierr);
927     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
928     ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
929     PetscFunctionReturn(0);
930   }
931 
932   ierr = VecSet(v,zero);CHKERRQ(ierr);
933   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
934   for (i=0; i<n; i++) { /* loop over rows */
935     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
936     x[i] = 0;
937     for (j=0; j<a->rlen[i]; j++) {
938       if (a->colidx[shift+j*8] == i) {
939         x[i] = a->val[shift+j*8];
940         break;
941       }
942     }
943   }
944   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
945   PetscFunctionReturn(0);
946 }
947 
MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)948 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
949 {
950   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
951   const PetscScalar *l,*r;
952   PetscInt          i,j,m,n,row;
953   PetscErrorCode    ierr;
954 
955   PetscFunctionBegin;
956   if (ll) {
957     /* The local size is used so that VecMPI can be passed to this routine
958        by MatDiagonalScale_MPISELL */
959     ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr);
960     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
961     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
962     for (i=0; i<a->totalslices; i++) { /* loop over slices */
963       if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
964         for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
965           if (row < (A->rmap->n & 0x07)) a->val[j] *= l[8*i+row];
966         }
967       } else {
968         for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
969           a->val[j] *= l[8*i+row];
970         }
971       }
972     }
973     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
974     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
975   }
976   if (rr) {
977     ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr);
978     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
979     ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
980     for (i=0; i<a->totalslices; i++) { /* loop over slices */
981       if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
982         for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
983           if (row < (A->rmap->n & 0x07)) a->val[j] *= r[a->colidx[j]];
984         }
985       } else {
986         for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
987           a->val[j] *= r[a->colidx[j]];
988         }
989       }
990     }
991     ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
992     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
993   }
994   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
995   PetscFunctionReturn(0);
996 }
997 
998 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
999 
MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])1000 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1001 {
1002   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1003   PetscInt    *cp,i,k,low,high,t,row,col,l;
1004   PetscInt    shift;
1005   MatScalar   *vp;
1006 
1007   PetscFunctionBegin;
1008   for (k=0; k<m; k++) { /* loop over requested rows */
1009     row = im[k];
1010     if (row<0) continue;
1011     if (PetscUnlikelyDebug(row >= A->rmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1012     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1013     cp = a->colidx+shift; /* pointer to the row */
1014     vp = a->val+shift; /* pointer to the row */
1015     for (l=0; l<n; l++) { /* loop over requested columns */
1016       col = in[l];
1017       if (col<0) continue;
1018       if (PetscUnlikelyDebug(col >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
1019       high = a->rlen[row]; low = 0; /* assume unsorted */
1020       while (high-low > 5) {
1021         t = (low+high)/2;
1022         if (*(cp+t*8) > col) high = t;
1023         else low = t;
1024       }
1025       for (i=low; i<high; i++) {
1026         if (*(cp+8*i) > col) break;
1027         if (*(cp+8*i) == col) {
1028           *v++ = *(vp+8*i);
1029           goto finished;
1030         }
1031       }
1032       *v++ = 0.0;
1033     finished:;
1034     }
1035   }
1036   PetscFunctionReturn(0);
1037 }
1038 
MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)1039 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
1040 {
1041   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1042   PetscInt          i,j,m=A->rmap->n,shift;
1043   const char        *name;
1044   PetscViewerFormat format;
1045   PetscErrorCode    ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1049   if (format == PETSC_VIEWER_ASCII_MATLAB) {
1050     PetscInt nofinalvalue = 0;
1051     /*
1052     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1053       nofinalvalue = 1;
1054     }
1055     */
1056     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1057     ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr);
1058     ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr);
1059 #if defined(PETSC_USE_COMPLEX)
1060     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1061 #else
1062     ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1063 #endif
1064     ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr);
1065 
1066     for (i=0; i<m; i++) {
1067       shift = a->sliidx[i>>3]+(i&0x07);
1068       for (j=0; j<a->rlen[i]; j++) {
1069 #if defined(PETSC_USE_COMPLEX)
1070         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1071 #else
1072         ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1073 #endif
1074       }
1075     }
1076     /*
1077     if (nofinalvalue) {
1078 #if defined(PETSC_USE_COMPLEX)
1079       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr);
1080 #else
1081       ierr = PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr);
1082 #endif
1083     }
1084     */
1085     ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
1086     ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr);
1087     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1088   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1089     PetscFunctionReturn(0);
1090   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1091     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1092     for (i=0; i<m; i++) {
1093       ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1094       shift = a->sliidx[i>>3]+(i&0x07);
1095       for (j=0; j<a->rlen[i]; j++) {
1096 #if defined(PETSC_USE_COMPLEX)
1097         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1098           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1099         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1100           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1101         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1102           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1103         }
1104 #else
1105         if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);}
1106 #endif
1107       }
1108       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1109     }
1110     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1111   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1112     PetscInt    cnt=0,jcnt;
1113     PetscScalar value;
1114 #if defined(PETSC_USE_COMPLEX)
1115     PetscBool   realonly=PETSC_TRUE;
1116     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1117       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1118         realonly = PETSC_FALSE;
1119         break;
1120       }
1121     }
1122 #endif
1123 
1124     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1125     for (i=0; i<m; i++) {
1126       jcnt = 0;
1127       shift = a->sliidx[i>>3]+(i&0x07);
1128       for (j=0; j<A->cmap->n; j++) {
1129         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1130           value = a->val[cnt++];
1131           jcnt++;
1132         } else {
1133           value = 0.0;
1134         }
1135 #if defined(PETSC_USE_COMPLEX)
1136         if (realonly) {
1137           ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr);
1138         } else {
1139           ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr);
1140         }
1141 #else
1142         ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr);
1143 #endif
1144       }
1145       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1146     }
1147     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1148   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1149     PetscInt fshift=1;
1150     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1151 #if defined(PETSC_USE_COMPLEX)
1152     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr);
1153 #else
1154     ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr);
1155 #endif
1156     ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr);
1157     for (i=0; i<m; i++) {
1158       shift = a->sliidx[i>>3]+(i&0x07);
1159       for (j=0; j<a->rlen[i]; j++) {
1160 #if defined(PETSC_USE_COMPLEX)
1161         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1162 #else
1163         ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1164 #endif
1165       }
1166     }
1167     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1168   } else if (format == PETSC_VIEWER_NATIVE) {
1169     for (i=0; i<a->totalslices; i++) { /* loop over slices */
1170       PetscInt row;
1171       ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr);
1172       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
1173 #if defined(PETSC_USE_COMPLEX)
1174         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1175           ierr = PetscViewerASCIIPrintf(viewer,"  %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1176         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1177           ierr = PetscViewerASCIIPrintf(viewer,"  %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1178         } else {
1179           ierr = PetscViewerASCIIPrintf(viewer,"  %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1180         }
1181 #else
1182         ierr = PetscViewerASCIIPrintf(viewer,"  %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1183 #endif
1184       }
1185     }
1186   } else {
1187     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1188     if (A->factortype) {
1189       for (i=0; i<m; i++) {
1190         shift = a->sliidx[i>>3]+(i&0x07);
1191         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1192         /* L part */
1193         for (j=shift; j<a->diag[i]; j+=8) {
1194 #if defined(PETSC_USE_COMPLEX)
1195           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1196             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1197           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1198             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1199           } else {
1200             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1201           }
1202 #else
1203           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1204 #endif
1205         }
1206         /* diagonal */
1207         j = a->diag[i];
1208 #if defined(PETSC_USE_COMPLEX)
1209         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1210           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr);
1211         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1212           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr);
1213         } else {
1214           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr);
1215         }
1216 #else
1217         ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr);
1218 #endif
1219 
1220         /* U part */
1221         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1222 #if defined(PETSC_USE_COMPLEX)
1223           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1224             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1225           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1226             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1227           } else {
1228             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1229           }
1230 #else
1231           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1232 #endif
1233         }
1234         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1235       }
1236     } else {
1237       for (i=0; i<m; i++) {
1238         shift = a->sliidx[i>>3]+(i&0x07);
1239         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1240         for (j=0; j<a->rlen[i]; j++) {
1241 #if defined(PETSC_USE_COMPLEX)
1242           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1243             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1244           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1245             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1246           } else {
1247             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1248           }
1249 #else
1250           ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);
1251 #endif
1252         }
1253         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1254       }
1255     }
1256     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1257   }
1258   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #include <petscdraw.h>
MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void * Aa)1263 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1264 {
1265   Mat               A=(Mat)Aa;
1266   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1267   PetscInt          i,j,m=A->rmap->n,shift;
1268   int               color;
1269   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1270   PetscViewer       viewer;
1271   PetscViewerFormat format;
1272   PetscErrorCode    ierr;
1273 
1274   PetscFunctionBegin;
1275   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1276   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1277   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1278 
1279   /* loop over matrix elements drawing boxes */
1280 
1281   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1282     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1283     /* Blue for negative, Cyan for zero and  Red for positive */
1284     color = PETSC_DRAW_BLUE;
1285     for (i=0; i<m; i++) {
1286       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1287       y_l = m - i - 1.0; y_r = y_l + 1.0;
1288       for (j=0; j<a->rlen[i]; j++) {
1289         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1290         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1291         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1292       }
1293     }
1294     color = PETSC_DRAW_CYAN;
1295     for (i=0; i<m; i++) {
1296       shift = a->sliidx[i>>3]+(i&0x07);
1297       y_l = m - i - 1.0; y_r = y_l + 1.0;
1298       for (j=0; j<a->rlen[i]; j++) {
1299         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1300         if (a->val[shift+8*j] !=  0.) continue;
1301         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1302       }
1303     }
1304     color = PETSC_DRAW_RED;
1305     for (i=0; i<m; i++) {
1306       shift = a->sliidx[i>>3]+(i&0x07);
1307       y_l = m - i - 1.0; y_r = y_l + 1.0;
1308       for (j=0; j<a->rlen[i]; j++) {
1309         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1310         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1311         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1312       }
1313     }
1314     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1315   } else {
1316     /* use contour shading to indicate magnitude of values */
1317     /* first determine max of all nonzero values */
1318     PetscReal minv=0.0,maxv=0.0;
1319     PetscInt  count=0;
1320     PetscDraw popup;
1321     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1322       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1323     }
1324     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1325     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1326     ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr);
1327 
1328     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1329     for (i=0; i<m; i++) {
1330       shift = a->sliidx[i>>3]+(i&0x07);
1331       y_l = m - i - 1.0;
1332       y_r = y_l + 1.0;
1333       for (j=0; j<a->rlen[i]; j++) {
1334         x_l = a->colidx[shift+j*8];
1335         x_r = x_l + 1.0;
1336         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1337         ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1338         count++;
1339       }
1340     }
1341     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1342   }
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #include <petscdraw.h>
MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)1347 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1348 {
1349   PetscDraw      draw;
1350   PetscReal      xr,yr,xl,yl,h,w;
1351   PetscBool      isnull;
1352   PetscErrorCode ierr;
1353 
1354   PetscFunctionBegin;
1355   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1356   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1357   if (isnull) PetscFunctionReturn(0);
1358 
1359   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1360   xr  += w;          yr += h;         xl = -w;     yl = -h;
1361   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1362   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1363   ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr);
1364   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1365   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
MatView_SeqSELL(Mat A,PetscViewer viewer)1369 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1370 {
1371   PetscBool      iascii,isbinary,isdraw;
1372   PetscErrorCode ierr;
1373 
1374   PetscFunctionBegin;
1375   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1376   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1377   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1378   if (iascii) {
1379     ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr);
1380   } else if (isbinary) {
1381     /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */
1382   } else if (isdraw) {
1383     ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr);
1384   }
1385   PetscFunctionReturn(0);
1386 }
1387 
MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)1388 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1389 {
1390   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1391   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1392   MatScalar      *vp;
1393   PetscErrorCode ierr;
1394 
1395   PetscFunctionBegin;
1396   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1397   /* To do: compress out the unused elements */
1398   ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
1399   ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr);
1400   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr);
1401   ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr);
1402   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1403   for (i=0; i<a->totalslices; ++i) {
1404     shift = a->sliidx[i];    /* starting index of the slice */
1405     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1406     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1407     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1408       row  = 8*i + row_in_slice;
1409       nrow = a->rlen[row]; /* number of nonzeros in row */
1410       /*
1411         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1412         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1413       */
1414       lastcol = 0;
1415       if (nrow>0) { /* nonempty row */
1416         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1417       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1418         for (j=1;j<8;j++) {
1419           if (a->rlen[8*i+j]) {
1420             lastcol = cp[j];
1421             break;
1422           }
1423         }
1424       } else {
1425         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1426       }
1427 
1428       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1429         cp[8*k+row_in_slice] = lastcol;
1430         vp[8*k+row_in_slice] = (MatScalar)0;
1431       }
1432     }
1433   }
1434 
1435   A->info.mallocs += a->reallocs;
1436   a->reallocs      = 0;
1437 
1438   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1439   PetscFunctionReturn(0);
1440 }
1441 
MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo * info)1442 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1443 {
1444   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1445 
1446   PetscFunctionBegin;
1447   info->block_size   = 1.0;
1448   info->nz_allocated = a->maxallocmat;
1449   info->nz_used      = a->sliidx[a->totalslices]; /* include padding zeros */
1450   info->nz_unneeded  = (a->maxallocmat-a->sliidx[a->totalslices]);
1451   info->assemblies   = A->num_ass;
1452   info->mallocs      = A->info.mallocs;
1453   info->memory       = ((PetscObject)A)->mem;
1454   if (A->factortype) {
1455     info->fill_ratio_given  = A->info.fill_ratio_given;
1456     info->fill_ratio_needed = A->info.fill_ratio_needed;
1457     info->factor_mallocs    = A->info.factor_mallocs;
1458   } else {
1459     info->fill_ratio_given  = 0;
1460     info->fill_ratio_needed = 0;
1461     info->factor_mallocs    = 0;
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)1466 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1467 {
1468   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1469   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1470   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1471   MatScalar      *vp,value;
1472   PetscErrorCode ierr;
1473 
1474   PetscFunctionBegin;
1475   for (k=0; k<m; k++) { /* loop over added rows */
1476     row = im[k];
1477     if (row < 0) continue;
1478     if (PetscUnlikelyDebug(row >= A->rmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1479     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1480     cp    = a->colidx+shift; /* pointer to the row */
1481     vp    = a->val+shift; /* pointer to the row */
1482     nrow  = a->rlen[row];
1483     low   = 0;
1484     high  = nrow;
1485 
1486     for (l=0; l<n; l++) { /* loop over added columns */
1487       col = in[l];
1488       if (col<0) continue;
1489       if (PetscUnlikelyDebug(col >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1490       if (a->roworiented) {
1491         value = v[l+k*n];
1492       } else {
1493         value = v[k+l*m];
1494       }
1495       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1496 
1497       /* search in this row for the specified colmun, i indicates the column to be set */
1498       if (col <= lastcol) low = 0;
1499       else high = nrow;
1500       lastcol = col;
1501       while (high-low > 5) {
1502         t = (low+high)/2;
1503         if (*(cp+t*8) > col) high = t;
1504         else low = t;
1505       }
1506       for (i=low; i<high; i++) {
1507         if (*(cp+i*8) > col) break;
1508         if (*(cp+i*8) == col) {
1509           if (is == ADD_VALUES) *(vp+i*8) += value;
1510           else *(vp+i*8) = value;
1511           low = i + 1;
1512           goto noinsert;
1513         }
1514       }
1515       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1516       if (nonew == 1) goto noinsert;
1517       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1518       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1519       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1520       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1521       for (ii=nrow-1; ii>=i; ii--) {
1522         *(cp+(ii+1)*8) = *(cp+ii*8);
1523         *(vp+(ii+1)*8) = *(vp+ii*8);
1524       }
1525       a->rlen[row]++;
1526       *(cp+i*8) = col;
1527       *(vp+i*8) = value;
1528       a->nz++;
1529       A->nonzerostate++;
1530       low = i+1; high++; nrow++;
1531 noinsert:;
1532     }
1533     a->rlen[row] = nrow;
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)1538 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1539 {
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   /* If the two matrices have the same copy implementation, use fast copy. */
1544   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1545     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1546     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1547 
1548     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1549     ierr = PetscArraycpy(b->val,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr);
1550   } else {
1551     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1552   }
1553   PetscFunctionReturn(0);
1554 }
1555 
MatSetUp_SeqSELL(Mat A)1556 PetscErrorCode MatSetUp_SeqSELL(Mat A)
1557 {
1558   PetscErrorCode ierr;
1559 
1560   PetscFunctionBegin;
1561   ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1562   PetscFunctionReturn(0);
1563 }
1564 
MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar * array[])1565 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1566 {
1567   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1568 
1569   PetscFunctionBegin;
1570   *array = a->val;
1571   PetscFunctionReturn(0);
1572 }
1573 
MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar * array[])1574 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1575 {
1576   PetscFunctionBegin;
1577   PetscFunctionReturn(0);
1578 }
1579 
MatRealPart_SeqSELL(Mat A)1580 PetscErrorCode MatRealPart_SeqSELL(Mat A)
1581 {
1582   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1583   PetscInt    i;
1584   MatScalar   *aval=a->val;
1585 
1586   PetscFunctionBegin;
1587   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1588   PetscFunctionReturn(0);
1589 }
1590 
MatImaginaryPart_SeqSELL(Mat A)1591 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1592 {
1593   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1594   PetscInt       i;
1595   MatScalar      *aval=a->val;
1596   PetscErrorCode ierr;
1597 
1598   PetscFunctionBegin;
1599   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1600   ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1601   PetscFunctionReturn(0);
1602 }
1603 
MatScale_SeqSELL(Mat inA,PetscScalar alpha)1604 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1605 {
1606   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1607   MatScalar      *aval=a->val;
1608   PetscScalar    oalpha=alpha;
1609   PetscBLASInt   one=1,size;
1610   PetscErrorCode ierr;
1611 
1612   PetscFunctionBegin;
1613   ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr);
1614   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1615   ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1616   ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr);
1617   PetscFunctionReturn(0);
1618 }
1619 
MatShift_SeqSELL(Mat Y,PetscScalar a)1620 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1621 {
1622   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   if (!Y->preallocated || !y->nz) {
1627     ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr);
1628   }
1629   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1630   PetscFunctionReturn(0);
1631 }
1632 
MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)1633 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1634 {
1635   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1636   PetscScalar       *x,sum,*t;
1637   const MatScalar   *idiag=NULL,*mdiag;
1638   const PetscScalar *b,*xb;
1639   PetscInt          n,m=A->rmap->n,i,j,shift;
1640   const PetscInt    *diag;
1641   PetscErrorCode    ierr;
1642 
1643   PetscFunctionBegin;
1644   its = its*lits;
1645 
1646   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1647   if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);}
1648   a->fshift = fshift;
1649   a->omega  = omega;
1650 
1651   diag  = a->diag;
1652   t     = a->ssor_work;
1653   idiag = a->idiag;
1654   mdiag = a->mdiag;
1655 
1656   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1657   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1658   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1659   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1660   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1661   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1662 
1663   if (flag & SOR_ZERO_INITIAL_GUESS) {
1664     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1665       for (i=0; i<m; i++) {
1666         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1667         sum   = b[i];
1668         n     = (diag[i]-shift)/8;
1669         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1670         t[i]  = sum;
1671         x[i]  = sum*idiag[i];
1672       }
1673       xb   = t;
1674       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1675     } else xb = b;
1676     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1677       for (i=m-1; i>=0; i--) {
1678         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1679         sum   = xb[i];
1680         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1681         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1682         if (xb == b) {
1683           x[i] = sum*idiag[i];
1684         } else {
1685           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1686         }
1687       }
1688       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1689     }
1690     its--;
1691   }
1692   while (its--) {
1693     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1694       for (i=0; i<m; i++) {
1695         /* lower */
1696         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1697         sum   = b[i];
1698         n     = (diag[i]-shift)/8;
1699         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1700         t[i]  = sum;             /* save application of the lower-triangular part */
1701         /* upper */
1702         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1703         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1704         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1705       }
1706       xb   = t;
1707       ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1708     } else xb = b;
1709     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1710       for (i=m-1; i>=0; i--) {
1711         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1712         sum = xb[i];
1713         if (xb == b) {
1714           /* whole matrix (no checkpointing available) */
1715           n     = a->rlen[i];
1716           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1717           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1718         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1719           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1720           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1721           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1722         }
1723       }
1724       if (xb == b) {
1725         ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1726       } else {
1727         ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1728       }
1729     }
1730   }
1731   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1732   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 /* -------------------------------------------------------------------*/
1737 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1738                                        MatGetRow_SeqSELL,
1739                                        MatRestoreRow_SeqSELL,
1740                                        MatMult_SeqSELL,
1741                                /* 4*/  MatMultAdd_SeqSELL,
1742                                        MatMultTranspose_SeqSELL,
1743                                        MatMultTransposeAdd_SeqSELL,
1744                                        NULL,
1745                                        NULL,
1746                                        NULL,
1747                                /* 10*/ NULL,
1748                                        NULL,
1749                                        NULL,
1750                                        MatSOR_SeqSELL,
1751                                        NULL,
1752                                /* 15*/ MatGetInfo_SeqSELL,
1753                                        MatEqual_SeqSELL,
1754                                        MatGetDiagonal_SeqSELL,
1755                                        MatDiagonalScale_SeqSELL,
1756                                        NULL,
1757                                /* 20*/ NULL,
1758                                        MatAssemblyEnd_SeqSELL,
1759                                        MatSetOption_SeqSELL,
1760                                        MatZeroEntries_SeqSELL,
1761                                /* 24*/ NULL,
1762                                        NULL,
1763                                        NULL,
1764                                        NULL,
1765                                        NULL,
1766                                /* 29*/ MatSetUp_SeqSELL,
1767                                        NULL,
1768                                        NULL,
1769                                        NULL,
1770                                        NULL,
1771                                /* 34*/ MatDuplicate_SeqSELL,
1772                                        NULL,
1773                                        NULL,
1774                                        NULL,
1775                                        NULL,
1776                                /* 39*/ NULL,
1777                                        NULL,
1778                                        NULL,
1779                                        MatGetValues_SeqSELL,
1780                                        MatCopy_SeqSELL,
1781                                /* 44*/ NULL,
1782                                        MatScale_SeqSELL,
1783                                        MatShift_SeqSELL,
1784                                        NULL,
1785                                        NULL,
1786                                /* 49*/ NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        NULL,
1790                                        NULL,
1791                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1792                                        NULL,
1793                                        NULL,
1794                                        NULL,
1795                                        NULL,
1796                                /* 59*/ NULL,
1797                                        MatDestroy_SeqSELL,
1798                                        MatView_SeqSELL,
1799                                        NULL,
1800                                        NULL,
1801                                /* 64*/ NULL,
1802                                        NULL,
1803                                        NULL,
1804                                        NULL,
1805                                        NULL,
1806                                /* 69*/ NULL,
1807                                        NULL,
1808                                        NULL,
1809                                        NULL,
1810                                        NULL,
1811                                /* 74*/ NULL,
1812                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1813                                        NULL,
1814                                        NULL,
1815                                        NULL,
1816                                /* 79*/ NULL,
1817                                        NULL,
1818                                        NULL,
1819                                        NULL,
1820                                        NULL,
1821                                /* 84*/ NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        NULL,
1825                                        NULL,
1826                                /* 89*/ NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        NULL,
1831                                /* 94*/ NULL,
1832                                        NULL,
1833                                        NULL,
1834                                        NULL,
1835                                        NULL,
1836                                /* 99*/ NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        MatConjugate_SeqSELL,
1840                                        NULL,
1841                                /*104*/ NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        NULL,
1845                                        NULL,
1846                                /*109*/ NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        MatMissingDiagonal_SeqSELL,
1851                                /*114*/ NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        NULL,
1856                                /*119*/ NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        NULL,
1860                                        NULL,
1861                                /*124*/ NULL,
1862                                        NULL,
1863                                        NULL,
1864                                        NULL,
1865                                        NULL,
1866                                /*129*/ NULL,
1867                                        NULL,
1868                                        NULL,
1869                                        NULL,
1870                                        NULL,
1871                                /*134*/ NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        NULL,
1875                                        NULL,
1876                                /*139*/ NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        MatFDColoringSetUp_SeqXAIJ,
1880                                        NULL,
1881                                /*144*/ NULL
1882 };
1883 
MatStoreValues_SeqSELL(Mat mat)1884 PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1885 {
1886   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1887   PetscErrorCode ierr;
1888 
1889   PetscFunctionBegin;
1890   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1891 
1892   /* allocate space for values if not already there */
1893   if (!a->saved_values) {
1894     ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr);
1895     ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1896   }
1897 
1898   /* copy values over */
1899   ierr = PetscArraycpy(a->saved_values,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr);
1900   PetscFunctionReturn(0);
1901 }
1902 
MatRetrieveValues_SeqSELL(Mat mat)1903 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1904 {
1905   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1906   PetscErrorCode ierr;
1907 
1908   PetscFunctionBegin;
1909   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1910   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1911   ierr = PetscArraycpy(a->val,a->saved_values,a->sliidx[a->totalslices]);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 /*@C
1916  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1917 
1918  Not Collective
1919 
1920  Input Parameters:
1921  .  mat - a MATSEQSELL matrix
1922  .  array - pointer to the data
1923 
1924  Level: intermediate
1925 
1926  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1927  @*/
MatSeqSELLRestoreArray(Mat A,PetscScalar ** array)1928 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1929 {
1930   PetscErrorCode ierr;
1931 
1932   PetscFunctionBegin;
1933   ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
1936 
MatCreate_SeqSELL(Mat B)1937 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1938 {
1939   Mat_SeqSELL    *b;
1940   PetscMPIInt    size;
1941   PetscErrorCode ierr;
1942 
1943   PetscFunctionBegin;
1944   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1945   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1946 
1947   ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
1948 
1949   B->data = (void*)b;
1950 
1951   ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1952 
1953   b->row                = NULL;
1954   b->col                = NULL;
1955   b->icol               = NULL;
1956   b->reallocs           = 0;
1957   b->ignorezeroentries  = PETSC_FALSE;
1958   b->roworiented        = PETSC_TRUE;
1959   b->nonew              = 0;
1960   b->diag               = NULL;
1961   b->solve_work         = NULL;
1962   B->spptr              = NULL;
1963   b->saved_values       = NULL;
1964   b->idiag              = NULL;
1965   b->mdiag              = NULL;
1966   b->ssor_work          = NULL;
1967   b->omega              = 1.0;
1968   b->fshift             = 0.0;
1969   b->idiagvalid         = PETSC_FALSE;
1970   b->keepnonzeropattern = PETSC_FALSE;
1971 
1972   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr);
1973   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr);
1974   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr);
1975   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr);
1976   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr);
1977   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr);
1978   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr);
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 /*
1983  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1984  */
MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)1985 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1986 {
1987   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1988   PetscInt       i,m=A->rmap->n;
1989   PetscInt       totalslices=a->totalslices;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   c = (Mat_SeqSELL*)C->data;
1994 
1995   C->factortype = A->factortype;
1996   c->row        = NULL;
1997   c->col        = NULL;
1998   c->icol       = NULL;
1999   c->reallocs   = 0;
2000 
2001   C->assembled = PETSC_TRUE;
2002 
2003   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
2004   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
2005 
2006   ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr);
2007   ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
2008   ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr);
2009   ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
2010 
2011   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
2012   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
2013 
2014   /* allocate the matrix space */
2015   if (mallocmatspace) {
2016     ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr);
2017     ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2018 
2019     c->singlemalloc = PETSC_TRUE;
2020 
2021     if (m > 0) {
2022       ierr = PetscArraycpy(c->colidx,a->colidx,a->maxallocmat);CHKERRQ(ierr);
2023       if (cpvalues == MAT_COPY_VALUES) {
2024         ierr = PetscArraycpy(c->val,a->val,a->maxallocmat);CHKERRQ(ierr);
2025       } else {
2026         ierr = PetscArrayzero(c->val,a->maxallocmat);CHKERRQ(ierr);
2027       }
2028     }
2029   }
2030 
2031   c->ignorezeroentries = a->ignorezeroentries;
2032   c->roworiented       = a->roworiented;
2033   c->nonew             = a->nonew;
2034   if (a->diag) {
2035     ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr);
2036     ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
2037     for (i=0; i<m; i++) {
2038       c->diag[i] = a->diag[i];
2039     }
2040   } else c->diag = NULL;
2041 
2042   c->solve_work         = NULL;
2043   c->saved_values       = NULL;
2044   c->idiag              = NULL;
2045   c->ssor_work          = NULL;
2046   c->keepnonzeropattern = a->keepnonzeropattern;
2047   c->free_val           = PETSC_TRUE;
2048   c->free_colidx        = PETSC_TRUE;
2049 
2050   c->maxallocmat  = a->maxallocmat;
2051   c->maxallocrow  = a->maxallocrow;
2052   c->rlenmax      = a->rlenmax;
2053   c->nz           = a->nz;
2054   C->preallocated = PETSC_TRUE;
2055 
2056   c->nonzerorowcnt = a->nonzerorowcnt;
2057   C->nonzerostate  = A->nonzerostate;
2058 
2059   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat * B)2063 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
2064 {
2065   PetscErrorCode ierr;
2066 
2067   PetscFunctionBegin;
2068   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
2069   ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
2070   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
2071     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
2072   }
2073   ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2074   ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 /*@C
2079  MatCreateSeqSELL - Creates a sparse matrix in SELL format.
2080 
2081  Collective
2082 
2083  Input Parameters:
2084 +  comm - MPI communicator, set to PETSC_COMM_SELF
2085 .  m - number of rows
2086 .  n - number of columns
2087 .  rlenmax - maximum number of nonzeros in a row
2088 -  rlen - array containing the number of nonzeros in the various rows
2089  (possibly different for each row) or NULL
2090 
2091  Output Parameter:
2092 .  A - the matrix
2093 
2094  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2095  MatXXXXSetPreallocation() paradigm instead of this routine directly.
2096  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
2097 
2098  Notes:
2099  If nnz is given then nz is ignored
2100 
2101  Specify the preallocated storage with either rlenmax or rlen (not both).
2102  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2103  allocation.  For large problems you MUST preallocate memory or you
2104  will get TERRIBLE performance, see the users' manual chapter on matrices.
2105 
2106  Level: intermediate
2107 
2108  .seealso: MatCreate(), MatCreateSELL(), MatSetValues()
2109 
2110  @*/
MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat * A)2111 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2112 {
2113   PetscErrorCode ierr;
2114 
2115   PetscFunctionBegin;
2116   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2117   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2118   ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr);
2119   ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 
MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)2123 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2124 {
2125   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2126   PetscInt       totalslices=a->totalslices;
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   /* If the  matrix dimensions are not equal,or no of nonzeros */
2131   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2132     *flg = PETSC_FALSE;
2133     PetscFunctionReturn(0);
2134   }
2135   /* if the a->colidx are the same */
2136   ierr = PetscArraycmp(a->colidx,b->colidx,a->sliidx[totalslices],flg);CHKERRQ(ierr);
2137   if (!*flg) PetscFunctionReturn(0);
2138   /* if a->val are the same */
2139   ierr = PetscArraycmp(a->val,b->val,a->sliidx[totalslices],flg);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
MatSeqSELLInvalidateDiagonal(Mat A)2143 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2144 {
2145   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2146 
2147   PetscFunctionBegin;
2148   a->idiagvalid  = PETSC_FALSE;
2149   PetscFunctionReturn(0);
2150 }
2151 
MatConjugate_SeqSELL(Mat A)2152 PetscErrorCode MatConjugate_SeqSELL(Mat A)
2153 {
2154 #if defined(PETSC_USE_COMPLEX)
2155   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2156   PetscInt    i;
2157   PetscScalar *val = a->val;
2158 
2159   PetscFunctionBegin;
2160   for (i=0; i<a->sliidx[a->totalslices]; i++) {
2161     val[i] = PetscConj(val[i]);
2162   }
2163 #else
2164   PetscFunctionBegin;
2165 #endif
2166   PetscFunctionReturn(0);
2167 }
2168