1
2 /*
3 Defines the basic matrix operations for the SELL matrix storage format.
4 */
5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blocktranspose.h>
8 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
9
10 #include <immintrin.h>
11
12 #if !defined(_MM_SCALE_8)
13 #define _MM_SCALE_8 8
14 #endif
15
16 #if defined(__AVX512F__)
17 /* these do not work
18 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
19 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
20 */
21 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
22 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
23 vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \
24 vec_vals = _mm512_loadu_pd(aval); \
25 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
26 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y)
27 #elif defined(__AVX2__) && defined(__FMA__)
28 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
29 vec_vals = _mm256_loadu_pd(aval); \
30 vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \
31 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \
32 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y)
33 #endif
34 #endif /* PETSC_HAVE_IMMINTRIN_H */
35
36 /*@C
37 MatSeqSELLSetPreallocation - For good matrix assembly performance
38 the user should preallocate the matrix storage by setting the parameter nz
39 (or the array nnz). By setting these parameters accurately, performance
40 during matrix assembly can be increased significantly.
41
42 Collective
43
44 Input Parameters:
45 + B - The matrix
46 . nz - number of nonzeros per row (same for all rows)
47 - nnz - array containing the number of nonzeros in the various rows
48 (possibly different for each row) or NULL
49
50 Notes:
51 If nnz is given then nz is ignored.
52
53 Specify the preallocated storage with either nz or nnz (not both).
54 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
55 allocation. For large problems you MUST preallocate memory or you
56 will get TERRIBLE performance, see the users' manual chapter on matrices.
57
58 You can call MatGetInfo() to get information on how effective the preallocation was;
59 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
60 You can also run with the option -info and look for messages with the string
61 malloc in them to see if additional memory allocation was needed.
62
63 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
64 entries or columns indices.
65
66 The maximum number of nonzeos in any row should be as accurate as possible.
67 If it is underestimated, you will get bad performance due to reallocation
68 (MatSeqXSELLReallocateSELL).
69
70 Level: intermediate
71
72 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
73
74 @*/
MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])75 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
76 {
77 PetscErrorCode ierr;
78
79 PetscFunctionBegin;
80 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
81 PetscValidType(B,1);
82 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr);
83 PetscFunctionReturn(0);
84 }
85
MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])86 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
87 {
88 Mat_SeqSELL *b;
89 PetscInt i,j,totalslices;
90 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
91 PetscErrorCode ierr;
92
93 PetscFunctionBegin;
94 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
95 if (maxallocrow == MAT_SKIP_ALLOCATION) {
96 skipallocation = PETSC_TRUE;
97 maxallocrow = 0;
98 }
99
100 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
101 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
102
103 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
104 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
105 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
106 if (rlen) {
107 for (i=0; i<B->rmap->n; i++) {
108 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
109 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
110 }
111 }
112
113 B->preallocated = PETSC_TRUE;
114
115 b = (Mat_SeqSELL*)B->data;
116
117 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
118 b->totalslices = totalslices;
119 if (!skipallocation) {
120 if (B->rmap->n & 0x07) {ierr = PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);CHKERRQ(ierr);}
121
122 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
123 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr);
124 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
125 }
126 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
127 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
128 else if (maxallocrow < 0) maxallocrow = 1;
129 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
130 } else {
131 maxallocrow = 0;
132 b->sliidx[0] = 0;
133 for (i=1; i<totalslices; i++) {
134 b->sliidx[i] = 0;
135 for (j=0;j<8;j++) {
136 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
137 }
138 maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
139 ierr = PetscIntSumError(b->sliidx[i-1],8*b->sliidx[i],&b->sliidx[i]);CHKERRQ(ierr);
140 }
141 /* last slice */
142 b->sliidx[totalslices] = 0;
143 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
144 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
145 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
146 }
147
148 /* allocate space for val, colidx, rlen */
149 /* FIXME: should B's old memory be unlogged? */
150 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr);
151 /* FIXME: assuming an element of the bit array takes 8 bits */
152 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr);
153 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
154 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
155 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr);
156 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr);
157
158 b->singlemalloc = PETSC_TRUE;
159 b->free_val = PETSC_TRUE;
160 b->free_colidx = PETSC_TRUE;
161 } else {
162 b->free_val = PETSC_FALSE;
163 b->free_colidx = PETSC_FALSE;
164 }
165
166 b->nz = 0;
167 b->maxallocrow = maxallocrow;
168 b->rlenmax = maxallocrow;
169 b->maxallocmat = b->sliidx[totalslices];
170 B->info.nz_unneeded = (double)b->maxallocmat;
171 if (realalloc) {
172 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
173 }
174 PetscFunctionReturn(0);
175 }
176
MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)177 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
178 {
179 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data;
180 PetscInt shift;
181
182 PetscFunctionBegin;
183 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
184 if (nz) *nz = a->rlen[row];
185 shift = a->sliidx[row>>3]+(row&0x07);
186 if (!a->getrowcols) {
187 PetscErrorCode ierr;
188
189 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr);
190 }
191 if (idx) {
192 PetscInt j;
193 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j];
194 *idx = a->getrowcols;
195 }
196 if (v) {
197 PetscInt j;
198 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j];
199 *v = a->getrowvals;
200 }
201 PetscFunctionReturn(0);
202 }
203
MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)204 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
205 {
206 PetscFunctionBegin;
207 PetscFunctionReturn(0);
208 }
209
MatConvert_SeqSELL_SeqAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)210 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
211 {
212 Mat B;
213 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
214 PetscInt i;
215 PetscErrorCode ierr;
216
217 PetscFunctionBegin;
218 if (reuse == MAT_REUSE_MATRIX) {
219 B = *newmat;
220 ierr = MatZeroEntries(B);CHKERRQ(ierr);
221 } else {
222 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
223 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
224 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
225 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr);
226 }
227
228 for (i=0; i<A->rmap->n; i++) {
229 PetscInt nz = 0,*cols = NULL;
230 PetscScalar *vals = NULL;
231
232 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
233 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
234 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
235 }
236
237 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
238 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239 B->rmap->bs = A->rmap->bs;
240
241 if (reuse == MAT_INPLACE_MATRIX) {
242 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
243 } else {
244 *newmat = B;
245 }
246 PetscFunctionReturn(0);
247 }
248
249 #include <../src/mat/impls/aij/seq/aij.h>
250
MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)251 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
252 {
253 Mat B;
254 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
255 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
256 const PetscInt *cols;
257 const PetscScalar *vals;
258 PetscErrorCode ierr;
259
260 PetscFunctionBegin;
261
262 if (reuse == MAT_REUSE_MATRIX) {
263 B = *newmat;
264 } else {
265 /* Can we just use ilen? */
266 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr);
267 for (i=0; i<m; i++) {
268 rowlengths[i] = ai[i+1] - ai[i];
269 }
270
271 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
272 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr);
273 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr);
274 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr);
275 ierr = PetscFree(rowlengths);CHKERRQ(ierr);
276 }
277
278 for (row=0; row<m; row++) {
279 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
280 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
281 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
282 }
283 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
284 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
285 B->rmap->bs = A->rmap->bs;
286
287 if (reuse == MAT_INPLACE_MATRIX) {
288 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
289 } else {
290 *newmat = B;
291 }
292 PetscFunctionReturn(0);
293 }
294
MatMult_SeqSELL(Mat A,Vec xx,Vec yy)295 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
296 {
297 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
298 PetscScalar *y;
299 const PetscScalar *x;
300 const MatScalar *aval=a->val;
301 PetscInt totalslices=a->totalslices;
302 const PetscInt *acolidx=a->colidx;
303 PetscInt i,j;
304 PetscErrorCode ierr;
305 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
306 __m512d vec_x,vec_y,vec_vals;
307 __m256i vec_idx;
308 __mmask8 mask;
309 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
310 __m256i vec_idx2,vec_idx3,vec_idx4;
311 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
312 __m128i vec_idx;
313 __m256d vec_x,vec_y,vec_y2,vec_vals;
314 MatScalar yval;
315 PetscInt r,rows_left,row,nnz_in_row;
316 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
317 __m128d vec_x_tmp;
318 __m256d vec_x,vec_y,vec_y2,vec_vals;
319 MatScalar yval;
320 PetscInt r,rows_left,row,nnz_in_row;
321 #else
322 PetscScalar sum[8];
323 #endif
324
325 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
326 #pragma disjoint(*x,*y,*aval)
327 #endif
328
329 PetscFunctionBegin;
330 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
331 ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
332 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
333 for (i=0; i<totalslices; i++) { /* loop over slices */
334 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
335 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
336
337 vec_y = _mm512_setzero_pd();
338 vec_y2 = _mm512_setzero_pd();
339 vec_y3 = _mm512_setzero_pd();
340 vec_y4 = _mm512_setzero_pd();
341
342 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
343 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
344 case 3:
345 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
346 acolidx += 8; aval += 8;
347 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
348 acolidx += 8; aval += 8;
349 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
350 acolidx += 8; aval += 8;
351 j += 3;
352 break;
353 case 2:
354 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
355 acolidx += 8; aval += 8;
356 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
357 acolidx += 8; aval += 8;
358 j += 2;
359 break;
360 case 1:
361 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
362 acolidx += 8; aval += 8;
363 j += 1;
364 break;
365 }
366 #pragma novector
367 for (; j<(a->sliidx[i+1]>>3); j+=4) {
368 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
369 acolidx += 8; aval += 8;
370 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
371 acolidx += 8; aval += 8;
372 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
373 acolidx += 8; aval += 8;
374 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
375 acolidx += 8; aval += 8;
376 }
377
378 vec_y = _mm512_add_pd(vec_y,vec_y2);
379 vec_y = _mm512_add_pd(vec_y,vec_y3);
380 vec_y = _mm512_add_pd(vec_y,vec_y4);
381 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
382 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
383 _mm512_mask_storeu_pd(&y[8*i],mask,vec_y);
384 } else {
385 _mm512_storeu_pd(&y[8*i],vec_y);
386 }
387 }
388 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
389 for (i=0; i<totalslices; i++) { /* loop over full slices */
390 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
391 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
392
393 /* last slice may have padding rows. Don't use vectorization. */
394 if (i == totalslices-1 && (A->rmap->n & 0x07)) {
395 rows_left = A->rmap->n - 8*i;
396 for (r=0; r<rows_left; ++r) {
397 yval = (MatScalar)0;
398 row = 8*i + r;
399 nnz_in_row = a->rlen[row];
400 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
401 y[row] = yval;
402 }
403 break;
404 }
405
406 vec_y = _mm256_setzero_pd();
407 vec_y2 = _mm256_setzero_pd();
408
409 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
410 #pragma novector
411 #pragma unroll(2)
412 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
413 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
414 aval += 4; acolidx += 4;
415 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2);
416 aval += 4; acolidx += 4;
417 }
418
419 _mm256_storeu_pd(y+i*8,vec_y);
420 _mm256_storeu_pd(y+i*8+4,vec_y2);
421 }
422 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
423 for (i=0; i<totalslices; i++) { /* loop over full slices */
424 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
425 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
426
427 vec_y = _mm256_setzero_pd();
428 vec_y2 = _mm256_setzero_pd();
429
430 /* last slice may have padding rows. Don't use vectorization. */
431 if (i == totalslices-1 && (A->rmap->n & 0x07)) {
432 rows_left = A->rmap->n - 8*i;
433 for (r=0; r<rows_left; ++r) {
434 yval = (MatScalar)0;
435 row = 8*i + r;
436 nnz_in_row = a->rlen[row];
437 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
438 y[row] = yval;
439 }
440 break;
441 }
442
443 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
444 #pragma novector
445 #pragma unroll(2)
446 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
447 vec_vals = _mm256_loadu_pd(aval);
448 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
449 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
450 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
451 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
452 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
453 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
454 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
455 aval += 4;
456
457 vec_vals = _mm256_loadu_pd(aval);
458 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
459 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
460 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
461 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
462 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
463 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
464 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
465 aval += 4;
466 }
467
468 _mm256_storeu_pd(y + i*8, vec_y);
469 _mm256_storeu_pd(y + i*8 + 4, vec_y2);
470 }
471 #else
472 for (i=0; i<totalslices; i++) { /* loop over slices */
473 for (j=0; j<8; j++) sum[j] = 0.0;
474 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
475 sum[0] += aval[j] * x[acolidx[j]];
476 sum[1] += aval[j+1] * x[acolidx[j+1]];
477 sum[2] += aval[j+2] * x[acolidx[j+2]];
478 sum[3] += aval[j+3] * x[acolidx[j+3]];
479 sum[4] += aval[j+4] * x[acolidx[j+4]];
480 sum[5] += aval[j+5] * x[acolidx[j+5]];
481 sum[6] += aval[j+6] * x[acolidx[j+6]];
482 sum[7] += aval[j+7] * x[acolidx[j+7]];
483 }
484 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
485 for (j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
486 } else {
487 for (j=0; j<8; j++) y[8*i+j] = sum[j];
488 }
489 }
490 #endif
491
492 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */
493 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
494 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
495 PetscFunctionReturn(0);
496 }
497
498 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)499 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
500 {
501 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
502 PetscScalar *y,*z;
503 const PetscScalar *x;
504 const MatScalar *aval=a->val;
505 PetscInt totalslices=a->totalslices;
506 const PetscInt *acolidx=a->colidx;
507 PetscInt i,j;
508 PetscErrorCode ierr;
509 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
510 __m512d vec_x,vec_y,vec_vals;
511 __m256i vec_idx;
512 __mmask8 mask;
513 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
514 __m256i vec_idx2,vec_idx3,vec_idx4;
515 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
516 __m128d vec_x_tmp;
517 __m256d vec_x,vec_y,vec_y2,vec_vals;
518 MatScalar yval;
519 PetscInt r,row,nnz_in_row;
520 #else
521 PetscScalar sum[8];
522 #endif
523
524 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
525 #pragma disjoint(*x,*y,*aval)
526 #endif
527
528 PetscFunctionBegin;
529 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
530 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
531 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
532 for (i=0; i<totalslices; i++) { /* loop over slices */
533 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
534 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
535
536 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
537 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
538 vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]);
539 } else {
540 vec_y = _mm512_loadu_pd(&y[8*i]);
541 }
542 vec_y2 = _mm512_setzero_pd();
543 vec_y3 = _mm512_setzero_pd();
544 vec_y4 = _mm512_setzero_pd();
545
546 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
547 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
548 case 3:
549 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
550 acolidx += 8; aval += 8;
551 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
552 acolidx += 8; aval += 8;
553 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
554 acolidx += 8; aval += 8;
555 j += 3;
556 break;
557 case 2:
558 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
559 acolidx += 8; aval += 8;
560 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
561 acolidx += 8; aval += 8;
562 j += 2;
563 break;
564 case 1:
565 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
566 acolidx += 8; aval += 8;
567 j += 1;
568 break;
569 }
570 #pragma novector
571 for (; j<(a->sliidx[i+1]>>3); j+=4) {
572 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
573 acolidx += 8; aval += 8;
574 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
575 acolidx += 8; aval += 8;
576 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
577 acolidx += 8; aval += 8;
578 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
579 acolidx += 8; aval += 8;
580 }
581
582 vec_y = _mm512_add_pd(vec_y,vec_y2);
583 vec_y = _mm512_add_pd(vec_y,vec_y3);
584 vec_y = _mm512_add_pd(vec_y,vec_y4);
585 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
586 _mm512_mask_storeu_pd(&z[8*i],mask,vec_y);
587 } else {
588 _mm512_storeu_pd(&z[8*i],vec_y);
589 }
590 }
591 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
592 for (i=0; i<totalslices; i++) { /* loop over full slices */
593 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
594 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
595
596 /* last slice may have padding rows. Don't use vectorization. */
597 if (i == totalslices-1 && (A->rmap->n & 0x07)) {
598 for (r=0; r<(A->rmap->n & 0x07); ++r) {
599 row = 8*i + r;
600 yval = (MatScalar)0.0;
601 nnz_in_row = a->rlen[row];
602 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
603 z[row] = y[row] + yval;
604 }
605 break;
606 }
607
608 vec_y = _mm256_loadu_pd(y+8*i);
609 vec_y2 = _mm256_loadu_pd(y+8*i+4);
610
611 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
612 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
613 vec_vals = _mm256_loadu_pd(aval);
614 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
615 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
616 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
617 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
618 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
619 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
620 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
621 aval += 4;
622
623 vec_vals = _mm256_loadu_pd(aval);
624 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
625 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
626 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
627 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
628 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
629 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
630 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
631 aval += 4;
632 }
633
634 _mm256_storeu_pd(z+i*8,vec_y);
635 _mm256_storeu_pd(z+i*8+4,vec_y2);
636 }
637 #else
638 for (i=0; i<totalslices; i++) { /* loop over slices */
639 for (j=0; j<8; j++) sum[j] = 0.0;
640 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
641 sum[0] += aval[j] * x[acolidx[j]];
642 sum[1] += aval[j+1] * x[acolidx[j+1]];
643 sum[2] += aval[j+2] * x[acolidx[j+2]];
644 sum[3] += aval[j+3] * x[acolidx[j+3]];
645 sum[4] += aval[j+4] * x[acolidx[j+4]];
646 sum[5] += aval[j+5] * x[acolidx[j+5]];
647 sum[6] += aval[j+6] * x[acolidx[j+6]];
648 sum[7] += aval[j+7] * x[acolidx[j+7]];
649 }
650 if (i == totalslices-1 && (A->rmap->n & 0x07)) {
651 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
652 } else {
653 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
654 }
655 }
656 #endif
657
658 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
659 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
660 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
661 PetscFunctionReturn(0);
662 }
663
MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)664 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
665 {
666 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
667 PetscScalar *y;
668 const PetscScalar *x;
669 const MatScalar *aval=a->val;
670 const PetscInt *acolidx=a->colidx;
671 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices;
672 PetscErrorCode ierr;
673
674 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
675 #pragma disjoint(*x,*y,*aval)
676 #endif
677
678 PetscFunctionBegin;
679 if (A->symmetric) {
680 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr);
681 PetscFunctionReturn(0);
682 }
683 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); }
684 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
685 ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
686 for (i=0; i<a->totalslices; i++) { /* loop over slices */
687 if (i == totalslices-1 && (A->rmap->n & 0x07)) {
688 for (r=0; r<(A->rmap->n & 0x07); ++r) {
689 row = 8*i + r;
690 nnz_in_row = a->rlen[row];
691 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
692 }
693 break;
694 }
695 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
696 y[acolidx[j]] += aval[j] * x[8*i];
697 y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
698 y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
699 y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
700 y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
701 y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
702 y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
703 y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
704 }
705 }
706 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr);
707 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
708 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
709 PetscFunctionReturn(0);
710 }
711
MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)712 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
713 {
714 PetscErrorCode ierr;
715
716 PetscFunctionBegin;
717 if (A->symmetric) {
718 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr);
719 } else {
720 ierr = VecSet(yy,0.0);CHKERRQ(ierr);
721 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr);
722 }
723 PetscFunctionReturn(0);
724 }
725
726 /*
727 Checks for missing diagonals
728 */
MatMissingDiagonal_SeqSELL(Mat A,PetscBool * missing,PetscInt * d)729 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d)
730 {
731 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
732 PetscInt *diag,i;
733 PetscErrorCode ierr;
734
735 PetscFunctionBegin;
736 *missing = PETSC_FALSE;
737 if (A->rmap->n > 0 && !(a->colidx)) {
738 *missing = PETSC_TRUE;
739 if (d) *d = 0;
740 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
741 } else {
742 diag = a->diag;
743 for (i=0; i<A->rmap->n; i++) {
744 if (diag[i] == -1) {
745 *missing = PETSC_TRUE;
746 if (d) *d = i;
747 ierr = PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);CHKERRQ(ierr);
748 break;
749 }
750 }
751 }
752 PetscFunctionReturn(0);
753 }
754
MatMarkDiagonal_SeqSELL(Mat A)755 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
756 {
757 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
758 PetscInt i,j,m=A->rmap->n,shift;
759 PetscErrorCode ierr;
760
761 PetscFunctionBegin;
762 if (!a->diag) {
763 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
764 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
765 a->free_diag = PETSC_TRUE;
766 }
767 for (i=0; i<m; i++) { /* loop over rows */
768 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
769 a->diag[i] = -1;
770 for (j=0; j<a->rlen[i]; j++) {
771 if (a->colidx[shift+j*8] == i) {
772 a->diag[i] = shift+j*8;
773 break;
774 }
775 }
776 }
777 PetscFunctionReturn(0);
778 }
779
780 /*
781 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
782 */
MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)783 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
784 {
785 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data;
786 PetscInt i,*diag,m = A->rmap->n;
787 MatScalar *val = a->val;
788 PetscScalar *idiag,*mdiag;
789 PetscErrorCode ierr;
790
791 PetscFunctionBegin;
792 if (a->idiagvalid) PetscFunctionReturn(0);
793 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
794 diag = a->diag;
795 if (!a->idiag) {
796 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr);
797 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr);
798 val = a->val;
799 }
800 mdiag = a->mdiag;
801 idiag = a->idiag;
802
803 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
804 for (i=0; i<m; i++) {
805 mdiag[i] = val[diag[i]];
806 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
807 if (PetscRealPart(fshift)) {
808 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr);
809 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
810 A->factorerror_zeropivot_value = 0.0;
811 A->factorerror_zeropivot_row = i;
812 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
813 }
814 idiag[i] = 1.0/val[diag[i]];
815 }
816 ierr = PetscLogFlops(m);CHKERRQ(ierr);
817 } else {
818 for (i=0; i<m; i++) {
819 mdiag[i] = val[diag[i]];
820 idiag[i] = omega/(fshift + val[diag[i]]);
821 }
822 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr);
823 }
824 a->idiagvalid = PETSC_TRUE;
825 PetscFunctionReturn(0);
826 }
827
MatZeroEntries_SeqSELL(Mat A)828 PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
829 {
830 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
831 PetscErrorCode ierr;
832
833 PetscFunctionBegin;
834 ierr = PetscArrayzero(a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr);
835 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
836 PetscFunctionReturn(0);
837 }
838
MatDestroy_SeqSELL(Mat A)839 PetscErrorCode MatDestroy_SeqSELL(Mat A)
840 {
841 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
842 PetscErrorCode ierr;
843
844 PetscFunctionBegin;
845 #if defined(PETSC_USE_LOG)
846 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
847 #endif
848 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr);
849 ierr = ISDestroy(&a->row);CHKERRQ(ierr);
850 ierr = ISDestroy(&a->col);CHKERRQ(ierr);
851 ierr = PetscFree(a->diag);CHKERRQ(ierr);
852 ierr = PetscFree(a->rlen);CHKERRQ(ierr);
853 ierr = PetscFree(a->sliidx);CHKERRQ(ierr);
854 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr);
855 ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
856 ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
857 ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
858 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr);
859
860 ierr = PetscFree(A->data);CHKERRQ(ierr);
861
862 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr);
866 PetscFunctionReturn(0);
867 }
868
MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)869 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
870 {
871 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
872 PetscErrorCode ierr;
873
874 PetscFunctionBegin;
875 switch (op) {
876 case MAT_ROW_ORIENTED:
877 a->roworiented = flg;
878 break;
879 case MAT_KEEP_NONZERO_PATTERN:
880 a->keepnonzeropattern = flg;
881 break;
882 case MAT_NEW_NONZERO_LOCATIONS:
883 a->nonew = (flg ? 0 : 1);
884 break;
885 case MAT_NEW_NONZERO_LOCATION_ERR:
886 a->nonew = (flg ? -1 : 0);
887 break;
888 case MAT_NEW_NONZERO_ALLOCATION_ERR:
889 a->nonew = (flg ? -2 : 0);
890 break;
891 case MAT_UNUSED_NONZERO_LOCATION_ERR:
892 a->nounused = (flg ? -1 : 0);
893 break;
894 case MAT_NEW_DIAGONALS:
895 case MAT_IGNORE_OFF_PROC_ENTRIES:
896 case MAT_USE_HASH_TABLE:
897 case MAT_SORTED_FULL:
898 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
899 break;
900 case MAT_SPD:
901 case MAT_SYMMETRIC:
902 case MAT_STRUCTURALLY_SYMMETRIC:
903 case MAT_HERMITIAN:
904 case MAT_SYMMETRY_ETERNAL:
905 /* These options are handled directly by MatSetOption() */
906 break;
907 default:
908 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
909 }
910 PetscFunctionReturn(0);
911 }
912
MatGetDiagonal_SeqSELL(Mat A,Vec v)913 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
914 {
915 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
916 PetscInt i,j,n,shift;
917 PetscScalar *x,zero=0.0;
918 PetscErrorCode ierr;
919
920 PetscFunctionBegin;
921 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
922 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
923
924 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
925 PetscInt *diag=a->diag;
926 ierr = VecGetArray(v,&x);CHKERRQ(ierr);
927 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
928 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
929 PetscFunctionReturn(0);
930 }
931
932 ierr = VecSet(v,zero);CHKERRQ(ierr);
933 ierr = VecGetArray(v,&x);CHKERRQ(ierr);
934 for (i=0; i<n; i++) { /* loop over rows */
935 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
936 x[i] = 0;
937 for (j=0; j<a->rlen[i]; j++) {
938 if (a->colidx[shift+j*8] == i) {
939 x[i] = a->val[shift+j*8];
940 break;
941 }
942 }
943 }
944 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
945 PetscFunctionReturn(0);
946 }
947
MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)948 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
949 {
950 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
951 const PetscScalar *l,*r;
952 PetscInt i,j,m,n,row;
953 PetscErrorCode ierr;
954
955 PetscFunctionBegin;
956 if (ll) {
957 /* The local size is used so that VecMPI can be passed to this routine
958 by MatDiagonalScale_MPISELL */
959 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr);
960 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
961 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
962 for (i=0; i<a->totalslices; i++) { /* loop over slices */
963 if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
964 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
965 if (row < (A->rmap->n & 0x07)) a->val[j] *= l[8*i+row];
966 }
967 } else {
968 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
969 a->val[j] *= l[8*i+row];
970 }
971 }
972 }
973 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
974 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
975 }
976 if (rr) {
977 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr);
978 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
979 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
980 for (i=0; i<a->totalslices; i++) { /* loop over slices */
981 if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
982 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
983 if (row < (A->rmap->n & 0x07)) a->val[j] *= r[a->colidx[j]];
984 }
985 } else {
986 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
987 a->val[j] *= r[a->colidx[j]];
988 }
989 }
990 }
991 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
992 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
993 }
994 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
995 PetscFunctionReturn(0);
996 }
997
998 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
999
MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])1000 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1001 {
1002 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1003 PetscInt *cp,i,k,low,high,t,row,col,l;
1004 PetscInt shift;
1005 MatScalar *vp;
1006
1007 PetscFunctionBegin;
1008 for (k=0; k<m; k++) { /* loop over requested rows */
1009 row = im[k];
1010 if (row<0) continue;
1011 if (PetscUnlikelyDebug(row >= A->rmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1012 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1013 cp = a->colidx+shift; /* pointer to the row */
1014 vp = a->val+shift; /* pointer to the row */
1015 for (l=0; l<n; l++) { /* loop over requested columns */
1016 col = in[l];
1017 if (col<0) continue;
1018 if (PetscUnlikelyDebug(col >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
1019 high = a->rlen[row]; low = 0; /* assume unsorted */
1020 while (high-low > 5) {
1021 t = (low+high)/2;
1022 if (*(cp+t*8) > col) high = t;
1023 else low = t;
1024 }
1025 for (i=low; i<high; i++) {
1026 if (*(cp+8*i) > col) break;
1027 if (*(cp+8*i) == col) {
1028 *v++ = *(vp+8*i);
1029 goto finished;
1030 }
1031 }
1032 *v++ = 0.0;
1033 finished:;
1034 }
1035 }
1036 PetscFunctionReturn(0);
1037 }
1038
MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)1039 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
1040 {
1041 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1042 PetscInt i,j,m=A->rmap->n,shift;
1043 const char *name;
1044 PetscViewerFormat format;
1045 PetscErrorCode ierr;
1046
1047 PetscFunctionBegin;
1048 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1049 if (format == PETSC_VIEWER_ASCII_MATLAB) {
1050 PetscInt nofinalvalue = 0;
1051 /*
1052 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1053 nofinalvalue = 1;
1054 }
1055 */
1056 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1057 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr);
1058 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr);
1059 #if defined(PETSC_USE_COMPLEX)
1060 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1061 #else
1062 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr);
1063 #endif
1064 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr);
1065
1066 for (i=0; i<m; i++) {
1067 shift = a->sliidx[i>>3]+(i&0x07);
1068 for (j=0; j<a->rlen[i]; j++) {
1069 #if defined(PETSC_USE_COMPLEX)
1070 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1071 #else
1072 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1073 #endif
1074 }
1075 }
1076 /*
1077 if (nofinalvalue) {
1078 #if defined(PETSC_USE_COMPLEX)
1079 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr);
1080 #else
1081 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr);
1082 #endif
1083 }
1084 */
1085 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
1086 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr);
1087 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1088 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1089 PetscFunctionReturn(0);
1090 } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1091 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1092 for (i=0; i<m; i++) {
1093 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1094 shift = a->sliidx[i>>3]+(i&0x07);
1095 for (j=0; j<a->rlen[i]; j++) {
1096 #if defined(PETSC_USE_COMPLEX)
1097 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1098 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1099 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1100 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1101 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1102 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1103 }
1104 #else
1105 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);}
1106 #endif
1107 }
1108 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1109 }
1110 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1111 } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1112 PetscInt cnt=0,jcnt;
1113 PetscScalar value;
1114 #if defined(PETSC_USE_COMPLEX)
1115 PetscBool realonly=PETSC_TRUE;
1116 for (i=0; i<a->sliidx[a->totalslices]; i++) {
1117 if (PetscImaginaryPart(a->val[i]) != 0.0) {
1118 realonly = PETSC_FALSE;
1119 break;
1120 }
1121 }
1122 #endif
1123
1124 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1125 for (i=0; i<m; i++) {
1126 jcnt = 0;
1127 shift = a->sliidx[i>>3]+(i&0x07);
1128 for (j=0; j<A->cmap->n; j++) {
1129 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1130 value = a->val[cnt++];
1131 jcnt++;
1132 } else {
1133 value = 0.0;
1134 }
1135 #if defined(PETSC_USE_COMPLEX)
1136 if (realonly) {
1137 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr);
1138 } else {
1139 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr);
1140 }
1141 #else
1142 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr);
1143 #endif
1144 }
1145 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1146 }
1147 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1148 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1149 PetscInt fshift=1;
1150 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1151 #if defined(PETSC_USE_COMPLEX)
1152 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr);
1153 #else
1154 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr);
1155 #endif
1156 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr);
1157 for (i=0; i<m; i++) {
1158 shift = a->sliidx[i>>3]+(i&0x07);
1159 for (j=0; j<a->rlen[i]; j++) {
1160 #if defined(PETSC_USE_COMPLEX)
1161 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1162 #else
1163 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr);
1164 #endif
1165 }
1166 }
1167 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1168 } else if (format == PETSC_VIEWER_NATIVE) {
1169 for (i=0; i<a->totalslices; i++) { /* loop over slices */
1170 PetscInt row;
1171 ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr);
1172 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
1173 #if defined(PETSC_USE_COMPLEX)
1174 if (PetscImaginaryPart(a->val[j]) > 0.0) {
1175 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1176 } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1177 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1178 } else {
1179 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1180 }
1181 #else
1182 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1183 #endif
1184 }
1185 }
1186 } else {
1187 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1188 if (A->factortype) {
1189 for (i=0; i<m; i++) {
1190 shift = a->sliidx[i>>3]+(i&0x07);
1191 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1192 /* L part */
1193 for (j=shift; j<a->diag[i]; j+=8) {
1194 #if defined(PETSC_USE_COMPLEX)
1195 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1196 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1197 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1198 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1199 } else {
1200 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1201 }
1202 #else
1203 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1204 #endif
1205 }
1206 /* diagonal */
1207 j = a->diag[i];
1208 #if defined(PETSC_USE_COMPLEX)
1209 if (PetscImaginaryPart(a->val[j]) > 0.0) {
1210 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr);
1211 } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1212 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr);
1213 } else {
1214 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr);
1215 }
1216 #else
1217 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr);
1218 #endif
1219
1220 /* U part */
1221 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1222 #if defined(PETSC_USE_COMPLEX)
1223 if (PetscImaginaryPart(a->val[j]) > 0.0) {
1224 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr);
1225 } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1226 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr);
1227 } else {
1228 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr);
1229 }
1230 #else
1231 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr);
1232 #endif
1233 }
1234 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1235 }
1236 } else {
1237 for (i=0; i<m; i++) {
1238 shift = a->sliidx[i>>3]+(i&0x07);
1239 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr);
1240 for (j=0; j<a->rlen[i]; j++) {
1241 #if defined(PETSC_USE_COMPLEX)
1242 if (PetscImaginaryPart(a->val[j]) > 0.0) {
1243 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1244 } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1245 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr);
1246 } else {
1247 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr);
1248 }
1249 #else
1250 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);
1251 #endif
1252 }
1253 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1254 }
1255 }
1256 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1257 }
1258 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1259 PetscFunctionReturn(0);
1260 }
1261
1262 #include <petscdraw.h>
MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void * Aa)1263 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1264 {
1265 Mat A=(Mat)Aa;
1266 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1267 PetscInt i,j,m=A->rmap->n,shift;
1268 int color;
1269 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1270 PetscViewer viewer;
1271 PetscViewerFormat format;
1272 PetscErrorCode ierr;
1273
1274 PetscFunctionBegin;
1275 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1276 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1277 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1278
1279 /* loop over matrix elements drawing boxes */
1280
1281 if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1282 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1283 /* Blue for negative, Cyan for zero and Red for positive */
1284 color = PETSC_DRAW_BLUE;
1285 for (i=0; i<m; i++) {
1286 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1287 y_l = m - i - 1.0; y_r = y_l + 1.0;
1288 for (j=0; j<a->rlen[i]; j++) {
1289 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1290 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue;
1291 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1292 }
1293 }
1294 color = PETSC_DRAW_CYAN;
1295 for (i=0; i<m; i++) {
1296 shift = a->sliidx[i>>3]+(i&0x07);
1297 y_l = m - i - 1.0; y_r = y_l + 1.0;
1298 for (j=0; j<a->rlen[i]; j++) {
1299 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1300 if (a->val[shift+8*j] != 0.) continue;
1301 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1302 }
1303 }
1304 color = PETSC_DRAW_RED;
1305 for (i=0; i<m; i++) {
1306 shift = a->sliidx[i>>3]+(i&0x07);
1307 y_l = m - i - 1.0; y_r = y_l + 1.0;
1308 for (j=0; j<a->rlen[i]; j++) {
1309 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1310 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue;
1311 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1312 }
1313 }
1314 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1315 } else {
1316 /* use contour shading to indicate magnitude of values */
1317 /* first determine max of all nonzero values */
1318 PetscReal minv=0.0,maxv=0.0;
1319 PetscInt count=0;
1320 PetscDraw popup;
1321 for (i=0; i<a->sliidx[a->totalslices]; i++) {
1322 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1323 }
1324 if (minv >= maxv) maxv = minv + PETSC_SMALL;
1325 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1326 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr);
1327
1328 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1329 for (i=0; i<m; i++) {
1330 shift = a->sliidx[i>>3]+(i&0x07);
1331 y_l = m - i - 1.0;
1332 y_r = y_l + 1.0;
1333 for (j=0; j<a->rlen[i]; j++) {
1334 x_l = a->colidx[shift+j*8];
1335 x_r = x_l + 1.0;
1336 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1337 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr);
1338 count++;
1339 }
1340 }
1341 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1342 }
1343 PetscFunctionReturn(0);
1344 }
1345
1346 #include <petscdraw.h>
MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)1347 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1348 {
1349 PetscDraw draw;
1350 PetscReal xr,yr,xl,yl,h,w;
1351 PetscBool isnull;
1352 PetscErrorCode ierr;
1353
1354 PetscFunctionBegin;
1355 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1356 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1357 if (isnull) PetscFunctionReturn(0);
1358
1359 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0;
1360 xr += w; yr += h; xl = -w; yl = -h;
1361 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1362 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1363 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr);
1364 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1365 ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1366 PetscFunctionReturn(0);
1367 }
1368
MatView_SeqSELL(Mat A,PetscViewer viewer)1369 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1370 {
1371 PetscBool iascii,isbinary,isdraw;
1372 PetscErrorCode ierr;
1373
1374 PetscFunctionBegin;
1375 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1376 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1377 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1378 if (iascii) {
1379 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr);
1380 } else if (isbinary) {
1381 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */
1382 } else if (isdraw) {
1383 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr);
1384 }
1385 PetscFunctionReturn(0);
1386 }
1387
MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)1388 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1389 {
1390 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1391 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1392 MatScalar *vp;
1393 PetscErrorCode ierr;
1394
1395 PetscFunctionBegin;
1396 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1397 /* To do: compress out the unused elements */
1398 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr);
1399 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr);
1400 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr);
1401 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr);
1402 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1403 for (i=0; i<a->totalslices; ++i) {
1404 shift = a->sliidx[i]; /* starting index of the slice */
1405 cp = a->colidx+shift; /* pointer to the column indices of the slice */
1406 vp = a->val+shift; /* pointer to the nonzero values of the slice */
1407 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1408 row = 8*i + row_in_slice;
1409 nrow = a->rlen[row]; /* number of nonzeros in row */
1410 /*
1411 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1412 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1413 */
1414 lastcol = 0;
1415 if (nrow>0) { /* nonempty row */
1416 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1417 } else if (!row_in_slice) { /* first row of the currect slice is empty */
1418 for (j=1;j<8;j++) {
1419 if (a->rlen[8*i+j]) {
1420 lastcol = cp[j];
1421 break;
1422 }
1423 }
1424 } else {
1425 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1426 }
1427
1428 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1429 cp[8*k+row_in_slice] = lastcol;
1430 vp[8*k+row_in_slice] = (MatScalar)0;
1431 }
1432 }
1433 }
1434
1435 A->info.mallocs += a->reallocs;
1436 a->reallocs = 0;
1437
1438 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1439 PetscFunctionReturn(0);
1440 }
1441
MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo * info)1442 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1443 {
1444 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1445
1446 PetscFunctionBegin;
1447 info->block_size = 1.0;
1448 info->nz_allocated = a->maxallocmat;
1449 info->nz_used = a->sliidx[a->totalslices]; /* include padding zeros */
1450 info->nz_unneeded = (a->maxallocmat-a->sliidx[a->totalslices]);
1451 info->assemblies = A->num_ass;
1452 info->mallocs = A->info.mallocs;
1453 info->memory = ((PetscObject)A)->mem;
1454 if (A->factortype) {
1455 info->fill_ratio_given = A->info.fill_ratio_given;
1456 info->fill_ratio_needed = A->info.fill_ratio_needed;
1457 info->factor_mallocs = A->info.factor_mallocs;
1458 } else {
1459 info->fill_ratio_given = 0;
1460 info->fill_ratio_needed = 0;
1461 info->factor_mallocs = 0;
1462 }
1463 PetscFunctionReturn(0);
1464 }
1465
MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)1466 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1467 {
1468 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1469 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow;
1470 PetscInt *cp,nonew=a->nonew,lastcol=-1;
1471 MatScalar *vp,value;
1472 PetscErrorCode ierr;
1473
1474 PetscFunctionBegin;
1475 for (k=0; k<m; k++) { /* loop over added rows */
1476 row = im[k];
1477 if (row < 0) continue;
1478 if (PetscUnlikelyDebug(row >= A->rmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1479 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1480 cp = a->colidx+shift; /* pointer to the row */
1481 vp = a->val+shift; /* pointer to the row */
1482 nrow = a->rlen[row];
1483 low = 0;
1484 high = nrow;
1485
1486 for (l=0; l<n; l++) { /* loop over added columns */
1487 col = in[l];
1488 if (col<0) continue;
1489 if (PetscUnlikelyDebug(col >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1490 if (a->roworiented) {
1491 value = v[l+k*n];
1492 } else {
1493 value = v[k+l*m];
1494 }
1495 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1496
1497 /* search in this row for the specified colmun, i indicates the column to be set */
1498 if (col <= lastcol) low = 0;
1499 else high = nrow;
1500 lastcol = col;
1501 while (high-low > 5) {
1502 t = (low+high)/2;
1503 if (*(cp+t*8) > col) high = t;
1504 else low = t;
1505 }
1506 for (i=low; i<high; i++) {
1507 if (*(cp+i*8) > col) break;
1508 if (*(cp+i*8) == col) {
1509 if (is == ADD_VALUES) *(vp+i*8) += value;
1510 else *(vp+i*8) = value;
1511 low = i + 1;
1512 goto noinsert;
1513 }
1514 }
1515 if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1516 if (nonew == 1) goto noinsert;
1517 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1518 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1519 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1520 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1521 for (ii=nrow-1; ii>=i; ii--) {
1522 *(cp+(ii+1)*8) = *(cp+ii*8);
1523 *(vp+(ii+1)*8) = *(vp+ii*8);
1524 }
1525 a->rlen[row]++;
1526 *(cp+i*8) = col;
1527 *(vp+i*8) = value;
1528 a->nz++;
1529 A->nonzerostate++;
1530 low = i+1; high++; nrow++;
1531 noinsert:;
1532 }
1533 a->rlen[row] = nrow;
1534 }
1535 PetscFunctionReturn(0);
1536 }
1537
MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)1538 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1539 {
1540 PetscErrorCode ierr;
1541
1542 PetscFunctionBegin;
1543 /* If the two matrices have the same copy implementation, use fast copy. */
1544 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1545 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1546 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1547
1548 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1549 ierr = PetscArraycpy(b->val,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr);
1550 } else {
1551 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1552 }
1553 PetscFunctionReturn(0);
1554 }
1555
MatSetUp_SeqSELL(Mat A)1556 PetscErrorCode MatSetUp_SeqSELL(Mat A)
1557 {
1558 PetscErrorCode ierr;
1559
1560 PetscFunctionBegin;
1561 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1562 PetscFunctionReturn(0);
1563 }
1564
MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar * array[])1565 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1566 {
1567 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1568
1569 PetscFunctionBegin;
1570 *array = a->val;
1571 PetscFunctionReturn(0);
1572 }
1573
MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar * array[])1574 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1575 {
1576 PetscFunctionBegin;
1577 PetscFunctionReturn(0);
1578 }
1579
MatRealPart_SeqSELL(Mat A)1580 PetscErrorCode MatRealPart_SeqSELL(Mat A)
1581 {
1582 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1583 PetscInt i;
1584 MatScalar *aval=a->val;
1585
1586 PetscFunctionBegin;
1587 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1588 PetscFunctionReturn(0);
1589 }
1590
MatImaginaryPart_SeqSELL(Mat A)1591 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1592 {
1593 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1594 PetscInt i;
1595 MatScalar *aval=a->val;
1596 PetscErrorCode ierr;
1597
1598 PetscFunctionBegin;
1599 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1600 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr);
1601 PetscFunctionReturn(0);
1602 }
1603
MatScale_SeqSELL(Mat inA,PetscScalar alpha)1604 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1605 {
1606 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data;
1607 MatScalar *aval=a->val;
1608 PetscScalar oalpha=alpha;
1609 PetscBLASInt one=1,size;
1610 PetscErrorCode ierr;
1611
1612 PetscFunctionBegin;
1613 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr);
1614 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1615 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1616 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr);
1617 PetscFunctionReturn(0);
1618 }
1619
MatShift_SeqSELL(Mat Y,PetscScalar a)1620 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1621 {
1622 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data;
1623 PetscErrorCode ierr;
1624
1625 PetscFunctionBegin;
1626 if (!Y->preallocated || !y->nz) {
1627 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr);
1628 }
1629 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1630 PetscFunctionReturn(0);
1631 }
1632
MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)1633 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1634 {
1635 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1636 PetscScalar *x,sum,*t;
1637 const MatScalar *idiag=NULL,*mdiag;
1638 const PetscScalar *b,*xb;
1639 PetscInt n,m=A->rmap->n,i,j,shift;
1640 const PetscInt *diag;
1641 PetscErrorCode ierr;
1642
1643 PetscFunctionBegin;
1644 its = its*lits;
1645
1646 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1647 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);}
1648 a->fshift = fshift;
1649 a->omega = omega;
1650
1651 diag = a->diag;
1652 t = a->ssor_work;
1653 idiag = a->idiag;
1654 mdiag = a->mdiag;
1655
1656 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1657 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1658 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1659 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1660 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1661 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1662
1663 if (flag & SOR_ZERO_INITIAL_GUESS) {
1664 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1665 for (i=0; i<m; i++) {
1666 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1667 sum = b[i];
1668 n = (diag[i]-shift)/8;
1669 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1670 t[i] = sum;
1671 x[i] = sum*idiag[i];
1672 }
1673 xb = t;
1674 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1675 } else xb = b;
1676 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1677 for (i=m-1; i>=0; i--) {
1678 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1679 sum = xb[i];
1680 n = a->rlen[i]-(diag[i]-shift)/8-1;
1681 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1682 if (xb == b) {
1683 x[i] = sum*idiag[i];
1684 } else {
1685 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */
1686 }
1687 }
1688 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1689 }
1690 its--;
1691 }
1692 while (its--) {
1693 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1694 for (i=0; i<m; i++) {
1695 /* lower */
1696 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1697 sum = b[i];
1698 n = (diag[i]-shift)/8;
1699 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1700 t[i] = sum; /* save application of the lower-triangular part */
1701 /* upper */
1702 n = a->rlen[i]-(diag[i]-shift)/8-1;
1703 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1704 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */
1705 }
1706 xb = t;
1707 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1708 } else xb = b;
1709 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1710 for (i=m-1; i>=0; i--) {
1711 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1712 sum = xb[i];
1713 if (xb == b) {
1714 /* whole matrix (no checkpointing available) */
1715 n = a->rlen[i];
1716 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1717 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1718 } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1719 n = a->rlen[i]-(diag[i]-shift)/8-1;
1720 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1721 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */
1722 }
1723 }
1724 if (xb == b) {
1725 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1726 } else {
1727 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */
1728 }
1729 }
1730 }
1731 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1732 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1733 PetscFunctionReturn(0);
1734 }
1735
1736 /* -------------------------------------------------------------------*/
1737 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1738 MatGetRow_SeqSELL,
1739 MatRestoreRow_SeqSELL,
1740 MatMult_SeqSELL,
1741 /* 4*/ MatMultAdd_SeqSELL,
1742 MatMultTranspose_SeqSELL,
1743 MatMultTransposeAdd_SeqSELL,
1744 NULL,
1745 NULL,
1746 NULL,
1747 /* 10*/ NULL,
1748 NULL,
1749 NULL,
1750 MatSOR_SeqSELL,
1751 NULL,
1752 /* 15*/ MatGetInfo_SeqSELL,
1753 MatEqual_SeqSELL,
1754 MatGetDiagonal_SeqSELL,
1755 MatDiagonalScale_SeqSELL,
1756 NULL,
1757 /* 20*/ NULL,
1758 MatAssemblyEnd_SeqSELL,
1759 MatSetOption_SeqSELL,
1760 MatZeroEntries_SeqSELL,
1761 /* 24*/ NULL,
1762 NULL,
1763 NULL,
1764 NULL,
1765 NULL,
1766 /* 29*/ MatSetUp_SeqSELL,
1767 NULL,
1768 NULL,
1769 NULL,
1770 NULL,
1771 /* 34*/ MatDuplicate_SeqSELL,
1772 NULL,
1773 NULL,
1774 NULL,
1775 NULL,
1776 /* 39*/ NULL,
1777 NULL,
1778 NULL,
1779 MatGetValues_SeqSELL,
1780 MatCopy_SeqSELL,
1781 /* 44*/ NULL,
1782 MatScale_SeqSELL,
1783 MatShift_SeqSELL,
1784 NULL,
1785 NULL,
1786 /* 49*/ NULL,
1787 NULL,
1788 NULL,
1789 NULL,
1790 NULL,
1791 /* 54*/ MatFDColoringCreate_SeqXAIJ,
1792 NULL,
1793 NULL,
1794 NULL,
1795 NULL,
1796 /* 59*/ NULL,
1797 MatDestroy_SeqSELL,
1798 MatView_SeqSELL,
1799 NULL,
1800 NULL,
1801 /* 64*/ NULL,
1802 NULL,
1803 NULL,
1804 NULL,
1805 NULL,
1806 /* 69*/ NULL,
1807 NULL,
1808 NULL,
1809 NULL,
1810 NULL,
1811 /* 74*/ NULL,
1812 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1813 NULL,
1814 NULL,
1815 NULL,
1816 /* 79*/ NULL,
1817 NULL,
1818 NULL,
1819 NULL,
1820 NULL,
1821 /* 84*/ NULL,
1822 NULL,
1823 NULL,
1824 NULL,
1825 NULL,
1826 /* 89*/ NULL,
1827 NULL,
1828 NULL,
1829 NULL,
1830 NULL,
1831 /* 94*/ NULL,
1832 NULL,
1833 NULL,
1834 NULL,
1835 NULL,
1836 /* 99*/ NULL,
1837 NULL,
1838 NULL,
1839 MatConjugate_SeqSELL,
1840 NULL,
1841 /*104*/ NULL,
1842 NULL,
1843 NULL,
1844 NULL,
1845 NULL,
1846 /*109*/ NULL,
1847 NULL,
1848 NULL,
1849 NULL,
1850 MatMissingDiagonal_SeqSELL,
1851 /*114*/ NULL,
1852 NULL,
1853 NULL,
1854 NULL,
1855 NULL,
1856 /*119*/ NULL,
1857 NULL,
1858 NULL,
1859 NULL,
1860 NULL,
1861 /*124*/ NULL,
1862 NULL,
1863 NULL,
1864 NULL,
1865 NULL,
1866 /*129*/ NULL,
1867 NULL,
1868 NULL,
1869 NULL,
1870 NULL,
1871 /*134*/ NULL,
1872 NULL,
1873 NULL,
1874 NULL,
1875 NULL,
1876 /*139*/ NULL,
1877 NULL,
1878 NULL,
1879 MatFDColoringSetUp_SeqXAIJ,
1880 NULL,
1881 /*144*/ NULL
1882 };
1883
MatStoreValues_SeqSELL(Mat mat)1884 PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1885 {
1886 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data;
1887 PetscErrorCode ierr;
1888
1889 PetscFunctionBegin;
1890 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1891
1892 /* allocate space for values if not already there */
1893 if (!a->saved_values) {
1894 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr);
1895 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr);
1896 }
1897
1898 /* copy values over */
1899 ierr = PetscArraycpy(a->saved_values,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr);
1900 PetscFunctionReturn(0);
1901 }
1902
MatRetrieveValues_SeqSELL(Mat mat)1903 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1904 {
1905 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data;
1906 PetscErrorCode ierr;
1907
1908 PetscFunctionBegin;
1909 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1910 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1911 ierr = PetscArraycpy(a->val,a->saved_values,a->sliidx[a->totalslices]);CHKERRQ(ierr);
1912 PetscFunctionReturn(0);
1913 }
1914
1915 /*@C
1916 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1917
1918 Not Collective
1919
1920 Input Parameters:
1921 . mat - a MATSEQSELL matrix
1922 . array - pointer to the data
1923
1924 Level: intermediate
1925
1926 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1927 @*/
MatSeqSELLRestoreArray(Mat A,PetscScalar ** array)1928 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1929 {
1930 PetscErrorCode ierr;
1931
1932 PetscFunctionBegin;
1933 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
1934 PetscFunctionReturn(0);
1935 }
1936
MatCreate_SeqSELL(Mat B)1937 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1938 {
1939 Mat_SeqSELL *b;
1940 PetscMPIInt size;
1941 PetscErrorCode ierr;
1942
1943 PetscFunctionBegin;
1944 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1945 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1946
1947 ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
1948
1949 B->data = (void*)b;
1950
1951 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1952
1953 b->row = NULL;
1954 b->col = NULL;
1955 b->icol = NULL;
1956 b->reallocs = 0;
1957 b->ignorezeroentries = PETSC_FALSE;
1958 b->roworiented = PETSC_TRUE;
1959 b->nonew = 0;
1960 b->diag = NULL;
1961 b->solve_work = NULL;
1962 B->spptr = NULL;
1963 b->saved_values = NULL;
1964 b->idiag = NULL;
1965 b->mdiag = NULL;
1966 b->ssor_work = NULL;
1967 b->omega = 1.0;
1968 b->fshift = 0.0;
1969 b->idiagvalid = PETSC_FALSE;
1970 b->keepnonzeropattern = PETSC_FALSE;
1971
1972 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr);
1973 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr);
1974 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr);
1975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr);
1976 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr);
1977 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr);
1978 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr);
1979 PetscFunctionReturn(0);
1980 }
1981
1982 /*
1983 Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1984 */
MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)1985 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1986 {
1987 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data;
1988 PetscInt i,m=A->rmap->n;
1989 PetscInt totalslices=a->totalslices;
1990 PetscErrorCode ierr;
1991
1992 PetscFunctionBegin;
1993 c = (Mat_SeqSELL*)C->data;
1994
1995 C->factortype = A->factortype;
1996 c->row = NULL;
1997 c->col = NULL;
1998 c->icol = NULL;
1999 c->reallocs = 0;
2000
2001 C->assembled = PETSC_TRUE;
2002
2003 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
2004 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
2005
2006 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr);
2007 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
2008 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr);
2009 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr);
2010
2011 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
2012 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
2013
2014 /* allocate the matrix space */
2015 if (mallocmatspace) {
2016 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr);
2017 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2018
2019 c->singlemalloc = PETSC_TRUE;
2020
2021 if (m > 0) {
2022 ierr = PetscArraycpy(c->colidx,a->colidx,a->maxallocmat);CHKERRQ(ierr);
2023 if (cpvalues == MAT_COPY_VALUES) {
2024 ierr = PetscArraycpy(c->val,a->val,a->maxallocmat);CHKERRQ(ierr);
2025 } else {
2026 ierr = PetscArrayzero(c->val,a->maxallocmat);CHKERRQ(ierr);
2027 }
2028 }
2029 }
2030
2031 c->ignorezeroentries = a->ignorezeroentries;
2032 c->roworiented = a->roworiented;
2033 c->nonew = a->nonew;
2034 if (a->diag) {
2035 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr);
2036 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr);
2037 for (i=0; i<m; i++) {
2038 c->diag[i] = a->diag[i];
2039 }
2040 } else c->diag = NULL;
2041
2042 c->solve_work = NULL;
2043 c->saved_values = NULL;
2044 c->idiag = NULL;
2045 c->ssor_work = NULL;
2046 c->keepnonzeropattern = a->keepnonzeropattern;
2047 c->free_val = PETSC_TRUE;
2048 c->free_colidx = PETSC_TRUE;
2049
2050 c->maxallocmat = a->maxallocmat;
2051 c->maxallocrow = a->maxallocrow;
2052 c->rlenmax = a->rlenmax;
2053 c->nz = a->nz;
2054 C->preallocated = PETSC_TRUE;
2055
2056 c->nonzerorowcnt = a->nonzerorowcnt;
2057 C->nonzerostate = A->nonzerostate;
2058
2059 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2060 PetscFunctionReturn(0);
2061 }
2062
MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat * B)2063 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
2064 {
2065 PetscErrorCode ierr;
2066
2067 PetscFunctionBegin;
2068 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
2069 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
2070 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
2071 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
2072 }
2073 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2074 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
2075 PetscFunctionReturn(0);
2076 }
2077
2078 /*@C
2079 MatCreateSeqSELL - Creates a sparse matrix in SELL format.
2080
2081 Collective
2082
2083 Input Parameters:
2084 + comm - MPI communicator, set to PETSC_COMM_SELF
2085 . m - number of rows
2086 . n - number of columns
2087 . rlenmax - maximum number of nonzeros in a row
2088 - rlen - array containing the number of nonzeros in the various rows
2089 (possibly different for each row) or NULL
2090
2091 Output Parameter:
2092 . A - the matrix
2093
2094 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2095 MatXXXXSetPreallocation() paradigm instead of this routine directly.
2096 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
2097
2098 Notes:
2099 If nnz is given then nz is ignored
2100
2101 Specify the preallocated storage with either rlenmax or rlen (not both).
2102 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2103 allocation. For large problems you MUST preallocate memory or you
2104 will get TERRIBLE performance, see the users' manual chapter on matrices.
2105
2106 Level: intermediate
2107
2108 .seealso: MatCreate(), MatCreateSELL(), MatSetValues()
2109
2110 @*/
MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat * A)2111 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2112 {
2113 PetscErrorCode ierr;
2114
2115 PetscFunctionBegin;
2116 ierr = MatCreate(comm,A);CHKERRQ(ierr);
2117 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2118 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr);
2119 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr);
2120 PetscFunctionReturn(0);
2121 }
2122
MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)2123 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2124 {
2125 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2126 PetscInt totalslices=a->totalslices;
2127 PetscErrorCode ierr;
2128
2129 PetscFunctionBegin;
2130 /* If the matrix dimensions are not equal,or no of nonzeros */
2131 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2132 *flg = PETSC_FALSE;
2133 PetscFunctionReturn(0);
2134 }
2135 /* if the a->colidx are the same */
2136 ierr = PetscArraycmp(a->colidx,b->colidx,a->sliidx[totalslices],flg);CHKERRQ(ierr);
2137 if (!*flg) PetscFunctionReturn(0);
2138 /* if a->val are the same */
2139 ierr = PetscArraycmp(a->val,b->val,a->sliidx[totalslices],flg);CHKERRQ(ierr);
2140 PetscFunctionReturn(0);
2141 }
2142
MatSeqSELLInvalidateDiagonal(Mat A)2143 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2144 {
2145 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2146
2147 PetscFunctionBegin;
2148 a->idiagvalid = PETSC_FALSE;
2149 PetscFunctionReturn(0);
2150 }
2151
MatConjugate_SeqSELL(Mat A)2152 PetscErrorCode MatConjugate_SeqSELL(Mat A)
2153 {
2154 #if defined(PETSC_USE_COMPLEX)
2155 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2156 PetscInt i;
2157 PetscScalar *val = a->val;
2158
2159 PetscFunctionBegin;
2160 for (i=0; i<a->sliidx[a->totalslices]; i++) {
2161 val[i] = PetscConj(val[i]);
2162 }
2163 #else
2164 PetscFunctionBegin;
2165 #endif
2166 PetscFunctionReturn(0);
2167 }
2168