1
2 #include <petsc/private/vecimpl.h>
3
4 #define DEFAULT_STASH_SIZE 100
5
6 /*
7 VecStashCreate_Private - Creates a stash,currently used for all the parallel
8 matrix implementations. The stash is where elements of a matrix destined
9 to be stored on other processors are kept until matrix assembly is done.
10
11 This is a simple minded stash. Simply adds entries to end of stash.
12
13 Input Parameters:
14 comm - communicator, required for scatters.
15 bs - stash block size. used when stashing blocks of values
16
17 Output Parameters:
18 stash - the newly created stash
19 */
VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash * stash)20 PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash)
21 {
22 PetscErrorCode ierr;
23 PetscInt max,*opt,nopt;
24 PetscBool flg;
25
26 PetscFunctionBegin;
27 /* Require 2 tags, get the second using PetscCommGetNewTag() */
28 stash->comm = comm;
29 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
30 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
31 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
32 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
33
34 nopt = stash->size;
35 ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
36 ierr = PetscOptionsGetIntArray(NULL,NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
37 if (flg) {
38 if (nopt == 1) max = opt[0];
39 else if (nopt == stash->size) max = opt[stash->rank];
40 else if (stash->rank < nopt) max = opt[stash->rank];
41 else max = 0; /* use default */
42 stash->umax = max;
43 } else {
44 stash->umax = 0;
45 }
46 ierr = PetscFree(opt);CHKERRQ(ierr);
47
48 if (bs <= 0) bs = 1;
49
50 stash->bs = bs;
51 stash->nmax = 0;
52 stash->oldnmax = 0;
53 stash->n = 0;
54 stash->reallocs = -1;
55 stash->idx = NULL;
56 stash->array = NULL;
57
58 stash->send_waits = NULL;
59 stash->recv_waits = NULL;
60 stash->send_status = NULL;
61 stash->nsends = 0;
62 stash->nrecvs = 0;
63 stash->svalues = NULL;
64 stash->rvalues = NULL;
65 stash->rmax = 0;
66 stash->nprocs = NULL;
67 stash->nprocessed = 0;
68 stash->donotstash = PETSC_FALSE;
69 stash->ignorenegidx = PETSC_FALSE;
70 PetscFunctionReturn(0);
71 }
72
73 /*
74 VecStashDestroy_Private - Destroy the stash
75 */
VecStashDestroy_Private(VecStash * stash)76 PetscErrorCode VecStashDestroy_Private(VecStash *stash)
77 {
78 PetscErrorCode ierr;
79
80 PetscFunctionBegin;
81 ierr = PetscFree2(stash->array,stash->idx);CHKERRQ(ierr);
82 ierr = PetscFree(stash->bowners);CHKERRQ(ierr);
83 PetscFunctionReturn(0);
84 }
85
86 /*
87 VecStashScatterEnd_Private - This is called as the fial stage of
88 scatter. The final stages of message passing is done here, and
89 all the memory used for message passing is cleanedu up. This
90 routine also resets the stash, and deallocates the memory used
91 for the stash. It also keeps track of the current memory usage
92 so that the same value can be used the next time through.
93 */
VecStashScatterEnd_Private(VecStash * stash)94 PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
95 {
96 PetscErrorCode ierr;
97 PetscInt nsends=stash->nsends,oldnmax;
98 MPI_Status *send_status;
99
100 PetscFunctionBegin;
101 /* wait on sends */
102 if (nsends) {
103 ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
104 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
105 ierr = PetscFree(send_status);CHKERRQ(ierr);
106 }
107
108 /* Now update nmaxold to be app 10% more than max n, this way the
109 wastage of space is reduced the next time this stash is used.
110 Also update the oldmax, only if it increases */
111 if (stash->n) {
112 oldnmax = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
113 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
114 }
115
116 stash->nmax = 0;
117 stash->n = 0;
118 stash->reallocs = -1;
119 stash->rmax = 0;
120 stash->nprocessed = 0;
121
122 ierr = PetscFree2(stash->array,stash->idx);CHKERRQ(ierr);
123 stash->array = NULL;
124 stash->idx = NULL;
125 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
126 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
127 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
128 ierr = PetscFree2(stash->rvalues,stash->rindices);CHKERRQ(ierr);
129 ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
130 PetscFunctionReturn(0);
131 }
132
133 /*
134 VecStashGetInfo_Private - Gets the relavant statistics of the stash
135
136 Input Parameters:
137 stash - the stash
138 nstash - the size of the stash
139 reallocs - the number of additional mallocs incurred.
140
141 */
VecStashGetInfo_Private(VecStash * stash,PetscInt * nstash,PetscInt * reallocs)142 PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)
143 {
144 PetscFunctionBegin;
145 if (nstash) *nstash = stash->n*stash->bs;
146 if (reallocs) {
147 if (stash->reallocs < 0) *reallocs = 0;
148 else *reallocs = stash->reallocs;
149 }
150 PetscFunctionReturn(0);
151 }
152
153
154 /*
155 VecStashSetInitialSize_Private - Sets the initial size of the stash
156
157 Input Parameters:
158 stash - the stash
159 max - the value that is used as the max size of the stash.
160 this value is used while allocating memory. It specifies
161 the number of vals stored, even with the block-stash
162 */
VecStashSetInitialSize_Private(VecStash * stash,PetscInt max)163 PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)
164 {
165 PetscFunctionBegin;
166 stash->umax = max;
167 PetscFunctionReturn(0);
168 }
169
170 /* VecStashExpand_Private - Expand the stash. This function is called
171 when the space in the stash is not sufficient to add the new values
172 being inserted into the stash.
173
174 Input Parameters:
175 stash - the stash
176 incr - the minimum increase requested
177
178 Notes:
179 This routine doubles the currently used memory.
180 */
VecStashExpand_Private(VecStash * stash,PetscInt incr)181 PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)
182 {
183 PetscErrorCode ierr;
184 PetscInt *n_idx,newnmax,bs=stash->bs;
185 PetscScalar *n_array;
186
187 PetscFunctionBegin;
188 /* allocate a larger stash. */
189 if (!stash->oldnmax && !stash->nmax) { /* new stash */
190 if (stash->umax) newnmax = stash->umax/bs;
191 else newnmax = DEFAULT_STASH_SIZE/bs;
192 } else if (!stash->nmax) { /* resuing stash */
193 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
194 else newnmax = stash->oldnmax/bs;
195 } else newnmax = stash->nmax*2;
196
197 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
198
199 ierr = PetscMalloc2(bs*newnmax,&n_array,newnmax,&n_idx);CHKERRQ(ierr);
200 ierr = PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));CHKERRQ(ierr);
201 ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
202 ierr = PetscFree2(stash->array,stash->idx);CHKERRQ(ierr);
203
204 stash->array = n_array;
205 stash->idx = n_idx;
206 stash->nmax = newnmax;
207 stash->reallocs++;
208 PetscFunctionReturn(0);
209 }
210 /*
211 VecStashScatterBegin_Private - Initiates the transfer of values to the
212 correct owners. This function goes through the stash, and check the
213 owners of each stashed value, and sends the values off to the owner
214 processors.
215
216 Input Parameters:
217 stash - the stash
218 owners - an array of size 'no-of-procs' which gives the ownership range
219 for each node.
220
221 Notes:
222 The 'owners' array in the cased of the blocked-stash has the
223 ranges specified blocked global indices, and for the regular stash in
224 the proper global indices.
225 */
VecStashScatterBegin_Private(VecStash * stash,PetscInt * owners)226 PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)
227 {
228 PetscErrorCode ierr;
229 PetscMPIInt size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
230 PetscInt *owner,*start,*nprocs,nsends,nreceives;
231 PetscInt nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
232 PetscScalar *rvalues,*svalues;
233 MPI_Comm comm = stash->comm;
234 MPI_Request *send_waits,*recv_waits;
235
236 PetscFunctionBegin;
237 /* first count number of contributors to each processor */
238 ierr = PetscCalloc1(2*size,&nprocs);CHKERRQ(ierr);
239 ierr = PetscMalloc1(stash->n,&owner);CHKERRQ(ierr);
240
241 j = 0;
242 lastidx = -1;
243 for (i=0; i<stash->n; i++) {
244 /* if indices are NOT locally sorted, need to start search at the beginning */
245 if (lastidx > (idx = stash->idx[i])) j = 0;
246 lastidx = idx;
247 for (; j<size; j++) {
248 if (idx >= owners[j] && idx < owners[j+1]) {
249 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
250 }
251 }
252 }
253 nsends = 0;
254 for (i=0; i<size; i++) nsends += nprocs[2*i+1];
255
256 /* inform other processors of number of messages and max length*/
257 ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
258
259 /* post receives:
260 since we don't know how long each individual message is we
261 allocate the largest needed buffer for each receive. Potentially
262 this is a lot of wasted space.
263 */
264 ierr = PetscMalloc2(nreceives*nmax*bs,&rvalues,nreceives*nmax,&rindices);CHKERRQ(ierr);
265 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
266 for (i=0,count=0; i<nreceives; i++) {
267 ierr = MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
268 ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr);
269 }
270
271 /* do sends:
272 1) starts[i] gives the starting index in svalues for stuff going to
273 the ith processor
274 */
275 ierr = PetscMalloc2(stash->n*bs,&svalues,stash->n,&sindices);CHKERRQ(ierr);
276 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
277 ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);
278 /* use 2 sends the first with all_v, the next with all_i */
279 start[0] = 0;
280 for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
281
282 for (i=0; i<stash->n; i++) {
283 j = owner[i];
284 if (bs == 1) svalues[start[j]] = stash->array[i];
285 else {
286 ierr = PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));CHKERRQ(ierr);
287 }
288 sindices[start[j]] = stash->idx[i];
289 start[j]++;
290 }
291 start[0] = 0;
292 for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
293
294 for (i=0,count=0; i<size; i++) {
295 if (nprocs[2*i+1]) {
296 ierr = MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
297 ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
298 }
299 }
300 ierr = PetscFree(owner);CHKERRQ(ierr);
301 ierr = PetscFree(start);CHKERRQ(ierr);
302 /* This memory is reused in scatter end for a different purpose*/
303 for (i=0; i<2*size; i++) nprocs[i] = -1;
304
305 stash->nprocs = nprocs;
306 stash->svalues = svalues;
307 stash->sindices = sindices;
308 stash->rvalues = rvalues;
309 stash->rindices = rindices;
310 stash->nsends = nsends;
311 stash->nrecvs = nreceives;
312 stash->send_waits = send_waits;
313 stash->recv_waits = recv_waits;
314 stash->rmax = nmax;
315 PetscFunctionReturn(0);
316 }
317
318 /*
319 VecStashScatterGetMesg_Private - This function waits on the receives posted
320 in the function VecStashScatterBegin_Private() and returns one message at
321 a time to the calling function. If no messages are left, it indicates this
322 by setting flg = 0, else it sets flg = 1.
323
324 Input Parameters:
325 stash - the stash
326
327 Output Parameters:
328 nvals - the number of entries in the current message.
329 rows - an array of row indices (or blocked indices) corresponding to the values
330 cols - an array of columnindices (or blocked indices) corresponding to the values
331 vals - the values
332 flg - 0 indicates no more message left, and the current call has no values associated.
333 1 indicates that the current call successfully received a message, and the
334 other output parameters nvals,rows,cols,vals are set appropriately.
335 */
VecStashScatterGetMesg_Private(VecStash * stash,PetscMPIInt * nvals,PetscInt ** rows,PetscScalar ** vals,PetscInt * flg)336 PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)
337 {
338 PetscErrorCode ierr;
339 PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
340 PetscInt *flg_v;
341 PetscInt i1,i2,bs=stash->bs;
342 MPI_Status recv_status;
343 PetscBool match_found = PETSC_FALSE;
344
345 PetscFunctionBegin;
346 *flg = 0; /* When a message is discovered this is reset to 1 */
347 /* Return if no more messages to process */
348 if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
349
350 flg_v = stash->nprocs;
351 /* If a matching pair of receives are found, process them, and return the data to
352 the calling function. Until then keep receiving messages */
353 while (!match_found) {
354 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
355 /* Now pack the received message into a structure which is useable by others */
356 if (i % 2) {
357 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
358 flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
359 } else {
360 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
361 flg_v[2*recv_status.MPI_SOURCE] = i/2;
362 *nvals = *nvals/bs;
363 }
364
365 /* Check if we have both the messages from this proc */
366 i1 = flg_v[2*recv_status.MPI_SOURCE];
367 i2 = flg_v[2*recv_status.MPI_SOURCE+1];
368 if (i1 != -1 && i2 != -1) {
369 *rows = stash->rindices + i2*stash->rmax;
370 *vals = stash->rvalues + i1*bs*stash->rmax;
371 *flg = 1;
372 stash->nprocessed++;
373 match_found = PETSC_TRUE;
374 }
375 }
376 PetscFunctionReturn(0);
377 }
378
379 /*
380 * Sort the stash, removing duplicates (combining as appropriate).
381 */
VecStashSortCompress_Private(VecStash * stash)382 PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
383 {
384 PetscErrorCode ierr;
385 PetscInt i,j,bs = stash->bs;
386
387 PetscFunctionBegin;
388 if (!stash->n) PetscFunctionReturn(0);
389 if (bs == 1) {
390 ierr = PetscSortIntWithScalarArray(stash->n,stash->idx,stash->array);CHKERRQ(ierr);
391 for (i=1,j=0; i<stash->n; i++) {
392 if (stash->idx[i] == stash->idx[j]) {
393 switch (stash->insertmode) {
394 case ADD_VALUES:
395 stash->array[j] += stash->array[i];
396 break;
397 case INSERT_VALUES:
398 stash->array[j] = stash->array[i];
399 break;
400 default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
401 }
402 } else {
403 j++;
404 stash->idx[j] = stash->idx[i];
405 stash->array[j] = stash->array[i];
406 }
407 }
408 stash->n = j + 1;
409 } else { /* block stash */
410 PetscInt *perm = NULL;
411 PetscScalar *arr;
412 ierr = PetscMalloc2(stash->n,&perm,stash->n*bs,&arr);CHKERRQ(ierr);
413 for (i=0; i<stash->n; i++) perm[i] = i;
414 ierr = PetscSortIntWithArray(stash->n,stash->idx,perm);CHKERRQ(ierr);
415
416 /* Out-of-place copy of arr */
417 ierr = PetscMemcpy(arr,stash->array+perm[0]*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr);
418 for (i=1,j=0; i<stash->n; i++) {
419 PetscInt k;
420 if (stash->idx[i] == stash->idx[j]) {
421 switch (stash->insertmode) {
422 case ADD_VALUES:
423 for (k=0; k<bs; k++) arr[j*bs+k] += stash->array[perm[i]*bs+k];
424 break;
425 case INSERT_VALUES:
426 for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
427 break;
428 default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
429 }
430 } else {
431 j++;
432 stash->idx[j] = stash->idx[i];
433 for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
434 }
435 }
436 stash->n = j + 1;
437 ierr = PetscMemcpy(stash->array,arr,stash->n*bs*sizeof(PetscScalar));CHKERRQ(ierr);
438 ierr = PetscFree2(perm,arr);CHKERRQ(ierr);
439 }
440 PetscFunctionReturn(0);
441 }
442
VecStashGetOwnerList_Private(VecStash * stash,PetscLayout map,PetscMPIInt * nowners,PetscMPIInt ** owners)443 PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash,PetscLayout map,PetscMPIInt *nowners,PetscMPIInt **owners)
444 {
445 PetscInt i,bs = stash->bs;
446 PetscMPIInt r;
447 PetscSegBuffer seg;
448 PetscErrorCode ierr;
449
450 PetscFunctionBegin;
451 if (bs != 1 && bs != map->bs) SETERRQ2(map->comm,PETSC_ERR_PLIB,"Stash block size %D does not match layout block size %D",bs,map->bs);
452 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),50,&seg);CHKERRQ(ierr);
453 *nowners = 0;
454 for (i=0,r=-1; i<stash->n; i++) {
455 if (stash->idx[i]*bs >= map->range[r+1]) {
456 PetscMPIInt *rank;
457 ierr = PetscSegBufferGet(seg,1,&rank);CHKERRQ(ierr);
458 ierr = PetscLayoutFindOwner(map,stash->idx[i]*bs,&r);CHKERRQ(ierr);
459 *rank = r;
460 (*nowners)++;
461 }
462 }
463 ierr = PetscSegBufferExtractAlloc(seg,owners);CHKERRQ(ierr);
464 ierr = PetscSegBufferDestroy(&seg);CHKERRQ(ierr);
465 PetscFunctionReturn(0);
466 }
467