1 
2 #include <petsc/private/vecimpl.h>
3 
4 #define DEFAULT_STASH_SIZE   100
5 
6 /*
7   VecStashCreate_Private - Creates a stash,currently used for all the parallel
8   matrix implementations. The stash is where elements of a matrix destined
9   to be stored on other processors are kept until matrix assembly is done.
10 
11   This is a simple minded stash. Simply adds entries to end of stash.
12 
13   Input Parameters:
14   comm - communicator, required for scatters.
15   bs   - stash block size. used when stashing blocks of values
16 
17   Output Parameters:
18   stash    - the newly created stash
19 */
VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash * stash)20 PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash)
21 {
22   PetscErrorCode ierr;
23   PetscInt       max,*opt,nopt;
24   PetscBool      flg;
25 
26   PetscFunctionBegin;
27   /* Require 2 tags, get the second using PetscCommGetNewTag() */
28   stash->comm = comm;
29   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
30   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
31   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
32   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
33 
34   nopt = stash->size;
35   ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
36   ierr = PetscOptionsGetIntArray(NULL,NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
37   if (flg) {
38     if (nopt == 1)                max = opt[0];
39     else if (nopt == stash->size) max = opt[stash->rank];
40     else if (stash->rank < nopt)  max = opt[stash->rank];
41     else                          max = 0; /* use default */
42     stash->umax = max;
43   } else {
44     stash->umax = 0;
45   }
46   ierr = PetscFree(opt);CHKERRQ(ierr);
47 
48   if (bs <= 0) bs = 1;
49 
50   stash->bs       = bs;
51   stash->nmax     = 0;
52   stash->oldnmax  = 0;
53   stash->n        = 0;
54   stash->reallocs = -1;
55   stash->idx      = NULL;
56   stash->array    = NULL;
57 
58   stash->send_waits   = NULL;
59   stash->recv_waits   = NULL;
60   stash->send_status  = NULL;
61   stash->nsends       = 0;
62   stash->nrecvs       = 0;
63   stash->svalues      = NULL;
64   stash->rvalues      = NULL;
65   stash->rmax         = 0;
66   stash->nprocs       = NULL;
67   stash->nprocessed   = 0;
68   stash->donotstash   = PETSC_FALSE;
69   stash->ignorenegidx = PETSC_FALSE;
70   PetscFunctionReturn(0);
71 }
72 
73 /*
74    VecStashDestroy_Private - Destroy the stash
75 */
VecStashDestroy_Private(VecStash * stash)76 PetscErrorCode VecStashDestroy_Private(VecStash *stash)
77 {
78   PetscErrorCode ierr;
79 
80   PetscFunctionBegin;
81   ierr = PetscFree2(stash->array,stash->idx);CHKERRQ(ierr);
82   ierr = PetscFree(stash->bowners);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 /*
87    VecStashScatterEnd_Private - This is called as the fial stage of
88    scatter. The final stages of message passing is done here, and
89    all the memory used for message passing is cleanedu up. This
90    routine also resets the stash, and deallocates the memory used
91    for the stash. It also keeps track of the current memory usage
92    so that the same value can be used the next time through.
93 */
VecStashScatterEnd_Private(VecStash * stash)94 PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
95 {
96   PetscErrorCode ierr;
97   PetscInt       nsends=stash->nsends,oldnmax;
98   MPI_Status     *send_status;
99 
100   PetscFunctionBegin;
101   /* wait on sends */
102   if (nsends) {
103     ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
104     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
105     ierr = PetscFree(send_status);CHKERRQ(ierr);
106   }
107 
108   /* Now update nmaxold to be app 10% more than max n, this way the
109      wastage of space is reduced the next time this stash is used.
110      Also update the oldmax, only if it increases */
111   if (stash->n) {
112     oldnmax = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
113     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
114   }
115 
116   stash->nmax       = 0;
117   stash->n          = 0;
118   stash->reallocs   = -1;
119   stash->rmax       = 0;
120   stash->nprocessed = 0;
121 
122   ierr = PetscFree2(stash->array,stash->idx);CHKERRQ(ierr);
123   stash->array = NULL;
124   stash->idx   = NULL;
125   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
126   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
127   ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
128   ierr = PetscFree2(stash->rvalues,stash->rindices);CHKERRQ(ierr);
129   ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 /*
134    VecStashGetInfo_Private - Gets the relavant statistics of the stash
135 
136    Input Parameters:
137    stash    - the stash
138    nstash   - the size of the stash
139    reallocs - the number of additional mallocs incurred.
140 
141 */
VecStashGetInfo_Private(VecStash * stash,PetscInt * nstash,PetscInt * reallocs)142 PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)
143 {
144   PetscFunctionBegin;
145   if (nstash) *nstash = stash->n*stash->bs;
146   if (reallocs) {
147     if (stash->reallocs < 0) *reallocs = 0;
148     else                     *reallocs = stash->reallocs;
149   }
150   PetscFunctionReturn(0);
151 }
152 
153 
154 /*
155    VecStashSetInitialSize_Private - Sets the initial size of the stash
156 
157    Input Parameters:
158    stash  - the stash
159    max    - the value that is used as the max size of the stash.
160             this value is used while allocating memory. It specifies
161             the number of vals stored, even with the block-stash
162 */
VecStashSetInitialSize_Private(VecStash * stash,PetscInt max)163 PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)
164 {
165   PetscFunctionBegin;
166   stash->umax = max;
167   PetscFunctionReturn(0);
168 }
169 
170 /* VecStashExpand_Private - Expand the stash. This function is called
171    when the space in the stash is not sufficient to add the new values
172    being inserted into the stash.
173 
174    Input Parameters:
175    stash - the stash
176    incr  - the minimum increase requested
177 
178    Notes:
179    This routine doubles the currently used memory.
180 */
VecStashExpand_Private(VecStash * stash,PetscInt incr)181 PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)
182 {
183   PetscErrorCode ierr;
184   PetscInt       *n_idx,newnmax,bs=stash->bs;
185   PetscScalar    *n_array;
186 
187   PetscFunctionBegin;
188   /* allocate a larger stash. */
189   if (!stash->oldnmax && !stash->nmax) { /* new stash */
190     if (stash->umax)                  newnmax = stash->umax/bs;
191     else                              newnmax = DEFAULT_STASH_SIZE/bs;
192   } else if (!stash->nmax) { /* resuing stash */
193     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
194     else                              newnmax = stash->oldnmax/bs;
195   } else                              newnmax = stash->nmax*2;
196 
197   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
198 
199   ierr = PetscMalloc2(bs*newnmax,&n_array,newnmax,&n_idx);CHKERRQ(ierr);
200   ierr = PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));CHKERRQ(ierr);
201   ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr);
202   ierr = PetscFree2(stash->array,stash->idx);CHKERRQ(ierr);
203 
204   stash->array = n_array;
205   stash->idx   = n_idx;
206   stash->nmax  = newnmax;
207   stash->reallocs++;
208   PetscFunctionReturn(0);
209 }
210 /*
211   VecStashScatterBegin_Private - Initiates the transfer of values to the
212   correct owners. This function goes through the stash, and check the
213   owners of each stashed value, and sends the values off to the owner
214   processors.
215 
216   Input Parameters:
217   stash  - the stash
218   owners - an array of size 'no-of-procs' which gives the ownership range
219            for each node.
220 
221   Notes:
222     The 'owners' array in the cased of the blocked-stash has the
223   ranges specified blocked global indices, and for the regular stash in
224   the proper global indices.
225 */
VecStashScatterBegin_Private(VecStash * stash,PetscInt * owners)226 PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)
227 {
228   PetscErrorCode ierr;
229   PetscMPIInt    size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
230   PetscInt       *owner,*start,*nprocs,nsends,nreceives;
231   PetscInt       nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
232   PetscScalar    *rvalues,*svalues;
233   MPI_Comm       comm = stash->comm;
234   MPI_Request    *send_waits,*recv_waits;
235 
236   PetscFunctionBegin;
237   /*  first count number of contributors to each processor */
238   ierr = PetscCalloc1(2*size,&nprocs);CHKERRQ(ierr);
239   ierr = PetscMalloc1(stash->n,&owner);CHKERRQ(ierr);
240 
241   j       = 0;
242   lastidx = -1;
243   for (i=0; i<stash->n; i++) {
244     /* if indices are NOT locally sorted, need to start search at the beginning */
245     if (lastidx > (idx = stash->idx[i])) j = 0;
246     lastidx = idx;
247     for (; j<size; j++) {
248       if (idx >= owners[j] && idx < owners[j+1]) {
249         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
250       }
251     }
252   }
253   nsends = 0;
254   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
255 
256   /* inform other processors of number of messages and max length*/
257   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
258 
259   /* post receives:
260      since we don't know how long each individual message is we
261      allocate the largest needed buffer for each receive. Potentially
262      this is a lot of wasted space.
263   */
264   ierr = PetscMalloc2(nreceives*nmax*bs,&rvalues,nreceives*nmax,&rindices);CHKERRQ(ierr);
265   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
266   for (i=0,count=0; i<nreceives; i++) {
267     ierr = MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
268     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr);
269   }
270 
271   /* do sends:
272       1) starts[i] gives the starting index in svalues for stuff going to
273          the ith processor
274   */
275   ierr = PetscMalloc2(stash->n*bs,&svalues,stash->n,&sindices);CHKERRQ(ierr);
276   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
277   ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);
278   /* use 2 sends the first with all_v, the next with all_i */
279   start[0] = 0;
280   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
281 
282   for (i=0; i<stash->n; i++) {
283     j = owner[i];
284     if (bs == 1) svalues[start[j]] = stash->array[i];
285     else {
286       ierr = PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));CHKERRQ(ierr);
287     }
288     sindices[start[j]] = stash->idx[i];
289     start[j]++;
290   }
291   start[0] = 0;
292   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
293 
294   for (i=0,count=0; i<size; i++) {
295     if (nprocs[2*i+1]) {
296       ierr = MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
297       ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
298     }
299   }
300   ierr = PetscFree(owner);CHKERRQ(ierr);
301   ierr = PetscFree(start);CHKERRQ(ierr);
302   /* This memory is reused in scatter end  for a different purpose*/
303   for (i=0; i<2*size; i++) nprocs[i] = -1;
304 
305   stash->nprocs     = nprocs;
306   stash->svalues    = svalues;
307   stash->sindices   = sindices;
308   stash->rvalues    = rvalues;
309   stash->rindices   = rindices;
310   stash->nsends     = nsends;
311   stash->nrecvs     = nreceives;
312   stash->send_waits = send_waits;
313   stash->recv_waits = recv_waits;
314   stash->rmax       = nmax;
315   PetscFunctionReturn(0);
316 }
317 
318 /*
319    VecStashScatterGetMesg_Private - This function waits on the receives posted
320    in the function VecStashScatterBegin_Private() and returns one message at
321    a time to the calling function. If no messages are left, it indicates this
322    by setting flg = 0, else it sets flg = 1.
323 
324    Input Parameters:
325    stash - the stash
326 
327    Output Parameters:
328    nvals - the number of entries in the current message.
329    rows  - an array of row indices (or blocked indices) corresponding to the values
330    cols  - an array of columnindices (or blocked indices) corresponding to the values
331    vals  - the values
332    flg   - 0 indicates no more message left, and the current call has no values associated.
333            1 indicates that the current call successfully received a message, and the
334              other output parameters nvals,rows,cols,vals are set appropriately.
335 */
VecStashScatterGetMesg_Private(VecStash * stash,PetscMPIInt * nvals,PetscInt ** rows,PetscScalar ** vals,PetscInt * flg)336 PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)
337 {
338   PetscErrorCode ierr;
339   PetscMPIInt    i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
340   PetscInt       *flg_v;
341   PetscInt       i1,i2,bs=stash->bs;
342   MPI_Status     recv_status;
343   PetscBool      match_found = PETSC_FALSE;
344 
345   PetscFunctionBegin;
346   *flg = 0; /* When a message is discovered this is reset to 1 */
347   /* Return if no more messages to process */
348   if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);
349 
350   flg_v = stash->nprocs;
351   /* If a matching pair of receives are found, process them, and return the data to
352      the calling function. Until then keep receiving messages */
353   while (!match_found) {
354     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
355     /* Now pack the received message into a structure which is useable by others */
356     if (i % 2) {
357       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
358       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
359     } else {
360       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
361       flg_v[2*recv_status.MPI_SOURCE] = i/2;
362       *nvals = *nvals/bs;
363     }
364 
365     /* Check if we have both the messages from this proc */
366     i1 = flg_v[2*recv_status.MPI_SOURCE];
367     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
368     if (i1 != -1 && i2 != -1) {
369       *rows = stash->rindices + i2*stash->rmax;
370       *vals = stash->rvalues + i1*bs*stash->rmax;
371       *flg  = 1;
372       stash->nprocessed++;
373       match_found = PETSC_TRUE;
374     }
375   }
376   PetscFunctionReturn(0);
377 }
378 
379 /*
380  * Sort the stash, removing duplicates (combining as appropriate).
381  */
VecStashSortCompress_Private(VecStash * stash)382 PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
383 {
384   PetscErrorCode ierr;
385   PetscInt i,j,bs = stash->bs;
386 
387   PetscFunctionBegin;
388   if (!stash->n) PetscFunctionReturn(0);
389   if (bs == 1) {
390     ierr = PetscSortIntWithScalarArray(stash->n,stash->idx,stash->array);CHKERRQ(ierr);
391     for (i=1,j=0; i<stash->n; i++) {
392       if (stash->idx[i] == stash->idx[j]) {
393         switch (stash->insertmode) {
394         case ADD_VALUES:
395           stash->array[j] += stash->array[i];
396           break;
397         case INSERT_VALUES:
398           stash->array[j] = stash->array[i];
399           break;
400         default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
401         }
402       } else {
403         j++;
404         stash->idx[j]   = stash->idx[i];
405         stash->array[j] = stash->array[i];
406       }
407     }
408     stash->n = j + 1;
409   } else {                      /* block stash */
410     PetscInt *perm = NULL;
411     PetscScalar *arr;
412     ierr = PetscMalloc2(stash->n,&perm,stash->n*bs,&arr);CHKERRQ(ierr);
413     for (i=0; i<stash->n; i++) perm[i] = i;
414     ierr = PetscSortIntWithArray(stash->n,stash->idx,perm);CHKERRQ(ierr);
415 
416     /* Out-of-place copy of arr */
417     ierr = PetscMemcpy(arr,stash->array+perm[0]*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr);
418     for (i=1,j=0; i<stash->n; i++) {
419       PetscInt k;
420       if (stash->idx[i] == stash->idx[j]) {
421         switch (stash->insertmode) {
422         case ADD_VALUES:
423           for (k=0; k<bs; k++) arr[j*bs+k] += stash->array[perm[i]*bs+k];
424           break;
425         case INSERT_VALUES:
426           for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
427           break;
428         default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
429         }
430       } else {
431         j++;
432         stash->idx[j] = stash->idx[i];
433         for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
434       }
435     }
436     stash->n = j + 1;
437     ierr = PetscMemcpy(stash->array,arr,stash->n*bs*sizeof(PetscScalar));CHKERRQ(ierr);
438     ierr = PetscFree2(perm,arr);CHKERRQ(ierr);
439   }
440   PetscFunctionReturn(0);
441 }
442 
VecStashGetOwnerList_Private(VecStash * stash,PetscLayout map,PetscMPIInt * nowners,PetscMPIInt ** owners)443 PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash,PetscLayout map,PetscMPIInt *nowners,PetscMPIInt **owners)
444 {
445   PetscInt       i,bs = stash->bs;
446   PetscMPIInt    r;
447   PetscSegBuffer seg;
448   PetscErrorCode ierr;
449 
450   PetscFunctionBegin;
451   if (bs != 1 && bs != map->bs) SETERRQ2(map->comm,PETSC_ERR_PLIB,"Stash block size %D does not match layout block size %D",bs,map->bs);
452   ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),50,&seg);CHKERRQ(ierr);
453   *nowners = 0;
454   for (i=0,r=-1; i<stash->n; i++) {
455     if (stash->idx[i]*bs >= map->range[r+1]) {
456       PetscMPIInt *rank;
457       ierr = PetscSegBufferGet(seg,1,&rank);CHKERRQ(ierr);
458       ierr = PetscLayoutFindOwner(map,stash->idx[i]*bs,&r);CHKERRQ(ierr);
459       *rank = r;
460       (*nowners)++;
461     }
462   }
463   ierr = PetscSegBufferExtractAlloc(seg,owners);CHKERRQ(ierr);
464   ierr = PetscSegBufferDestroy(&seg);CHKERRQ(ierr);
465   PetscFunctionReturn(0);
466 }
467