1 #include "redist.h"
2 /** $Id: pitrmr.c,v 1.1.1.1 2000/02/15 18:04:08 susan Exp $
3 ------------------------------------------------------------------------
4
5 -- ScaLAPACK routine (version 1.7) --
6 Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of
7 California, Berkeley.
8 October 31, 1994.
9
10 SUBROUTINE PITRMR2D(UPLO, DIAG, M, N,
11 $ A, IA, JA, ADESC,
12 $ B, IB, JB, BDESC,
13 $ CTXT)
14 ------------------------------------------------------------------------
15 Purpose
16 =======
17
18 PITRMR2D copies a submatrix of A on a submatrix of B.
19 A and B can have different distributions: they can be on different
20 processor grids, they can have different blocksizes, the beginning
21 of the area to be copied can be at a different places on A and B.
22
23 The parameters can be confusing when the grids of A and B are
24 partially or completly disjoint, in the case a processor calls
25 this routines but is either not in the A context or B context, the
26 ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the
27 routine recognise this situation.
28 To summarize the rule:
29 - If a processor is in A context, all parameters related to A must be valid.
30 - If a processor is in B context, all parameters related to B must be valid.
31 - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1.
32 - M and N must be valid for everyone.
33 - other parameters are not examined.
34
35 The submatrix to be copied is assumed to be trapezoidal. So only
36 the upper or the lower part will be copied. The other part is
37 unchanged.
38
39 Notes
40 =====
41
42 A description vector is associated with each 2D block-cyclicly dis-
43 tributed matrix. This vector stores the information required to
44 establish the mapping between a matrix entry and its corresponding
45 process and memory location.
46
47 In the following comments, the character _ should be read as
48 "of the distributed matrix". Let A be a generic term for any 2D
49 block cyclicly distributed matrix. Its description vector is DESC_A:
50
51 NOTATION STORED IN EXPLANATION
52 --------------- -------------- --------------------------------------
53 DT_A (global) DESCA( DT_ ) The descriptor type.
54 CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
55 the BLACS process grid A is distribu-
56 ted over. The context itself is glo-
57 bal, but the handle (the integer
58 value) may vary.
59 M_A (global) DESCA( M_ ) The number of rows in the distributed
60 matrix A.
61 N_A (global) DESCA( N_ ) The number of columns in the distri-
62 buted matrix A.
63 MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
64 the rows of A.
65 NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
66 the columns of A.
67 RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
68 row of the matrix A is distributed.
69 CSRC_A (global) DESCA( CSRC_ ) The process column over which the
70 first column of A is distributed.
71 LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
72 array storing the local blocks of the
73 distributed matrix A.
74 LLD_A >= MAX(1,LOCp(M_A)).
75
76
77
78 Important notice
79 ================
80 The parameters of the routine have changed in April 1996
81 There is a new last argument. It must be a context englobing
82 all processors involved in the initial and final distribution.
83
84 Be aware that all processors included in this
85 context must call the redistribution routine.
86
87 Parameters
88 ==========
89
90 UPLO (input) CHARACTER*1.
91 On entry, UPLO specifies whether we should copy the upper
92 part of the lower part of the defined submatrix:
93 UPLO = 'U' or 'u' copy the upper triangular part.
94 UPLO = 'L' or 'l' copy the lower triangular part.
95 Unchanged on exit.
96
97 DIAG (input) CHARACTER*1.
98 On entry, DIAG specifies whether we should copy the diagonal.
99 DIAG = 'U' or 'u' do NOT copy the diagonal of the submatrix.
100 DIAG = 'N' or 'n' DO copy the diagonal of the submatrix.
101 Unchanged on exit.
102
103 M (input) INTEGER.
104 On entry, M specifies the number of rows of the
105 submatrix to be copied. M must be at least zero.
106 Unchanged on exit.
107
108 N (input) INTEGER.
109 On entry, N specifies the number of cols of the submatrix
110 to be redistributed.rows of B. M must be at least zero.
111 Unchanged on exit.
112
113 A (input) INTEGER
114 On entry, the source matrix.
115 Unchanged on exit.
116
117 IA,JA (input) INTEGER
118 On entry,the coordinates of the beginning of the submatrix
119 of A to copy.
120 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1,
121 Unchanged on exit.
122
123 ADESC (input) A description vector (see Notes above)
124 If the current processor is not part of the context of A
125 the ADESC[CTXT] must be equal to -1.
126
127
128 B (output) INTEGER
129 On entry, the destination matrix.
130 The portion corresponding to the defined submatrix are updated.
131
132 IB,JB (input) INTEGER
133 On entry,the coordinates of the beginning of the submatrix
134 of B that will be updated.
135 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1,
136 Unchanged on exit.
137
138 BDESC (input) B description vector (see Notes above)
139 For processors not part of the context of B
140 BDESC[CTXT] must be equal to -1.
141
142 CTXT (input) a context englobing at least all processors included
143 in either A context or B context
144
145
146
147 Memory requirement :
148 ====================
149
150 for the processors belonging to grid 0, one buffer of size block 0
151 and for the processors belonging to grid 1, also one buffer of size
152 block 1.
153
154 ============================================================
155 Created March 1993 by B. Tourancheau (See sccs for modifications).
156 Modifications by Loic PRYLLI 1995
157 ============================================================ */
158 #define static2 static
159 #if defined(Add_) || defined(f77IsF2C)
160 #define fortran_mr2d pitrmr2do_
161 #define fortran_mr2dnew pitrmr2d_
162 #elif defined(UpCase)
163 #define fortran_mr2dnew PITRMR2D
164 #define fortran_mr2d PITRMR2DO
165 #define icopy_ ICOPY
166 #define ilacpy_ ILACPY
167 #else
168 #define fortran_mr2d pitrmr2do
169 #define fortran_mr2dnew pitrmr2d
170 #define icopy_ icopy
171 #define ilacpy_ ilacpy
172 #endif
173 #define Clacpy Citrlacpy
174 void Clacpy();
175 typedef struct {
176 int desctype;
177 int ctxt;
178 int m;
179 int n;
180 int nbrow;
181 int nbcol;
182 int sprow;
183 int spcol;
184 int lda;
185 } MDESC;
186 #define BLOCK_CYCLIC_2D 1
187 typedef struct {
188 int gstart;
189 int len;
190 } IDESC;
191 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
192 #define max(A,B) ((A)>(B)?(A):(B))
193 #define min(A,B) ((A)>(B)?(B):(A))
194 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
195 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
196 #ifdef MALLOCDEBUG
197 #define malloc mymalloc
198 #define free myfree
199 #define realloc myrealloc
200 #endif
201 /* Cblacs */
202 extern void Cblacs_pcoord();
203 extern int Cblacs_pnum();
204 extern void Csetpvmtids();
205 extern void Cblacs_get();
206 extern void Cblacs_pinfo();
207 extern void Cblacs_gridinfo();
208 extern void Cblacs_gridinit();
209 extern void Cblacs_exit();
210 extern void Cblacs_gridexit();
211 extern void Cblacs_setup();
212 extern void Cigebs2d();
213 extern void Cigebr2d();
214 extern void Cigesd2d();
215 extern void Cigerv2d();
216 extern void Cigsum2d();
217 extern void Cigamn2d();
218 extern void Cigamx2d();
219 extern void Cigesd2d();
220 extern void Cigerv2d();
221 /* lapack */
222 void ilacpy_();
223 /* aux fonctions */
224 extern int localindice();
225 extern void *mr2d_malloc();
226 extern int ppcm();
227 extern int localsize();
228 extern int memoryblocksize();
229 extern int changeorigin();
230 extern void paramcheck();
231 /* tools and others function */
232 #define scanD0 itrscanD0
233 #define dispmat itrdispmat
234 #define setmemory itrsetmemory
235 #define freememory itrfreememory
236 #define scan_intervals itrscan_intervals
237 extern void scanD0();
238 extern void dispmat();
239 extern void setmemory();
240 extern void freememory();
241 extern int scan_intervals();
242 extern void Cpitrmr2do();
243 extern void Cpitrmr2d();
244 /* some defines for Cpitrmr2do */
245 #define SENDBUFF 0
246 #define RECVBUFF 1
247 #define SIZEBUFF 2
248 #if 0
249 #define DEBUG
250 #endif
251 #ifndef DEBUG
252 #define NDEBUG
253 #endif
254 #include <stdio.h>
255 #include <stdlib.h>
256 #include <assert.h>
257 #define DESCLEN 9
258 void
fortran_mr2d(uplo,diag,m,n,A,ia,ja,desc_A,B,ib,jb,desc_B)259 fortran_mr2d(uplo, diag, m, n, A, ia, ja, desc_A,
260 B, ib, jb, desc_B)
261 char *uplo, *diag;
262 int *ia, *ib, *ja, *jb, *m, *n;
263 int desc_A[DESCLEN], desc_B[DESCLEN];
264 int *A, *B;
265 {
266 Cpitrmr2do(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A,
267 B, *ib, *jb, (MDESC *) desc_B);
268 return;
269 }
270 void
fortran_mr2dnew(uplo,diag,m,n,A,ia,ja,desc_A,B,ib,jb,desc_B,gcontext)271 fortran_mr2dnew(uplo, diag, m, n, A, ia, ja, desc_A,
272 B, ib, jb, desc_B, gcontext)
273 char *uplo, *diag;
274 int *ia, *ib, *ja, *jb, *m, *n;
275 int desc_A[DESCLEN], desc_B[DESCLEN];
276 int *A, *B;
277 int *gcontext;
278 {
279 Cpitrmr2d(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A,
280 B, *ib, *jb, (MDESC *) desc_B, *gcontext);
281 return;
282 }
283 static2 void init_chenille();
284 static2 int inter_len();
285 static2 int block2buff();
286 static2 void buff2block();
287 static2 void gridreshape();
288 void
Cpitrmr2do(uplo,diag,m,n,ptrmyblock,ia,ja,ma,ptrmynewblock,ib,jb,mb)289 Cpitrmr2do(uplo, diag, m, n,
290 ptrmyblock, ia, ja, ma,
291 ptrmynewblock, ib, jb, mb)
292 char *uplo, *diag;
293 int *ptrmyblock, *ptrmynewblock;
294 /* pointers to the memory location of the matrix and the redistributed matrix */
295 MDESC *ma;
296 MDESC *mb;
297 int ia, ja, ib, jb, m, n;
298 {
299 int dummy, nprocs;
300 int gcontext;
301 /* first we initialize a global grid which serve as a reference to
302 * communicate from grid a to grid b */
303 Cblacs_pinfo(&dummy, &nprocs);
304 Cblacs_get(0, 0, &gcontext);
305 Cblacs_gridinit(&gcontext, "R", 1, nprocs);
306 Cpitrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma,
307 ptrmynewblock, ib, jb, mb, gcontext);
308 Cblacs_gridexit(gcontext);
309 }
310 #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis
311 * idem B puis ia,ja puis ib,jb */
312 #define MAGIC_MAX 100000000
313 void
Cpitrmr2d(uplo,diag,m,n,ptrmyblock,ia,ja,ma,ptrmynewblock,ib,jb,mb,globcontext)314 Cpitrmr2d(uplo, diag, m, n,
315 ptrmyblock, ia, ja, ma,
316 ptrmynewblock, ib, jb, mb, globcontext)
317 char *uplo, *diag;
318 int *ptrmyblock, *ptrmynewblock;
319 /* pointers to the memory location of the matrix and the redistributed matrix */
320 MDESC *ma;
321 MDESC *mb;
322 int ia, ja, ib, jb, m, n, globcontext;
323 {
324 int *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
325 int *recvptr;
326 MDESC newa, newb;
327 int *proc0, *proc1, *param;
328 int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
329 int i, j;
330 int nprow, npcol, gcontext;
331 int recvsize, sendsize;
332 IDESC *h_inter; /* to store the horizontal intersections */
333 IDESC *v_inter; /* to store the vertical intersections */
334 int hinter_nb, vinter_nb; /* number of intrsections in both directions */
335 int dummy;
336 int p0, q0, p1, q1;
337 int *ra, *ca;
338 /* end of variables */
339 /* To simplify further calcul we change the matrix indexation from
340 * 1..m,1..n (fortran) to 0..m-1,0..n-1 */
341 if (m == 0 || n == 0)
342 return;
343 ia -= 1;
344 ja -= 1;
345 ib -= 1;
346 jb -= 1;
347 Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum);
348 gcontext = globcontext;
349 nprocs = nprow * npcol;
350 /* if the global context that is given to us has not the shape of a line
351 * (nprow != 1), create a new context. TODO: to be optimal, we should
352 * avoid this because it is an uncessary synchronisation */
353 if (nprow != 1) {
354 gridreshape(&gcontext);
355 Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum);
356 }
357 Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0);
358 /* compatibility T3D, must check myprow and mypcol are within bounds */
359 if (myprow0 >= p0 || mypcol0 >= q0)
360 myprow0 = mypcol0 = -1;
361 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
362 Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1);
363 if (myprow1 >= p1 || mypcol1 >= q1)
364 myprow1 = mypcol1 = -1;
365 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
366 /* exchange the missing parameters among the processors: shape of grids and
367 * location of the processors */
368 param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int));
369 ra = param + nprocs * 2 + NBPARAM;
370 ca = param + (nprocs * 2 + NBPARAM) * 2;
371 for (i = 0; i < nprocs * 2 + NBPARAM; i++)
372 param[i] = MAGIC_MAX;
373 proc0 = param + NBPARAM;
374 proc1 = param + NBPARAM + nprocs;
375 /* we calulate proc0 and proc1 that will give the number of a proc in
376 * respectively a or b in the global context */
377 if (myprow0 >= 0) {
378 proc0[myprow0 * q0 + mypcol0] = mypnum;
379 param[0] = p0;
380 param[1] = q0;
381 param[4] = ma->m;
382 param[5] = ma->n;
383 param[6] = ma->nbrow;
384 param[7] = ma->nbcol;
385 param[8] = ma->sprow;
386 param[9] = ma->spcol;
387 param[10] = ia;
388 param[11] = ja;
389 }
390 if (myprow1 >= 0) {
391 proc1[myprow1 * q1 + mypcol1] = mypnum;
392 param[2] = p1;
393 param[3] = q1;
394 param[12] = mb->m;
395 param[13] = mb->n;
396 param[14] = mb->nbrow;
397 param[15] = mb->nbcol;
398 param[16] = mb->sprow;
399 param[17] = mb->spcol;
400 param[18] = ib;
401 param[19] = jb;
402 }
403 Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM,
404 ra, ca, 2 * nprocs + NBPARAM, -1, -1);
405 newa = *ma;
406 newb = *mb;
407 ma = &newa;
408 mb = &newb;
409 if (myprow0 == -1) {
410 p0 = param[0];
411 q0 = param[1];
412 ma->m = param[4];
413 ma->n = param[5];
414 ma->nbrow = param[6];
415 ma->nbcol = param[7];
416 ma->sprow = param[8];
417 ma->spcol = param[9];
418 ia = param[10];
419 ja = param[11];
420 }
421 if (myprow1 == -1) {
422 p1 = param[2];
423 q1 = param[3];
424 mb->m = param[12];
425 mb->n = param[13];
426 mb->nbrow = param[14];
427 mb->nbcol = param[15];
428 mb->sprow = param[16];
429 mb->spcol = param[17];
430 ib = param[18];
431 jb = param[19];
432 }
433 for (i = 0; i < NBPARAM; i++) {
434 if (param[i] == MAGIC_MAX) {
435 fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n");
436 exit(1);
437 }
438 }
439 #ifndef NDEBUG
440 for (i = 0; i < p0 * q0; i++)
441 assert(proc0[i] >= 0 && proc0[i] < nprocs);
442 for (i = 0; i < p1 * q1; i++)
443 assert(proc1[i] >= 0 && proc1[i] < nprocs);
444 #endif
445 /* check the validity of the parameters */
446 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
447 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
448 /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */
449 {
450 int decal;
451 ia = changeorigin(myprow0, ma->sprow, p0,
452 ma->nbrow, ia, &decal, &ma->sprow);
453 ptrmyblock += decal;
454 ja = changeorigin(mypcol0, ma->spcol, q0,
455 ma->nbcol, ja, &decal, &ma->spcol);
456 ptrmyblock += decal * ma->lda;
457 ma->m = ia + m;
458 ma->n = ja + n;
459 ib = changeorigin(myprow1, mb->sprow, p1,
460 mb->nbrow, ib, &decal, &mb->sprow);
461 ptrmynewblock += decal;
462 jb = changeorigin(mypcol1, mb->spcol, q1,
463 mb->nbcol, jb, &decal, &mb->spcol);
464 ptrmynewblock += decal * mb->lda;
465 mb->m = ib + m;
466 mb->n = jb + n;
467 if (p0 == 1)
468 ma->nbrow = ma->m;
469 if (q0 == 1)
470 ma->nbcol = ma->n;
471 if (p1 == 1)
472 mb->nbrow = mb->m;
473 if (q1 == 1)
474 mb->nbcol = mb->n;
475 #ifndef NDEBUG
476 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
477 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
478 #endif
479 }
480 /* We compute the size of the memory buffer ( we choose the worst case,
481 * when the buffer sizes == the memory block sizes). */
482 if (myprow0 >= 0 && mypcol0 >= 0) {
483 /* Initialize pointer variables */
484 setmemory(&ptrsendbuff, memoryblocksize(ma));
485 }; /* if (mypnum < p0 * q0) */
486 if (myprow1 >= 0 && mypcol1 >= 0) {
487 /* Initialize pointer variables */
488 setmemory(&ptrrecvbuff, memoryblocksize(mb));
489 }; /* if (mypnum < p1 * q1) */
490 /* allocing room for the tabs, alloc for the worst case,local_n or local_m
491 * intervals, in fact the worst case should be less, perhaps half that,I
492 * should think of that one day. */
493 h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) *
494 ma->nbcol * sizeof(IDESC));
495 v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow)
496 * ma->nbrow * sizeof(IDESC));
497 /* We go for the scanning of indices. For each processor including mypnum,
498 * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send
499 * it. Then for each processor, we compute the size of message to be
500 * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements
501 * of recvbuff the right place (scanD)(RECVBUFF)) */
502 recvptr = ptrrecvbuff;
503 {
504 int tot, myrang, step, sens;
505 int *sender, *recver;
506 int mesending, merecving;
507 tot = max(p0 * q0, p1 * q1);
508 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
509 &sender, &recver, &myrang);
510 if (myrang == -1)
511 goto after_comm;
512 mesending = myprow0 >= 0;
513 assert(sender[myrang] >= 0 || !mesending);
514 assert(!mesending || proc0[sender[myrang]] == mypnum);
515 merecving = myprow1 >= 0;
516 assert(recver[myrang] >= 0 || !merecving);
517 assert(!merecving || proc1[recver[myrang]] == mypnum);
518 step = tot - 1 - myrang;
519 do {
520 for (sens = 0; sens < 2; sens++) {
521 /* be careful here, when we communicating with ourselves, we must
522 * send first (myrang > step == 0) */
523 if (mesending && recver[step] >= 0 &&
524 (sens == 0)) {
525 i = recver[step] / q1;
526 j = recver[step] % q1;
527 vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
528 v_inter);
529 hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
530 h_inter);
531 scanD0(uplo, diag, SENDBUFF, ptrsendbuff, &sendsize,
532 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
533 v_inter, vinter_nb, h_inter, hinter_nb,
534 ptrmyblock);
535 } /* if (mesending...) { */
536 if (mesending && recver[step] >= 0 &&
537 (sens == myrang > step)) {
538 i = recver[step] / q1;
539 j = recver[step] % q1;
540 if (sendsize > 0
541 && (step != myrang || !merecving)
542 ) {
543 Cigesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize,
544 0, proc1[i * q1 + j]);
545 } /* sendsize > 0 */
546 } /* if (mesending ... */
547 if (merecving && sender[step] >= 0 &&
548 (sens == myrang <= step)) {
549 i = sender[step] / q0;
550 j = sender[step] % q0;
551 vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
552 v_inter);
553 hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
554 h_inter);
555 scanD0(uplo, diag, SIZEBUFF, ptrNULL, &recvsize,
556 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
557 v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL);
558 if (recvsize > 0) {
559 if (step == myrang && mesending) {
560 Clacpy(recvsize, 1,
561 ptrsendbuff, recvsize,
562 ptrrecvbuff, recvsize);
563 } else {
564 Cigerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize,
565 0, proc0[i * q0 + j]);
566 }
567 } /* recvsize > 0 */
568 } /* if (merecving ...) */
569 if (merecving && sender[step] >= 0 && sens == 1) {
570 scanD0(uplo, diag, RECVBUFF, ptrrecvbuff, &recvsize,
571 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
572 v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock);
573 } /* if (merecving...) */
574 } /* for (sens = 0) */
575 step -= 1;
576 if (step < 0)
577 step = tot - 1;
578 } while (step != tot - 1 - myrang);
579 after_comm:
580 free(sender);
581 } /* { int tot,nr,ns ...} */
582 /* don't forget to clean up things! */
583 if (myprow1 >= 0 && mypcol1 >= 0) {
584 freememory((char *) ptrrecvbuff);
585 };
586 if (myprow0 >= 0 && mypcol0 >= 0) {
587 freememory((char *) ptrsendbuff);
588 };
589 if (nprow != 1)
590 Cblacs_gridexit(gcontext);
591 free(v_inter);
592 free(h_inter);
593 free(param);
594 }/* distrib */
595 static2 void
init_chenille(mypnum,nprocs,n0,proc0,n1,proc1,psend,precv,myrang)596 init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang)
597 int nprocs, mypnum, n0, n1;
598 int *proc0, *proc1, **psend, **precv, *myrang;
599 {
600 int ns, nr, i, tot;
601 int *sender, *recver, *g0, *g1;
602 tot = max(n0, n1);
603 sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2);
604 recver = sender + tot;
605 *psend = sender;
606 *precv = recver;
607 g0 = recver + tot;
608 g1 = g0 + nprocs;
609 for (i = 0; i < nprocs; i++) {
610 g0[i] = -1;
611 g1[i] = -1;
612 }
613 for (i = 0; i < tot; i++) {
614 sender[i] = -1;
615 recver[i] = -1;
616 }
617 for (i = 0; i < n0; i++)
618 g0[proc0[i]] = i;
619 for (i = 0; i < n1; i++)
620 g1[proc1[i]] = i;
621 ns = 0;
622 nr = 0;
623 *myrang = -1;
624 for (i = 0; i < nprocs; i++)
625 if (g0[i] >= 0 && g1[i] >= 0) {
626 if (i == mypnum)
627 *myrang = nr;
628 sender[ns] = g0[i];
629 ns += 1;
630 recver[nr] = g1[i];
631 nr += 1;
632 assert(ns <= n0 && nr <= n1 && nr == ns);
633 }
634 for (i = 0; i < nprocs; i++)
635 if (g0[i] >= 0 && g1[i] < 0) {
636 if (i == mypnum)
637 *myrang = ns;
638 sender[ns] = g0[i];
639 ns += 1;
640 assert(ns <= n0);
641 }
642 for (i = 0; i < nprocs; i++)
643 if (g1[i] >= 0 && g0[i] < 0) {
644 if (i == mypnum)
645 *myrang = nr;
646 recver[nr] = g1[i];
647 nr += 1;
648 assert(nr <= n1);
649 }
650 }
651 void
Clacpy(m,n,a,lda,b,ldb)652 Clacpy(m, n, a, lda, b, ldb)
653 int *a, *b;
654 int m, n, lda, ldb;
655 {
656 int i, j;
657 lda -= m;
658 ldb -= m;
659 assert(lda >= 0 && ldb >= 0);
660 for (j = 0; j < n; j++) {
661 for (i = 0; i < m; i++)
662 *b++ = *a++;
663 b += ldb;
664 a += lda;
665 }
666 }
667 static2 void
gridreshape(ctxtp)668 gridreshape(ctxtp)
669 int *ctxtp;
670 {
671 int ori, final; /* original context, and new context created, with
672 * line form */
673 int nprow, npcol, myrow, mycol;
674 int *usermap;
675 int i, j;
676 ori = *ctxtp;
677 Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol);
678 usermap = mr2d_malloc(sizeof(int) * nprow * npcol);
679 for (i = 0; i < nprow; i++)
680 for (j = 0; j < npcol; j++) {
681 usermap[i + j * nprow] = Cblacs_pnum(ori, i, j);
682 }
683 /* Cblacs_get(0, 0, &final); */
684 Cblacs_get(ori, 10, &final);
685 Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol);
686 *ctxtp = final;
687 free(usermap);
688 }
689