1 #include "redist.h"
2 /* $Id: pgemraux.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $
3 *
4 * some functions used by the pigemr2d routine see file pigemr.c for more
5 * documentation.
6 *
7 * Created March 1993 by B. Tourancheau (See sccs for modifications). */
8 #define static2 static
9 #if defined(Add_) || defined(f77IsF2C)
10 #define fortran_mr2d pigemr2do_
11 #define fortran_mr2dnew pigemr2d_
12 #elif defined(UpCase)
13 #define fortran_mr2dnew PIGEMR2D
14 #define fortran_mr2d PIGEMR2DO
15 #define icopy_ ICOPY
16 #define ilacpy_ ILACPY
17 #else
18 #define fortran_mr2d pigemr2do
19 #define fortran_mr2dnew pigemr2d
20 #define icopy_ icopy
21 #define ilacpy_ ilacpy
22 #endif
23 #define Clacpy Cigelacpy
24 void Clacpy();
25 typedef struct {
26 int desctype;
27 int ctxt;
28 int m;
29 int n;
30 int nbrow;
31 int nbcol;
32 int sprow;
33 int spcol;
34 int lda;
35 } MDESC;
36 #define BLOCK_CYCLIC_2D 1
37 typedef struct {
38 int lstart;
39 int len;
40 } IDESC;
41 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
42 #define max(A,B) ((A)>(B)?(A):(B))
43 #define min(A,B) ((A)>(B)?(B):(A))
44 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
45 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
46 #ifdef MALLOCDEBUG
47 #define malloc mymalloc
48 #define free myfree
49 #define realloc myrealloc
50 #endif
51 /* Cblacs */
52 extern void Cblacs_pcoord();
53 extern int Cblacs_pnum();
54 extern void Csetpvmtids();
55 extern void Cblacs_get();
56 extern void Cblacs_pinfo();
57 extern void Cblacs_gridinfo();
58 extern void Cblacs_gridinit();
59 extern void Cblacs_exit();
60 extern void Cblacs_gridexit();
61 extern void Cblacs_setup();
62 extern void Cigebs2d();
63 extern void Cigebr2d();
64 extern void Cigesd2d();
65 extern void Cigerv2d();
66 extern void Cigsum2d();
67 extern void Cigamn2d();
68 extern void Cigamx2d();
69 extern void Cigesd2d();
70 extern void Cigerv2d();
71 /* lapack */
72 void ilacpy_();
73 /* aux fonctions */
74 extern int localindice();
75 extern void *mr2d_malloc();
76 extern int ppcm();
77 extern int localsize();
78 extern int memoryblocksize();
79 extern int changeorigin();
80 extern void paramcheck();
81 /* tools and others function */
82 #define scanD0 igescanD0
83 #define dispmat igedispmat
84 #define setmemory igesetmemory
85 #define freememory igefreememory
86 #define scan_intervals igescan_intervals
87 extern void scanD0();
88 extern void dispmat();
89 extern void setmemory();
90 extern void freememory();
91 extern int scan_intervals();
92 extern void Cpigemr2do();
93 extern void Cpigemr2d();
94 /* some defines for Cpigemr2do */
95 #define SENDBUFF 0
96 #define RECVBUFF 1
97 #define SIZEBUFF 2
98 #if 0
99 #define DEBUG
100 #endif
101 #ifndef DEBUG
102 #define NDEBUG
103 #endif
104 #include <stdio.h>
105 #include <stdlib.h>
106 #include <assert.h>
107 void *
mr2d_malloc(n)108 mr2d_malloc(n)
109 int n;
110 {
111 void *ptr;
112 assert(n > 0);
113 ptr = (void *) malloc(n);
114 if (ptr == NULL) {
115 fprintf(stderr, "xxmr2d:out of memory\n");
116 exit(2);
117 }
118 return ptr;
119 }
120 int
pgcd(a,b)121 pgcd(a, b)
122 int a, b;
123 {
124 int aux;
125 if (a < b)
126 return pgcd(b, a);
127 else {
128 aux = a % b;
129 if (aux == 0)
130 return b;
131 else
132 return pgcd(b, aux);
133 }
134 }
135 int
ppcm(a,b)136 ppcm(a, b)
137 int a, b;
138 {
139 int pg;
140 pg = pgcd(a, b);
141 return a * (b / pg);
142 }
143 /* localsize:return the number of rows on the local processor given by its
144 * row number myprow, of a distributed matrix with m rows distributed of on a
145 * grid of processors with p rows with blocksize nbrow : this procedure can
146 * also be used to compute the number of cols by replacing rows by cols */
147 int
localsize(myprow,p,nbrow,m)148 localsize(myprow, p, nbrow, m)
149 int myprow, p, nbrow, m;
150 {
151 int templateheight, blockheight;
152 templateheight = p * nbrow;
153 if (m % templateheight != 0) { /* not an exact boundary */
154 if ((m % templateheight) > (nbrow * myprow)) { /* processor
155 * (myprow,mypcol) has
156 * some elements in that
157 * incomplete template */
158 if ((m % templateheight) >= (nbrow * (myprow + 1))) { /* processor
159 * (myprow,mypcol)'s
160 * part is complete */
161 blockheight = (m / templateheight) * nbrow + nbrow;
162 } else { /* processor (myprow,mypcol)'s part is not complete */
163 blockheight = (m / templateheight) * nbrow + (m % nbrow);
164 }; /* if ((m%templateheight) > (nbrow*(myprow+1))) */
165 } else { /* processor (myprow,mypcol) has no element in that
166 * incomplete template */
167 blockheight = (m / templateheight) * nbrow;
168 }; /* if ((m%templateheight) > (nbrow*myprow)) */
169 } else { /* exact boundary */
170 blockheight = m / p; /* (m/templateheight) * nbrow */
171 }; /* if (m%templateheight !=0) */
172 return blockheight;
173 }
174 /****************************************************************/
175 /* Returns the exact memory block size corresponding to the parameters */
176 int
memoryblocksize(a)177 memoryblocksize(a)
178 MDESC *a;
179 {
180 int myprow, mypcol, p, q;
181 /* Compute the (myprow,mypcol) indices of processor mypnum in P0xQ0 We
182 * assume the row-major ordering of the BLACS */
183 Cblacs_gridinfo(a->ctxt, &p, &q, &myprow, &mypcol);
184 myprow = SHIFT(myprow, a->sprow, p);
185 mypcol = SHIFT(mypcol, a->spcol, q);
186 assert(myprow >= 0 && mypcol >= 0);
187 return localsize(myprow, p, a->nbrow, a->m) *
188 localsize(mypcol, q, a->nbcol, a->n);
189 }
190 void
checkequal(ctxt,a)191 checkequal(ctxt, a)
192 int a, ctxt;
193 {
194 int np, dummy, nbrow, myp, b;
195 Cblacs_gridinfo(ctxt, &nbrow, &np, &dummy, &myp);
196 assert(nbrow == 1);
197 if (np == 1)
198 return;
199 if (myp == 0) {
200 Cigesd2d(ctxt, 1, 1, &a, 1, 0, 1);
201 Cigerv2d(ctxt, 1, 1, &b, 1, 0, np - 1);
202 assert(a == b);
203 } else {
204 Cigerv2d(ctxt, 1, 1, &b, 1, 0, myp - 1);
205 assert(a == b);
206 Cigesd2d(ctxt, 1, 1, &a, 1, 0, (myp + 1) % np);
207 }
208 }
209 void
paramcheck(a,i,j,m,n,p,q,gcontext)210 paramcheck(a, i, j, m, n, p, q, gcontext)
211 MDESC *a;
212 int i, j, m, n, p, q;
213 {
214 int p2, q2, myprow, mypcol;
215 #ifndef NDEBUG
216 checkequal(gcontext, p);
217 checkequal(gcontext, q);
218 checkequal(gcontext, a->sprow);
219 checkequal(gcontext, a->spcol);
220 checkequal(gcontext, a->m);
221 checkequal(gcontext, a->n);
222 checkequal(gcontext, i);
223 checkequal(gcontext, j);
224 checkequal(gcontext, a->nbrow);
225 checkequal(gcontext, a->nbcol);
226 #endif
227 Cblacs_gridinfo(a->ctxt, &p2, &q2, &myprow, &mypcol);
228 /* compatibility T3D, must check myprow and mypcol are within bounds */
229 if (myprow >= p2 || mypcol >= q2)
230 myprow = mypcol = -1;
231 if ((myprow >= 0 || mypcol >= 0) && (p2 != p && q2 != q)) {
232 fprintf(stderr, "??MR2D:incoherent p,q parameters\n");
233 exit(1);
234 }
235 assert(myprow < p && mypcol < q);
236 if (a->sprow < 0 || a->sprow >= p || a->spcol < 0 || a->spcol >= q) {
237 fprintf(stderr, "??MR2D:Bad first processor coordinates\n");
238 exit(1);
239 }
240 if (i < 0 || j < 0 || i + m > a->m || j + n > a->n) {
241 fprintf(stderr, "??MR2D:Bad submatrix:i=%d,j=%d,\
242 m=%d,n=%d,M=%d,N=%d\n",
243 i, j, m, n, a->m, a->n);
244 exit(1);
245 }
246 if ((myprow >= 0 || mypcol >= 0) &&
247 localsize(SHIFT(myprow, a->sprow, p), p, a->nbrow, a->m) > a->lda) {
248 fprintf(stderr, "??MR2D:bad lda arg:row=%d,m=%d,p=%d,\
249 nbrow=%d,lda=%d,sprow=%d\n",
250 myprow, a->m, p, a->nbrow, a->lda, a->sprow);
251 exit(1);
252 }
253 }
254 /* to change from the submatrix beginning at line i to one beginning at line
255 * i' with i'< blocksize return the line number on the local process where
256 * the new matrix begin, the new process number, and i' */
257 int
changeorigin(myp,sp,p,bs,i,decal,newsp)258 changeorigin(myp, sp, p, bs, i, decal, newsp)
259 int myp, sp, p, bs, i;
260 int *decal, *newsp;
261 {
262 int tempheight, firstblock, firsttemp;
263 /* we begin by changing the parameters so that ia < templatewidth,... */
264 tempheight = bs * p;
265 firsttemp = i / tempheight;
266 firstblock = (i / bs) % p;
267 *newsp = (sp + firstblock) % p;
268 if (myp >= 0)
269 *decal = firsttemp * bs + (SHIFT(myp, sp, p) < firstblock ? bs : 0);
270 else
271 *decal = 0;
272 return i % bs;
273 }
274 /******************************************************************/
275 /* Return the indice in local memory of element of indice a in the matrix */
276 int
localindice(ig,jg,templateheight,templatewidth,a)277 localindice(ig, jg, templateheight, templatewidth, a)
278 int templateheight, templatewidth, ig, jg;
279 MDESC *a;
280 /* Return the indice in local memory (scattered distribution) of the element
281 * of indice a in global matrix */
282 {
283 int vtemp, htemp, vsubtemp, hsubtemp, il, jl;
284 assert(ig >= 0 && ig < a->m && jg >= 0 && jg < a->n);
285 /* coordinates in global matrix with the tests in intersect, ig MUST BE in
286 * [0..m] and jg in [0..n] */
287 /* coordinates of the template that "owns" the element */
288 vtemp = ig / templateheight;
289 htemp = jg / templatewidth;
290 /* coordinates of the element in the subblock of the (vtemp, htemp)
291 * template */
292 vsubtemp = ig % a->nbrow;
293 hsubtemp = jg % a->nbcol;
294 /* coordinates of the element in the local block of the processor */
295 il = a->nbrow * vtemp + vsubtemp;
296 jl = a->nbcol * htemp + hsubtemp;
297 assert(il < a->lda);
298 #ifndef NDEBUG
299 {
300 int pr, pc, p, q, lp, lq;
301 Cblacs_gridinfo(a->ctxt, &p, &q, &pr, &pc);
302 p = templateheight / a->nbrow;
303 q = templatewidth / a->nbcol;
304 lp = ig % templateheight / a->nbrow;
305 lq = jg % templatewidth / a->nbcol;
306 assert(lp == SHIFT(pr, a->sprow, p));
307 assert(lq == SHIFT(pc, a->spcol, q));
308 }
309 #endif
310 return (jl * a->lda + il);
311 }
312