1 #include "redist.h"
2 /* $Id: pztrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $
3 *
4 * pztrmrdrv.c :
5 *
6 *
7 * PURPOSE:
8 *
9 * this driver is testing the PZTRMR2D routine. It calls it to obtain a new
10 * scattered block data decomposition of a distributed COMPLEX*16 (block
11 * scattered) matrix. Then it calls PZTRMR2D for the inverse redistribution
12 * and checks the results with the initial data.
13 *
14 * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the
15 * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the
16 * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition
17 * on the processor grid p0 x q0.
18 *
19 * See pztrmr.c file for detailed info on the PZTRMR2D function.
20 *
21 *
22 * The testing parameters are read from the file TRMR2D.dat, see the file in the
23 * distribution to have an example.
24 *
25 * created by Bernard Tourancheau in April 1994.
26 *
27 * modifications : see sccs history
28 *
29 * ===================================
30 *
31 *
32 * NOTE :
33 *
34 * - the matrix elements are COMPLEX*16
35 *
36 * - memory requirements : this procedure requires approximately 3 times the
37 * memory space of the initial data block in grid 0 (initial block, copy for
38 * test and second redistribution result) and 1 time the memory space of the
39 * result data block in grid 1. with the element size = sizeof(dcomplex)
40 * bytes,
41 *
42 *
43 * - use the procedures of the files:
44 *
45 * pztrmr.o pztrmr2.o pztrmraux.o
46 *
47 *
48 * ======================================
49 *
50 * WARNING ASSUMPTIONS :
51 *
52 *
53 * ========================================
54 *
55 *
56 * Planned changes:
57 *
58 *
59 *
60 * ========================================= */
61 #define static2 static
62 #if defined(Add_) || defined(f77IsF2C)
63 #define fortran_mr2d pztrmr2do_
64 #define fortran_mr2dnew pztrmr2d_
65 #elif defined(UpCase)
66 #define fortran_mr2dnew PZTRMR2D
67 #define fortran_mr2d PZTRMR2DO
68 #define zcopy_ ZCOPY
69 #define zlacpy_ ZLACPY
70 #else
71 #define fortran_mr2d pztrmr2do
72 #define fortran_mr2dnew pztrmr2d
73 #define zcopy_ zcopy
74 #define zlacpy_ zlacpy
75 #endif
76 #define Clacpy Cztrlacpy
77 void Clacpy();
78 typedef struct {
79 double r, i;
80 } dcomplex;
81 typedef struct {
82 int desctype;
83 int ctxt;
84 int m;
85 int n;
86 int nbrow;
87 int nbcol;
88 int sprow;
89 int spcol;
90 int lda;
91 } MDESC;
92 #define BLOCK_CYCLIC_2D 1
93 typedef struct {
94 int gstart;
95 int len;
96 } IDESC;
97 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
98 #define max(A,B) ((A)>(B)?(A):(B))
99 #define min(A,B) ((A)>(B)?(B):(A))
100 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
101 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
102 #ifdef MALLOCDEBUG
103 #define malloc mymalloc
104 #define free myfree
105 #define realloc myrealloc
106 #endif
107 /* Cblacs */
108 extern void Cblacs_pcoord();
109 extern int Cblacs_pnum();
110 extern void Csetpvmtids();
111 extern void Cblacs_get();
112 extern void Cblacs_pinfo();
113 extern void Cblacs_gridinfo();
114 extern void Cblacs_gridinit();
115 extern void Cblacs_exit();
116 extern void Cblacs_gridexit();
117 extern void Cblacs_setup();
118 extern void Cigebs2d();
119 extern void Cigebr2d();
120 extern void Cigesd2d();
121 extern void Cigerv2d();
122 extern void Cigsum2d();
123 extern void Cigamn2d();
124 extern void Cigamx2d();
125 extern void Czgesd2d();
126 extern void Czgerv2d();
127 /* lapack */
128 void zlacpy_();
129 /* aux fonctions */
130 extern int localindice();
131 extern void *mr2d_malloc();
132 extern int ppcm();
133 extern int localsize();
134 extern int memoryblocksize();
135 extern int changeorigin();
136 extern void paramcheck();
137 /* tools and others function */
138 #define scanD0 ztrscanD0
139 #define dispmat ztrdispmat
140 #define setmemory ztrsetmemory
141 #define freememory ztrfreememory
142 #define scan_intervals ztrscan_intervals
143 extern void scanD0();
144 extern void dispmat();
145 extern void setmemory();
146 extern void freememory();
147 extern int scan_intervals();
148 extern void Cpztrmr2do();
149 extern void Cpztrmr2d();
150 /* some defines for Cpztrmr2do */
151 #define SENDBUFF 0
152 #define RECVBUFF 1
153 #define SIZEBUFF 2
154 #if 0
155 #define DEBUG
156 #endif
157 #ifndef DEBUG
158 #define NDEBUG
159 #endif
160 #include <stdio.h>
161 #include <stdlib.h>
162 #include <string.h>
163 #include <ctype.h>
164 #include <assert.h>
165 /* initblock: intialize the local part of a matrix with random data (well,
166 * not very random) */
167 static2 void
initblock(block,m,n)168 initblock(block, m, n)
169 dcomplex *block;
170 int m, n;
171 {
172 dcomplex *pdata;
173 int i;
174 pdata = block;
175 for (i = 0; i < m * n; i++, pdata++) {
176 (*pdata).r = i;
177 };
178 }
179 /* getparam:read from a file a list of integer parameters, the end of the
180 * parameters to read is given by a NULL at the end of the args list */
181 #ifdef __STDC__
182 #include <stdarg.h>
183 static void
getparam(FILE * f,...)184 getparam(FILE * f,...)
185 {
186 #else
187 #include <varargs.h>
188 static void
189 getparam(va_alist)
190 va_dcl
191 {
192 FILE *f;
193 #endif
194 va_list ap;
195 int i;
196 static int nbline;
197 char *ptr, *next;
198 int *var;
199 static char buffer[200];
200 #ifdef __STDC__
201 va_start(ap, f);
202 #else
203 va_start(ap);
204 f = va_arg(ap, FILE *);
205 #endif
206 do {
207 next = fgets(buffer, 200, f);
208 if (next == NULL) {
209 fprintf(stderr, "bad configuration driver file:after line %d\n", nbline);
210 exit(1);
211 }
212 nbline += 1;
213 } while (buffer[0] == '#');
214 ptr = buffer;
215 var = va_arg(ap, int *);
216 while (var != NULL) {
217 *var = strtol(ptr, &next, 10);
218 if (ptr == next) {
219 fprintf(stderr, "bad configuration driver file:error line %d\n", nbline);
220 exit(1);
221 }
222 ptr = next;
223 var = va_arg(ap, int *);
224 }
225 va_end(ap);
226 }
227 void
228 initforpvm(argc, argv)
229 int argc;
230 char *argv[];
231 {
232 int pnum, nproc;
233 Cblacs_pinfo(&pnum, &nproc);
234 if (nproc < 1) { /* we are with PVM */
235 if (pnum == 0) {
236 if (argc < 2) {
237 fprintf(stderr, "usage with PVM:xztrmr nbproc\n\
238 \t where nbproc is the number of nodes to initialize\n");
239 exit(1);
240 }
241 nproc = atoi(argv[1]);
242 }
243 Cblacs_setup(&pnum, &nproc);
244 }
245 }
246 int
247 main(argc, argv)
248 int argc;
249 char *argv[];
250 {
251 /* We initialize the data-block on the current processor, then redistribute
252 * it, and perform the inverse redistribution to compare the local memory
253 * with the initial one. */
254 /* Data file */
255 FILE *fp;
256 int nbre, nbremax;
257 /* Data distribution 0 parameters */
258 int p0, /* # of rows in the processor grid */
259 q0; /* # of columns in the processor grid */
260 /* Data distribution 1 parameters */
261 int p1, q1;
262 /* # of parameter to be read on the keyboard */
263 #define nbparameter 24
264 /* General variables */
265 int blocksize0;
266 int mypnum, nprocs;
267 int parameters[nbparameter], nberrors;
268 int i;
269 int ia, ja, ib, jb, m, n;
270 int gcontext, context0, context1;
271 int myprow1, myprow0, mypcol0, mypcol1;
272 int dummy;
273 MDESC ma, mb;
274 char *uplo, *diag;
275 dcomplex *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide;
276 #ifdef UsingMpiBlacs
277 MPI_Init(&argc, &argv);
278 #endif
279 setvbuf(stdout, NULL, _IOLBF, 0);
280 setvbuf(stderr, NULL, _IOLBF, 0);
281 #ifdef T3D
282 free(malloc(14000000));
283 #endif
284 initforpvm(argc, argv);
285 /* Read physical parameters */
286 Cblacs_pinfo(&mypnum, &nprocs);
287 /* initialize BLACS for the parameter communication */
288 Cblacs_get(0, 0, &gcontext);
289 Cblacs_gridinit(&gcontext, "R", nprocs, 1);
290 Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy);
291 if (mypnum == 0) {
292 if ((fp = fopen("TRMR2D.dat", "r")) == NULL) {
293 fprintf(stderr, "Can't open TRMR2D.dat\n");
294 exit(1);
295 };
296 printf("\n// ZTRMR2D TESTER for COMPLEX*16 //\n");
297 getparam(fp, &nbre, NULL);
298 printf("////////// %d tests \n\n", nbre);
299 parameters[0] = nbre;
300 Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1);
301 } else {
302 Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0);
303 nbre = parameters[0];
304 };
305 if (mypnum == 0) {
306 printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \
307 m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n");
308 };
309 /****** TEST LOOP *****/
310 /* Here we are in grip 1xnprocs */
311 nbremax = nbre;
312 #ifdef DEBUG
313 fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum);
314 #endif
315 while (nbre-- != 0) { /* Loop on the serie of tests */
316 /* All the processors read the parameters so we have to be in a 1xnprocs
317 * grid at each iteration */
318 /* Read processors grid and matrices parameters */
319 if (mypnum == 0) {
320 int u, d;
321 getparam(fp,
322 &m, &n,
323 &ma.m, &ma.n, &ma.sprow, &ma.spcol,
324 &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol,
325 &mb.m, &mb.n, &mb.sprow, &mb.spcol,
326 &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol,
327 &u, &d,
328 NULL);
329 uplo = u ? "UPPER" : "LOWER";
330 diag = d ? "UNIT" : "NONUNIT";
331 printf("\t\t************* TEST # %d **********\n",
332 nbremax - nbre);
333 printf(" %3d %3d %3d %3d %3d %3d %3d %3d \
334 %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d",
335 m, n,
336 ma.m, ma.n, ma.sprow, ma.spcol,
337 ia, ja, p0, q0, ma.nbrow, ma.nbcol,
338 mb.m, mb.n, mb.sprow, mb.spcol,
339 ib, jb, p1, q1, mb.nbrow, mb.nbcol);
340 printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low",
341 toupper(*diag) == 'U' ? "unit" : "nonunit");
342 printf("\n");
343 if (p0 * q0 > nprocs || p1 * q1 > nprocs) {
344 fprintf(stderr, "not enough nodes:%d processors required\n",
345 max(p0 * q0, p1 * q1));
346 exit(1);
347 }
348 parameters[0] = p0;
349 parameters[1] = q0;
350 parameters[2] = ma.nbrow;
351 parameters[3] = ma.nbcol;
352 parameters[4] = p1;
353 parameters[5] = q1;
354 parameters[6] = mb.nbrow;
355 parameters[7] = mb.nbcol;
356 parameters[8] = ma.m;
357 parameters[9] = ma.n;
358 parameters[10] = ma.sprow;
359 parameters[11] = ma.spcol;
360 parameters[12] = mb.sprow;
361 parameters[13] = mb.spcol;
362 parameters[14] = ia;
363 parameters[15] = ja;
364 parameters[16] = ib;
365 parameters[17] = jb;
366 parameters[18] = m;
367 parameters[19] = n;
368 parameters[20] = mb.m;
369 parameters[21] = mb.n;
370 parameters[22] = *uplo == 'U';
371 parameters[23] = *diag == 'U';
372 Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1);
373 } else {
374 Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0);
375 p0 = parameters[0];
376 q0 = parameters[1];
377 ma.nbrow = parameters[2];
378 ma.nbcol = parameters[3];
379 p1 = parameters[4];
380 q1 = parameters[5];
381 mb.nbrow = parameters[6];
382 mb.nbcol = parameters[7];
383 ma.m = parameters[8];
384 ma.n = parameters[9];
385 ma.sprow = parameters[10];
386 ma.spcol = parameters[11];
387 mb.sprow = parameters[12];
388 mb.spcol = parameters[13];
389 ia = parameters[14];
390 ja = parameters[15];
391 ib = parameters[16];
392 jb = parameters[17];
393 m = parameters[18];
394 n = parameters[19];
395 mb.m = parameters[20];
396 mb.n = parameters[21];
397 ma.desctype = BLOCK_CYCLIC_2D;
398 mb.desctype = BLOCK_CYCLIC_2D;
399 uplo = parameters[22] ? "UPPER" : "LOWER";
400 diag = parameters[23] ? "UNIT" : "NONUNIT";
401 };
402 Cblacs_get(0, 0, &context0);
403 Cblacs_gridinit(&context0, "R", p0, q0);
404 Cblacs_get(0, 0, &context1);
405 Cblacs_gridinit(&context1, "R", p1, q1);
406 Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0);
407 if (myprow0 >= p0 || mypcol0 >= q0)
408 myprow0 = mypcol0 = -1;
409 Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1);
410 if (myprow1 >= p1 || mypcol1 >= q1)
411 myprow1 = mypcol1 = -1;
412 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
413 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
414 ma.ctxt = context0;
415 mb.ctxt = context1;
416 /* From here, we are not assuming that only the processors working in the
417 * redistribution are calling xxMR2D, but the ones not concerned will do
418 * nothing. */
419 /* We compute the exact size of the local memory block for the memory
420 * allocations */
421 if (myprow0 >= 0 && mypcol0 >= 0) {
422 blocksize0 = memoryblocksize(&ma);
423 ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m);
424 setmemory(&ptrmyblock, blocksize0);
425 initblock(ptrmyblock, 1, blocksize0);
426 setmemory(&ptrmyblockcopy, blocksize0);
427 memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock,
428 blocksize0 * sizeof(dcomplex));
429 setmemory(&ptrmyblockvide, blocksize0);
430 for (i = 0; i < blocksize0; i++)
431 ptrmyblockvide[i].r = -1;
432 }; /* if (mypnum < p0 * q0) */
433 if (myprow1 >= 0 && mypcol1 >= 0) {
434 setmemory(&ptrsavemyblock, memoryblocksize(&mb));
435 mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m);
436 }; /* if (mypnum < p1 * q1) */
437 /* Redistribute the matrix from grid 0 to grid 1 (memory location
438 * ptrmyblock to ptrsavemyblock) */
439 Cpztrmr2d(uplo, diag, m, n,
440 ptrmyblock, ia, ja, &ma,
441 ptrsavemyblock, ib, jb, &mb, gcontext);
442 /* Perform the inverse redistribution of the matrix from grid 1 to grid 0
443 * (memory location ptrsavemyblock to ptrmyblockvide) */
444 Cpztrmr2d(uplo, diag, m, n,
445 ptrsavemyblock, ib, jb, &mb,
446 ptrmyblockvide, ia, ja, &ma, gcontext);
447 /* Check the differences */
448 nberrors = 0;
449 if (myprow0 >= 0 && mypcol0 >= 0) {
450 /* only for the processors that do have data at the begining */
451 for (i = 0; i < blocksize0; i++) {
452 int li, lj, gi, gj;
453 int in;
454 in = 1;
455 li = i % ma.lda;
456 lj = i / ma.lda;
457 gi = (li / ma.nbrow) * p0 * ma.nbrow +
458 SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow;
459 gj = (lj / ma.nbcol) * q0 * ma.nbcol +
460 SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol;
461 assert(gi < ma.m && gj < ma.n);
462 gi -= (ia - 1);
463 gj -= (ja - 1);
464 if (gi < 0 || gj < 0 || gi >= m || gj >= n)
465 in = 0;
466 else if (toupper(*uplo) == 'U')
467 in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U'));
468 else
469 in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U'));
470 if (!in) {
471 ptrmyblockcopy[i].r = -1;
472 }
473 if (ptrmyblockvide[i].r != ptrmyblockcopy[i].r) {
474 nberrors++;
475 printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n"
476 ,mypnum, i,
477 ptrmyblockvide[i].r, ptrmyblockcopy[i].r);
478 };
479 };
480 if (nberrors > 0) {
481 printf("Processor %d, has tested %d COMPLEX*16 elements,\
482 Number of redistribution errors = %d \n",
483 mypnum, blocksize0, nberrors);
484 }
485 }
486 /* Look at the errors on all the processors at this point. */
487 Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0);
488 if (mypnum == 0)
489 if (nberrors)
490 printf(" => Total number of redistribution errors = %d \n",
491 nberrors);
492 else
493 printf("TEST PASSED OK\n");
494 /* release memory for the next iteration */
495 if (myprow0 >= 0 && mypcol0 >= 0) {
496 freememory((char *) ptrmyblock);
497 freememory((char *) ptrmyblockvide);
498 freememory((char *) ptrmyblockcopy);
499 }; /* if (mypnum < p0 * q0) */
500 /* release memory for the next iteration */
501 if (myprow1 >= 0 && mypcol1 >= 0) {
502 freememory((char *) ptrsavemyblock);
503 };
504 if (myprow0 >= 0)
505 Cblacs_gridexit(context0);
506 if (myprow1 >= 0)
507 Cblacs_gridexit(context1);
508 }; /* while nbre != 0 */
509 if (mypnum == 0) {
510 fclose(fp);
511 };
512 Cblacs_exit(0);
513 return 0;
514 }/* main */
515