1 #include "redist.h"
2 /* $Id: pdtrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $
3  *
4  * pdtrmrdrv.c :
5  *
6  *
7  * PURPOSE:
8  *
9  * this driver is testing the PDTRMR2D routine. It calls it to obtain a new
10  * scattered block data decomposition of a distributed DOUBLE PRECISION
11  * (block scattered) matrix. Then it calls PDTRMR2D for the inverse
12  * redistribution and checks the results with the initial data.
13  *
14  * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the
15  * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the
16  * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition
17  * on the processor grid p0 x q0.
18  *
19  * See pdtrmr.c file for detailed info on the PDTRMR2D function.
20  *
21  *
22  * The testing parameters are read from the file TRMR2D.dat, see the file in the
23  * distribution to have an example.
24  *
25  * created by Bernard Tourancheau in April 1994.
26  *
27  * modifications : see sccs history
28  *
29  * ===================================
30  *
31  *
32  * NOTE :
33  *
34  * - the matrix elements are DOUBLE PRECISION
35  *
36  * - memory requirements : this procedure requires approximately 3 times the
37  * memory space of the initial data block in grid 0 (initial block, copy for
38  * test and second redistribution result) and 1 time the memory space of the
39  * result data block in grid 1. with  the element size = sizeof(double)
40  * bytes,
41  *
42  *
43  * - use the procedures of the files:
44  *
45  * pdtrmr.o pdtrmr2.o pdtrmraux.o
46  *
47  *
48  * ======================================
49  *
50  * WARNING ASSUMPTIONS :
51  *
52  *
53  * ========================================
54  *
55  *
56  * Planned changes:
57  *
58  *
59  *
60  * ========================================= */
61 #define static2 static
62 #if defined(Add_) || defined(f77IsF2C)
63 #define fortran_mr2d pdtrmr2do_
64 #define fortran_mr2dnew pdtrmr2d_
65 #elif defined(UpCase)
66 #define fortran_mr2dnew PDTRMR2D
67 #define fortran_mr2d PDTRMR2DO
68 #define dcopy_ DCOPY
69 #define dlacpy_ DLACPY
70 #else
71 #define fortran_mr2d pdtrmr2do
72 #define fortran_mr2dnew pdtrmr2d
73 #define dcopy_ dcopy
74 #define dlacpy_ dlacpy
75 #endif
76 #define Clacpy Cdtrlacpy
77 void  Clacpy();
78 typedef struct {
79   int   desctype;
80   int   ctxt;
81   int   m;
82   int   n;
83   int   nbrow;
84   int   nbcol;
85   int   sprow;
86   int   spcol;
87   int   lda;
88 }     MDESC;
89 #define BLOCK_CYCLIC_2D 1
90 typedef struct {
91   int   gstart;
92   int   len;
93 }     IDESC;
94 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
95 #define max(A,B) ((A)>(B)?(A):(B))
96 #define min(A,B) ((A)>(B)?(B):(A))
97 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
98 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
99 #ifdef MALLOCDEBUG
100 #define malloc mymalloc
101 #define free myfree
102 #define realloc myrealloc
103 #endif
104 /* Cblacs */
105 extern void Cblacs_pcoord();
106 extern int Cblacs_pnum();
107 extern void Csetpvmtids();
108 extern void Cblacs_get();
109 extern void Cblacs_pinfo();
110 extern void Cblacs_gridinfo();
111 extern void Cblacs_gridinit();
112 extern void Cblacs_exit();
113 extern void Cblacs_gridexit();
114 extern void Cblacs_setup();
115 extern void Cigebs2d();
116 extern void Cigebr2d();
117 extern void Cigesd2d();
118 extern void Cigerv2d();
119 extern void Cigsum2d();
120 extern void Cigamn2d();
121 extern void Cigamx2d();
122 extern void Cdgesd2d();
123 extern void Cdgerv2d();
124 /* lapack */
125 void  dlacpy_();
126 /* aux fonctions */
127 extern int localindice();
128 extern void *mr2d_malloc();
129 extern int ppcm();
130 extern int localsize();
131 extern int memoryblocksize();
132 extern int changeorigin();
133 extern void paramcheck();
134 /* tools and others function */
135 #define scanD0 dtrscanD0
136 #define dispmat dtrdispmat
137 #define setmemory dtrsetmemory
138 #define freememory dtrfreememory
139 #define scan_intervals dtrscan_intervals
140 extern void scanD0();
141 extern void dispmat();
142 extern void setmemory();
143 extern void freememory();
144 extern int scan_intervals();
145 extern void Cpdtrmr2do();
146 extern void Cpdtrmr2d();
147 /* some defines for Cpdtrmr2do */
148 #define SENDBUFF 0
149 #define RECVBUFF 1
150 #define SIZEBUFF 2
151 #if 0
152 #define DEBUG
153 #endif
154 #ifndef DEBUG
155 #define NDEBUG
156 #endif
157 #include <stdio.h>
158 #include <stdlib.h>
159 #include <string.h>
160 #include <ctype.h>
161 #include <assert.h>
162 /* initblock: intialize the local part of a matrix with random data (well,
163  * not very random) */
164 static2 void
initblock(block,m,n)165 initblock(block, m, n)
166   double *block;
167   int   m, n;
168 {
169   double *pdata;
170   int   i;
171   pdata = block;
172   for (i = 0; i < m * n; i++, pdata++) {
173     (*pdata) = i;
174   };
175 }
176 /* getparam:read from a file a list of integer parameters, the end of the
177  * parameters to read is given by a NULL at the end of the args list */
178 #ifdef __STDC__
179 #include <stdarg.h>
180 static void
getparam(FILE * f,...)181 getparam(FILE * f,...)
182 {
183 #else
184 #include <varargs.h>
185 static void
186 getparam(va_alist)
187 va_dcl
188 {
189   FILE *f;
190 #endif
191   va_list ap;
192   int   i;
193   static int nbline;
194   char *ptr, *next;
195   int  *var;
196   static char buffer[200];
197 #ifdef __STDC__
198   va_start(ap, f);
199 #else
200   va_start(ap);
201   f = va_arg(ap, FILE *);
202 #endif
203   do {
204     next = fgets(buffer, 200, f);
205     if (next == NULL) {
206       fprintf(stderr, "bad configuration driver file:after line %d\n", nbline);
207       exit(1);
208     }
209     nbline += 1;
210   } while (buffer[0] == '#');
211   ptr = buffer;
212   var = va_arg(ap, int *);
213   while (var != NULL) {
214     *var = strtol(ptr, &next, 10);
215     if (ptr == next) {
216       fprintf(stderr, "bad configuration driver file:error line %d\n", nbline);
217       exit(1);
218     }
219     ptr = next;
220     var = va_arg(ap, int *);
221   }
222   va_end(ap);
223 }
224 void
225 initforpvm(argc, argv)
226   int   argc;
227   char *argv[];
228 {
229   int   pnum, nproc;
230   Cblacs_pinfo(&pnum, &nproc);
231   if (nproc < 1) {	/* we are with PVM */
232     if (pnum == 0) {
233       if (argc < 2) {
234 	fprintf(stderr, "usage with PVM:xdtrmr nbproc\n\
235 \t where nbproc is the number of nodes to initialize\n");
236 	exit(1);
237       }
238       nproc = atoi(argv[1]);
239     }
240     Cblacs_setup(&pnum, &nproc);
241   }
242 }
243 int
244 main(argc, argv)
245   int   argc;
246   char *argv[];
247 {
248   /* We initialize the data-block on the current processor, then redistribute
249    * it, and perform the inverse redistribution  to compare the local memory
250    * with the initial one. */
251   /* Data file */
252   FILE *fp;
253   int   nbre, nbremax;
254   /* Data distribution 0 parameters */
255   int   p0,	/* # of rows in the processor grid */
256         q0;	/* # of columns in the processor grid */
257   /* Data distribution 1 parameters */
258   int   p1, q1;
259   /* # of parameter to be read on the keyboard */
260 #define nbparameter 24
261   /* General variables */
262   int   blocksize0;
263   int   mypnum, nprocs;
264   int   parameters[nbparameter], nberrors;
265   int   i;
266   int   ia, ja, ib, jb, m, n;
267   int   gcontext, context0, context1;
268   int   myprow1, myprow0, mypcol0, mypcol1;
269   int   dummy;
270   MDESC ma, mb;
271   char *uplo, *diag;
272   double *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide;
273 #ifdef UsingMpiBlacs
274    MPI_Init(&argc, &argv);
275 #endif
276   setvbuf(stdout, NULL, _IOLBF, 0);
277   setvbuf(stderr, NULL, _IOLBF, 0);
278 #ifdef T3D
279   free(malloc(14000000));
280 #endif
281   initforpvm(argc, argv);
282   /* Read physical parameters */
283   Cblacs_pinfo(&mypnum, &nprocs);
284   /* initialize BLACS for the parameter communication */
285   Cblacs_get(0, 0, &gcontext);
286   Cblacs_gridinit(&gcontext, "R", nprocs, 1);
287   Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy);
288   if (mypnum == 0) {
289     if ((fp = fopen("TRMR2D.dat", "r")) == NULL) {
290       fprintf(stderr, "Can't open TRMR2D.dat\n");
291       exit(1);
292     };
293     printf("\n// DTRMR2D TESTER for DOUBLE PRECISION //\n");
294     getparam(fp, &nbre, NULL);
295     printf("////////// %d tests \n\n", nbre);
296     parameters[0] = nbre;
297     Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1);
298   } else {
299     Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0);
300     nbre = parameters[0];
301   };
302   if (mypnum == 0) {
303     printf("\n  m   n  m0  n0  sr0 sc0 i0  j0  p0  q0 nbr0 nbc0 \
304 m1  n1  sr1 sc1 i1  j1  p1  q1 nbr1 nbc1\n\n");
305   };
306   /****** TEST LOOP *****/
307   /* Here we are in grip 1xnprocs */
308   nbremax = nbre;
309 #ifdef DEBUG
310   fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum);
311 #endif
312   while (nbre-- != 0) {	/* Loop on the serie of tests */
313     /* All the processors read the parameters so we have to be in a 1xnprocs
314      * grid at each iteration */
315     /* Read processors grid and matrices parameters */
316     if (mypnum == 0) {
317       int   u, d;
318       getparam(fp,
319 	       &m, &n,
320 	       &ma.m, &ma.n, &ma.sprow, &ma.spcol,
321 	       &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol,
322 	       &mb.m, &mb.n, &mb.sprow, &mb.spcol,
323 	       &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol,
324 	       &u, &d,
325 	       NULL);
326       uplo = u ? "UPPER" : "LOWER";
327       diag = d ? "UNIT" : "NONUNIT";
328       printf("\t\t************* TEST # %d **********\n",
329 	     nbremax - nbre);
330       printf(" %3d %3d %3d %3d %3d %3d %3d %3d \
331 %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d",
332 	     m, n,
333 	     ma.m, ma.n, ma.sprow, ma.spcol,
334 	     ia, ja, p0, q0, ma.nbrow, ma.nbcol,
335 	     mb.m, mb.n, mb.sprow, mb.spcol,
336 	     ib, jb, p1, q1, mb.nbrow, mb.nbcol);
337       printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low",
338 	     toupper(*diag) == 'U' ? "unit" : "nonunit");
339       printf("\n");
340       if (p0 * q0 > nprocs || p1 * q1 > nprocs) {
341 	fprintf(stderr, "not enough nodes:%d processors required\n",
342 		max(p0 * q0, p1 * q1));
343 	exit(1);
344       }
345       parameters[0] = p0;
346       parameters[1] = q0;
347       parameters[2] = ma.nbrow;
348       parameters[3] = ma.nbcol;
349       parameters[4] = p1;
350       parameters[5] = q1;
351       parameters[6] = mb.nbrow;
352       parameters[7] = mb.nbcol;
353       parameters[8] = ma.m;
354       parameters[9] = ma.n;
355       parameters[10] = ma.sprow;
356       parameters[11] = ma.spcol;
357       parameters[12] = mb.sprow;
358       parameters[13] = mb.spcol;
359       parameters[14] = ia;
360       parameters[15] = ja;
361       parameters[16] = ib;
362       parameters[17] = jb;
363       parameters[18] = m;
364       parameters[19] = n;
365       parameters[20] = mb.m;
366       parameters[21] = mb.n;
367       parameters[22] = *uplo == 'U';
368       parameters[23] = *diag == 'U';
369       Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1);
370     } else {
371       Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0);
372       p0 = parameters[0];
373       q0 = parameters[1];
374       ma.nbrow = parameters[2];
375       ma.nbcol = parameters[3];
376       p1 = parameters[4];
377       q1 = parameters[5];
378       mb.nbrow = parameters[6];
379       mb.nbcol = parameters[7];
380       ma.m = parameters[8];
381       ma.n = parameters[9];
382       ma.sprow = parameters[10];
383       ma.spcol = parameters[11];
384       mb.sprow = parameters[12];
385       mb.spcol = parameters[13];
386       ia = parameters[14];
387       ja = parameters[15];
388       ib = parameters[16];
389       jb = parameters[17];
390       m = parameters[18];
391       n = parameters[19];
392       mb.m = parameters[20];
393       mb.n = parameters[21];
394       ma.desctype = BLOCK_CYCLIC_2D;
395       mb.desctype = BLOCK_CYCLIC_2D;
396       uplo = parameters[22] ? "UPPER" : "LOWER";
397       diag = parameters[23] ? "UNIT" : "NONUNIT";
398     };
399     Cblacs_get(0, 0, &context0);
400     Cblacs_gridinit(&context0, "R", p0, q0);
401     Cblacs_get(0, 0, &context1);
402     Cblacs_gridinit(&context1, "R", p1, q1);
403     Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0);
404     if (myprow0 >= p0 || mypcol0 >= q0)
405       myprow0 = mypcol0 = -1;
406     Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1);
407     if (myprow1 >= p1 || mypcol1 >= q1)
408       myprow1 = mypcol1 = -1;
409     assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
410     assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
411     ma.ctxt = context0;
412     mb.ctxt = context1;
413     /* From here, we are not assuming that only the processors working in the
414      * redistribution are calling  xxMR2D, but the ones not concerned will do
415      * nothing. */
416     /* We compute the exact size of the local memory block for the memory
417      * allocations */
418     if (myprow0 >= 0 && mypcol0 >= 0) {
419       blocksize0 = memoryblocksize(&ma);
420       ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m);
421       setmemory(&ptrmyblock, blocksize0);
422       initblock(ptrmyblock, 1, blocksize0);
423       setmemory(&ptrmyblockcopy, blocksize0);
424       memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock,
425 	     blocksize0 * sizeof(double));
426       setmemory(&ptrmyblockvide, blocksize0);
427       for (i = 0; i < blocksize0; i++)
428 	ptrmyblockvide[i] = -1;
429     };	/* if (mypnum < p0 * q0) */
430     if (myprow1 >= 0 && mypcol1 >= 0) {
431       setmemory(&ptrsavemyblock, memoryblocksize(&mb));
432       mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m);
433     };	/* if (mypnum < p1 * q1)  */
434     /* Redistribute the matrix from grid 0 to grid 1 (memory location
435      * ptrmyblock to ptrsavemyblock) */
436     Cpdtrmr2d(uplo, diag, m, n,
437 	      ptrmyblock, ia, ja, &ma,
438 	      ptrsavemyblock, ib, jb, &mb, gcontext);
439     /* Perform the inverse redistribution of the matrix from grid 1 to grid 0
440      * (memory location ptrsavemyblock to ptrmyblockvide) */
441     Cpdtrmr2d(uplo, diag, m, n,
442 	      ptrsavemyblock, ib, jb, &mb,
443 	      ptrmyblockvide, ia, ja, &ma, gcontext);
444     /* Check the differences */
445     nberrors = 0;
446     if (myprow0 >= 0 && mypcol0 >= 0) {
447       /* only for the processors that do have data at the begining */
448       for (i = 0; i < blocksize0; i++) {
449 	int   li, lj, gi, gj;
450 	int   in;
451 	in = 1;
452 	li = i % ma.lda;
453 	lj = i / ma.lda;
454 	gi = (li / ma.nbrow) * p0 * ma.nbrow +
455 	      SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow;
456 	gj = (lj / ma.nbcol) * q0 * ma.nbcol +
457 	      SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol;
458 	assert(gi < ma.m && gj < ma.n);
459 	gi -= (ia - 1);
460 	gj -= (ja - 1);
461 	if (gi < 0 || gj < 0 || gi >= m || gj >= n)
462 	  in = 0;
463 	else if (toupper(*uplo) == 'U')
464 	  in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U'));
465 	else
466 	  in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U'));
467 	if (!in) {
468 	  ptrmyblockcopy[i] = -1;
469 	}
470 	if (ptrmyblockvide[i] != ptrmyblockcopy[i]) {
471 	  nberrors++;
472 	  printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n"
473 		 ,mypnum, i,
474 		 ptrmyblockvide[i], ptrmyblockcopy[i]);
475 	};
476       };
477       if (nberrors > 0) {
478 	printf("Processor %d, has tested  %d DOUBLE PRECISION elements,\
479 Number of redistribution errors = %d \n",
480 	       mypnum, blocksize0, nberrors);
481       }
482     }
483     /* Look at the errors on all the processors at this point. */
484     Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0);
485     if (mypnum == 0)
486       if (nberrors)
487 	printf("  => Total number of redistribution errors = %d \n",
488 	       nberrors);
489       else
490 	printf("TEST PASSED OK\n");
491     /* release memory for the next iteration */
492     if (myprow0 >= 0 && mypcol0 >= 0) {
493       freememory((char *) ptrmyblock);
494       freememory((char *) ptrmyblockvide);
495       freememory((char *) ptrmyblockcopy);
496     };	/* if (mypnum < p0 * q0) */
497     /* release memory for the next iteration */
498     if (myprow1 >= 0 && mypcol1 >= 0) {
499       freememory((char *) ptrsavemyblock);
500     };
501     if (myprow0 >= 0)
502       Cblacs_gridexit(context0);
503     if (myprow1 >= 0)
504       Cblacs_gridexit(context1);
505   };	/* while nbre != 0 */
506   if (mypnum == 0) {
507     fclose(fp);
508   };
509   Cblacs_exit(0);
510   return 0;
511 }/* main */
512