1 #include "redist.h"
2 /* $Id: psgemrdrv.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $
3  *
4  * psgemrdrv.c :
5  *
6  *
7  * PURPOSE:
8  *
9  * this driver is testing the PSGEMR2D routine. It calls it to obtain a new
10  * scattered block data decomposition of a distributed REAL (block scattered)
11  * matrix. Then it calls PSGEMR2D for the inverse redistribution and checks
12  * the results with the initial data.
13  *
14  * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the
15  * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the
16  * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition
17  * on the processor grid p0 x q0.
18  *
19  * See psgemr.c file for detailed info on the PSGEMR2D function.
20  *
21  *
22  * The testing parameters are read from the file GEMR2D.dat, see the file in the
23  * distribution to have an example.
24  *
25  * created by Bernard Tourancheau in April 1994.
26  *
27  * modifications : see sccs history
28  *
29  * ===================================
30  *
31  *
32  * NOTE :
33  *
34  * - the matrix elements are REAL
35  *
36  * - memory requirements : this procedure requires approximately 3 times the
37  * memory space of the initial data block in grid 0 (initial block, copy for
38  * test and second redistribution result) and 1 time the memory space of the
39  * result data block in grid 1. with  the element size = sizeof(float) bytes,
40  *
41  *
42  * - use the procedures of the files:
43  *
44  * psgemr.o psgemr2.o psgemraux.o
45  *
46  *
47  * ======================================
48  *
49  * WARNING ASSUMPTIONS :
50  *
51  *
52  * ========================================
53  *
54  *
55  * Planned changes:
56  *
57  *
58  *
59  * ========================================= */
60 #define static2 static
61 #if defined(Add_) || defined(f77IsF2C)
62 #define fortran_mr2d psgemr2do_
63 #define fortran_mr2dnew psgemr2d_
64 #elif defined(UpCase)
65 #define fortran_mr2dnew PSGEMR2D
66 #define fortran_mr2d PSGEMR2DO
67 #define scopy_ SCOPY
68 #define slacpy_ SLACPY
69 #else
70 #define fortran_mr2d psgemr2do
71 #define fortran_mr2dnew psgemr2d
72 #define scopy_ scopy
73 #define slacpy_ slacpy
74 #endif
75 #define Clacpy Csgelacpy
76 void  Clacpy();
77 typedef struct {
78   int   desctype;
79   int   ctxt;
80   int   m;
81   int   n;
82   int   nbrow;
83   int   nbcol;
84   int   sprow;
85   int   spcol;
86   int   lda;
87 }     MDESC;
88 #define BLOCK_CYCLIC_2D 1
89 typedef struct {
90   int   lstart;
91   int   len;
92 }     IDESC;
93 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
94 #define max(A,B) ((A)>(B)?(A):(B))
95 #define min(A,B) ((A)>(B)?(B):(A))
96 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
97 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
98 #ifdef MALLOCDEBUG
99 #define malloc mymalloc
100 #define free myfree
101 #define realloc myrealloc
102 #endif
103 /* Cblacs */
104 extern void Cblacs_pcoord();
105 extern int Cblacs_pnum();
106 extern void Csetpvmtids();
107 extern void Cblacs_get();
108 extern void Cblacs_pinfo();
109 extern void Cblacs_gridinfo();
110 extern void Cblacs_gridinit();
111 extern void Cblacs_exit();
112 extern void Cblacs_gridexit();
113 extern void Cblacs_setup();
114 extern void Cigebs2d();
115 extern void Cigebr2d();
116 extern void Cigesd2d();
117 extern void Cigerv2d();
118 extern void Cigsum2d();
119 extern void Cigamn2d();
120 extern void Cigamx2d();
121 extern void Csgesd2d();
122 extern void Csgerv2d();
123 /* lapack */
124 void  slacpy_();
125 /* aux fonctions */
126 extern int localindice();
127 extern void *mr2d_malloc();
128 extern int ppcm();
129 extern int localsize();
130 extern int memoryblocksize();
131 extern int changeorigin();
132 extern void paramcheck();
133 /* tools and others function */
134 #define scanD0 sgescanD0
135 #define dispmat sgedispmat
136 #define setmemory sgesetmemory
137 #define freememory sgefreememory
138 #define scan_intervals sgescan_intervals
139 extern void scanD0();
140 extern void dispmat();
141 extern void setmemory();
142 extern void freememory();
143 extern int scan_intervals();
144 extern void Cpsgemr2do();
145 extern void Cpsgemr2d();
146 /* some defines for Cpsgemr2do */
147 #define SENDBUFF 0
148 #define RECVBUFF 1
149 #define SIZEBUFF 2
150 #if 0
151 #define DEBUG
152 #endif
153 #ifndef DEBUG
154 #define NDEBUG
155 #endif
156 #include <stdio.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <ctype.h>
160 #include <assert.h>
161 /* initblock: intialize the local part of a matrix with random data (well,
162  * not very random) */
163 static2 void
initblock(block,m,n)164 initblock(block, m, n)
165   float *block;
166   int   m, n;
167 {
168   float *pdata;
169   int   i;
170   pdata = block;
171   for (i = 0; i < m * n; i++, pdata++) {
172     (*pdata) = i;
173   };
174 }
175 /* getparam:read from a file a list of integer parameters, the end of the
176  * parameters to read is given by a NULL at the end of the args list */
177 #ifdef __STDC__
178 #include <stdarg.h>
179 static void
getparam(FILE * f,...)180 getparam(FILE * f,...)
181 {
182 #else
183 #include <varargs.h>
184 static void
185 getparam(va_alist)
186 va_dcl
187 {
188   FILE *f;
189 #endif
190   va_list ap;
191   int   i;
192   static int nbline;
193   char *ptr, *next;
194   int  *var;
195   static char buffer[200];
196 #ifdef __STDC__
197   va_start(ap, f);
198 #else
199   va_start(ap);
200   f = va_arg(ap, FILE *);
201 #endif
202   do {
203     next = fgets(buffer, 200, f);
204     if (next == NULL) {
205       fprintf(stderr, "bad configuration driver file:after line %d\n", nbline);
206       exit(1);
207     }
208     nbline += 1;
209   } while (buffer[0] == '#');
210   ptr = buffer;
211   var = va_arg(ap, int *);
212   while (var != NULL) {
213     *var = strtol(ptr, &next, 10);
214     if (ptr == next) {
215       fprintf(stderr, "bad configuration driver file:error line %d\n", nbline);
216       exit(1);
217     }
218     ptr = next;
219     var = va_arg(ap, int *);
220   }
221   va_end(ap);
222 }
223 void
224 initforpvm(argc, argv)
225   int   argc;
226   char *argv[];
227 {
228   int   pnum, nproc;
229   Cblacs_pinfo(&pnum, &nproc);
230   if (nproc < 1) {	/* we are with PVM */
231     if (pnum == 0) {
232       if (argc < 2) {
233 	fprintf(stderr, "usage with PVM:xsgemr nbproc\n\
234 \t where nbproc is the number of nodes to initialize\n");
235 	exit(1);
236       }
237       nproc = atoi(argv[1]);
238     }
239     Cblacs_setup(&pnum, &nproc);
240   }
241 }
242 int
243 main(argc, argv)
244   int   argc;
245   char *argv[];
246 {
247   /* We initialize the data-block on the current processor, then redistribute
248    * it, and perform the inverse redistribution  to compare the local memory
249    * with the initial one. */
250   /* Data file */
251   FILE *fp;
252   int   nbre, nbremax;
253   /* Data distribution 0 parameters */
254   int   p0,	/* # of rows in the processor grid */
255         q0;	/* # of columns in the processor grid */
256   /* Data distribution 1 parameters */
257   int   p1, q1;
258   /* # of parameter to be read on the keyboard */
259 #define nbparameter 24
260   /* General variables */
261   int   blocksize0;
262   int   mypnum, nprocs;
263   int   parameters[nbparameter], nberrors;
264   int   i;
265   int   ia, ja, ib, jb, m, n;
266   int   gcontext, context0, context1;
267   int   myprow1, myprow0, mypcol0, mypcol1;
268   int   dummy;
269   MDESC ma, mb;
270   float *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide;
271 #ifdef UsingMpiBlacs
272    MPI_Init(&argc, &argv);
273 #endif
274   setvbuf(stdout, NULL, _IOLBF, 0);
275   setvbuf(stderr, NULL, _IOLBF, 0);
276 #ifdef T3D
277   free(malloc(14000000));
278 #endif
279   initforpvm(argc, argv);
280   /* Read physical parameters */
281   Cblacs_pinfo(&mypnum, &nprocs);
282   /* initialize BLACS for the parameter communication */
283   Cblacs_get(0, 0, &gcontext);
284   Cblacs_gridinit(&gcontext, "R", nprocs, 1);
285   Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy);
286   if (mypnum == 0) {
287     if ((fp = fopen("GEMR2D.dat", "r")) == NULL) {
288       fprintf(stderr, "Can't open GEMR2D.dat\n");
289       exit(1);
290     };
291     printf("\n// SGEMR2D TESTER for REAL //\n");
292     getparam(fp, &nbre, NULL);
293     printf("////////// %d tests \n\n", nbre);
294     parameters[0] = nbre;
295     Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1);
296   } else {
297     Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0);
298     nbre = parameters[0];
299   };
300   if (mypnum == 0) {
301     printf("\n  m   n  m0  n0  sr0 sc0 i0  j0  p0  q0 nbr0 nbc0 \
302 m1  n1  sr1 sc1 i1  j1  p1  q1 nbr1 nbc1\n\n");
303   };
304   /****** TEST LOOP *****/
305   /* Here we are in grip 1xnprocs */
306   nbremax = nbre;
307 #ifdef DEBUG
308   fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum);
309 #endif
310   while (nbre-- != 0) {	/* Loop on the serie of tests */
311     /* All the processors read the parameters so we have to be in a 1xnprocs
312      * grid at each iteration */
313     /* Read processors grid and matrices parameters */
314     if (mypnum == 0) {
315       int   u, d;
316       getparam(fp,
317 	       &m, &n,
318 	       &ma.m, &ma.n, &ma.sprow, &ma.spcol,
319 	       &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol,
320 	       &mb.m, &mb.n, &mb.sprow, &mb.spcol,
321 	       &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol,
322 	       NULL);
323       printf("\t\t************* TEST # %d **********\n",
324 	     nbremax - nbre);
325       printf(" %3d %3d %3d %3d %3d %3d %3d %3d \
326 %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d",
327 	     m, n,
328 	     ma.m, ma.n, ma.sprow, ma.spcol,
329 	     ia, ja, p0, q0, ma.nbrow, ma.nbcol,
330 	     mb.m, mb.n, mb.sprow, mb.spcol,
331 	     ib, jb, p1, q1, mb.nbrow, mb.nbcol);
332       printf("\n");
333       if (p0 * q0 > nprocs || p1 * q1 > nprocs) {
334 	fprintf(stderr, "not enough nodes:%d processors required\n",
335 		max(p0 * q0, p1 * q1));
336 	exit(1);
337       }
338       parameters[0] = p0;
339       parameters[1] = q0;
340       parameters[2] = ma.nbrow;
341       parameters[3] = ma.nbcol;
342       parameters[4] = p1;
343       parameters[5] = q1;
344       parameters[6] = mb.nbrow;
345       parameters[7] = mb.nbcol;
346       parameters[8] = ma.m;
347       parameters[9] = ma.n;
348       parameters[10] = ma.sprow;
349       parameters[11] = ma.spcol;
350       parameters[12] = mb.sprow;
351       parameters[13] = mb.spcol;
352       parameters[14] = ia;
353       parameters[15] = ja;
354       parameters[16] = ib;
355       parameters[17] = jb;
356       parameters[18] = m;
357       parameters[19] = n;
358       parameters[20] = mb.m;
359       parameters[21] = mb.n;
360       Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1);
361     } else {
362       Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0);
363       p0 = parameters[0];
364       q0 = parameters[1];
365       ma.nbrow = parameters[2];
366       ma.nbcol = parameters[3];
367       p1 = parameters[4];
368       q1 = parameters[5];
369       mb.nbrow = parameters[6];
370       mb.nbcol = parameters[7];
371       ma.m = parameters[8];
372       ma.n = parameters[9];
373       ma.sprow = parameters[10];
374       ma.spcol = parameters[11];
375       mb.sprow = parameters[12];
376       mb.spcol = parameters[13];
377       ia = parameters[14];
378       ja = parameters[15];
379       ib = parameters[16];
380       jb = parameters[17];
381       m = parameters[18];
382       n = parameters[19];
383       mb.m = parameters[20];
384       mb.n = parameters[21];
385       ma.desctype = BLOCK_CYCLIC_2D;
386       mb.desctype = BLOCK_CYCLIC_2D;
387     };
388     Cblacs_get(0, 0, &context0);
389     Cblacs_gridinit(&context0, "R", p0, q0);
390     Cblacs_get(0, 0, &context1);
391     Cblacs_gridinit(&context1, "R", p1, q1);
392     Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0);
393     if (myprow0 >= p0 || mypcol0 >= q0)
394       myprow0 = mypcol0 = -1;
395     Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1);
396     if (myprow1 >= p1 || mypcol1 >= q1)
397       myprow1 = mypcol1 = -1;
398     assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
399     assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
400     ma.ctxt = context0;
401     mb.ctxt = context1;
402     /* From here, we are not assuming that only the processors working in the
403      * redistribution are calling  xxMR2D, but the ones not concerned will do
404      * nothing. */
405     /* We compute the exact size of the local memory block for the memory
406      * allocations */
407     if (myprow0 >= 0 && mypcol0 >= 0) {
408       blocksize0 = memoryblocksize(&ma);
409       ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m);
410       setmemory(&ptrmyblock, blocksize0);
411       initblock(ptrmyblock, 1, blocksize0);
412       setmemory(&ptrmyblockcopy, blocksize0);
413       memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock,
414 	     blocksize0 * sizeof(float));
415       setmemory(&ptrmyblockvide, blocksize0);
416       for (i = 0; i < blocksize0; i++)
417 	ptrmyblockvide[i] = -1;
418     };	/* if (mypnum < p0 * q0) */
419     if (myprow1 >= 0 && mypcol1 >= 0) {
420       setmemory(&ptrsavemyblock, memoryblocksize(&mb));
421       mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m);
422     };	/* if (mypnum < p1 * q1)  */
423     /* Redistribute the matrix from grid 0 to grid 1 (memory location
424      * ptrmyblock to ptrsavemyblock) */
425     Cpsgemr2d(m, n,
426 	      ptrmyblock, ia, ja, &ma,
427 	      ptrsavemyblock, ib, jb, &mb, gcontext);
428     /* Perform the inverse redistribution of the matrix from grid 1 to grid 0
429      * (memory location ptrsavemyblock to ptrmyblockvide) */
430     Cpsgemr2d(m, n,
431 	      ptrsavemyblock, ib, jb, &mb,
432 	      ptrmyblockvide, ia, ja, &ma, gcontext);
433     /* Check the differences */
434     nberrors = 0;
435     if (myprow0 >= 0 && mypcol0 >= 0) {
436       /* only for the processors that do have data at the begining */
437       for (i = 0; i < blocksize0; i++) {
438 	int   li, lj, gi, gj;
439 	int   in;
440 	in = 1;
441 	li = i % ma.lda;
442 	lj = i / ma.lda;
443 	gi = (li / ma.nbrow) * p0 * ma.nbrow +
444 	      SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow;
445 	gj = (lj / ma.nbcol) * q0 * ma.nbcol +
446 	      SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol;
447 	assert(gi < ma.m && gj < ma.n);
448 	gi -= (ia - 1);
449 	gj -= (ja - 1);
450 	if (gi < 0 || gj < 0 || gi >= m || gj >= n)
451 	  in = 0;
452 	if (!in) {
453 	  ptrmyblockcopy[i] = -1;
454 	}
455 	if (ptrmyblockvide[i] != ptrmyblockcopy[i]) {
456 	  nberrors++;
457 	};
458       };
459       if (nberrors > 0) {
460 	printf("Processor %d, has tested  %d REAL elements,\
461 Number of redistribution errors = %d \n",
462 	       mypnum, blocksize0, nberrors);
463       }
464     }
465     /* Look at the errors on all the processors at this point. */
466     Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0);
467     if (mypnum == 0)
468       if (nberrors)
469 	printf("  => Total number of redistribution errors = %d \n",
470 	       nberrors);
471       else
472 	printf("TEST PASSED OK\n");
473     /* release memory for the next iteration */
474     if (myprow0 >= 0 && mypcol0 >= 0) {
475       freememory((char *) ptrmyblock);
476       freememory((char *) ptrmyblockvide);
477       freememory((char *) ptrmyblockcopy);
478     };	/* if (mypnum < p0 * q0) */
479     /* release memory for the next iteration */
480     if (myprow1 >= 0 && mypcol1 >= 0) {
481       freememory((char *) ptrsavemyblock);
482     };
483     if (myprow0 >= 0)
484       Cblacs_gridexit(context0);
485     if (myprow1 >= 0)
486       Cblacs_gridexit(context1);
487   };	/* while nbre != 0 */
488   if (mypnum == 0) {
489     fclose(fp);
490   };
491   Cblacs_exit(0);
492   return 0;
493 }/* main */
494