1 /* testGridMPI.c */
2
3 #include "../spoolesMPI.h"
4 #include "../../timings.h"
5
6 /*--------------------------------------------------------------------*/
7 void mkNDlinsys ( int n1, int n2, int n3, int maxzeros, int maxsize,
8 int type, int symmetryflag, int nrhs, int seed, int msglvl,
9 FILE *msgFile, ETree **pfrontETree, IVL **psymbfacIVL,
10 InpMtx **pmtxA, DenseMtx **pmtxX, DenseMtx **pmtxB ) ;
11 /*--------------------------------------------------------------------*/
12 int
main(int argc,char * argv[])13 main ( int argc, char *argv[] )
14 /*
15 */
16 {
17 Chv *rootchv ;
18 ChvManager *chvmanager ;
19 DenseMtx *mtxB, *mtxkeep, *mtxX, *mtxZ ;
20 double cputotal, cutoff, droptol, error, factorops,
21 solvecpu, solveops, tau, terror, t1, t2 ;
22 double cpus[14], tcpus[14] ;
23 DV *cumopsDV ;
24 ETree *frontETree ;
25 FILE *msgFile ;
26 FrontMtx *frontmtx ;
27 InpMtx *mtxA, *mtxAkeep ;
28 int factorerror, lookahead, maptype, maxsize, maxzeros,
29 msglvl, myid, neqns, nproc, nrhs, nrow, nzf, n1, n2,
30 n3, pivotingflag, seed, sparsityflag, symmetryflag,
31 tag, type ;
32 int *rowindX, *rowindZ ;
33 int stats[17], tstats[17] ;
34 IV *frontOwnersIV, *vtxmapIV ;
35 IVL *symbfacIVL ;
36 SolveMap *solvemap ;
37 SubMtxManager *factormanager, *solvemanager ;
38 /*
39 ---------------------------------------------------------------
40 find out the identity of this process and the number of process
41 ---------------------------------------------------------------
42 */
43 MPI_Init(&argc, &argv) ;
44 MPI_Comm_rank(MPI_COMM_WORLD, &myid) ;
45 MPI_Comm_size(MPI_COMM_WORLD, &nproc) ;
46 fprintf(stdout, "\n process %d of %d, argc = %d", myid, nproc, argc) ;
47 fflush(stdout) ;
48 if ( argc != 19 ) {
49 fprintf(stdout,
50 "\n\n usage : %s "
51 "\n msglvl msgFile n1 n2 n3 maxzeros maxsize seed "
52 "\n type symmetryflag sparsityflag pivotingflag "
53 "\n tau droptol lookahead nrhs maptype cutoff"
54 "\n msglvl -- message level"
55 "\n msgFile -- message file"
56 "\n n1 -- # of points in first dimension"
57 "\n n2 -- # of points in second dimension"
58 "\n n3 -- # of points in third dimension"
59 "\n maxzeros -- max # of zeros in a front"
60 "\n maxsize -- max # of internal vertices in a front"
61 "\n seed -- random number seed"
62 "\n type -- type of entries"
63 "\n 1 --> real "
64 "\n 2 --> complex "
65 "\n symmetryflag -- symmetry flag"
66 "\n 0 --> symmetric "
67 "\n 1 --> hermitian "
68 "\n 2 --> nonsymmetric "
69 "\n sparsityflag -- sparsity flag"
70 "\n 0 --> store dense fronts"
71 "\n 1 --> store sparse fronts, use droptol to drop entries"
72 "\n pivotingflag -- pivoting flag"
73 "\n 0 --> do not pivot"
74 "\n 1 --> enable pivoting"
75 "\n tau -- upper bound on factor entries"
76 "\n used only with pivoting"
77 "\n droptol -- lower bound on factor entries"
78 "\n used only with sparse fronts"
79 "\n lookahead -- parameter to schedule computation"
80 "\n 0 --> no lookahead"
81 "\n nonzero --> look up the tree this many steps"
82 "\n nrhs -- number of right hand sides"
83 "\n maptype -- type of factorization map"
84 "\n 1 --> wrap map"
85 "\n 2 --> balanced map via post-order traversal"
86 "\n 3 --> subtree-subset map"
87 "\n 4 --> domain decomposition map"
88 "\n cutoff -- used when maptype = 4"
89 "\n 0 < cutoff < 1 to define multisector"
90 "\n try cutoff = 1/nproc or 1/(2*nproc)"
91 "\n", argv[0]) ;
92 { int ii ;
93 fprintf(stdout, "\n\n argc = %d", argc) ;
94 for ( ii = 0 ; ii < argc ; ii++ ) {
95 fprintf(stdout, "\n arg %d = %s", ii, argv[ii]) ;
96 }
97 }
98 MPI_Finalize() ;
99 return(0) ;
100 }
101 msglvl = atoi(argv[1]) ;
102 if ( strcmp(argv[2], "stdout") == 0 ) {
103 msgFile = stdout ;
104 } else {
105 int length = strlen(argv[2]) + 1 + 4 ;
106 char *buffer = CVinit(length, '\0') ;
107 sprintf(buffer, "%s.%d", argv[2], myid) ;
108 if ( (msgFile = fopen(buffer, "w")) == NULL ) {
109 fprintf(stderr, "\n fatal error in %s"
110 "\n unable to open file %s\n",
111 argv[0], buffer) ;
112 return(-1) ;
113 }
114 CVfree(buffer) ;
115 }
116 n1 = atoi(argv[ 3]) ;
117 n2 = atoi(argv[ 4]) ;
118 n3 = atoi(argv[ 5]) ;
119 maxzeros = atoi(argv[ 6]) ;
120 maxsize = atoi(argv[ 7]) ;
121 seed = atoi(argv[ 8]) ;
122 type = atoi(argv[ 9]) ;
123 symmetryflag = atoi(argv[10]) ;
124 sparsityflag = atoi(argv[11]) ;
125 pivotingflag = atoi(argv[12]) ;
126 tau = atof(argv[13]) ;
127 droptol = atof(argv[14]) ;
128 lookahead = atoi(argv[15]) ;
129 nrhs = atoi(argv[16]) ;
130 maptype = atoi(argv[17]) ;
131 cutoff = atof(argv[18]) ;
132 fprintf(msgFile, "\n %s :"
133 "\n msglvl = %d"
134 "\n msgFile = %s"
135 "\n n1 = %d"
136 "\n n2 = %d"
137 "\n n3 = %d"
138 "\n maxzeros = %d"
139 "\n maxsize = %d"
140 "\n seed = %d"
141 "\n type = %d"
142 "\n symmetryflag = %d"
143 "\n sparsityflag = %d"
144 "\n pivotingflag = %d"
145 "\n tau = %f"
146 "\n droptol = %f"
147 "\n lookahead = %d"
148 "\n nrhs = %d"
149 "\n maptype = %d"
150 "\n cutoff = %f"
151 "\n",
152 argv[0], msglvl, argv[2], n1, n2, n3, maxzeros, maxsize, seed,
153 type, symmetryflag, sparsityflag, pivotingflag, tau, droptol,
154 lookahead, nrhs, maptype, cutoff) ;
155 fflush(msgFile) ;
156 neqns = n1*n2*n3 ;
157 /*
158 ---------------------------------------------------------------
159 find out the identity of this process and the number of process
160 ---------------------------------------------------------------
161 */
162 MPI_Comm_rank(MPI_COMM_WORLD, &myid) ;
163 MPI_Comm_size(MPI_COMM_WORLD, &nproc) ;
164 if ( myid == 0 ) {
165 /*
166 --------------------------
167 generate the linear system
168 --------------------------
169 */
170 mkNDlinsys(n1, n2, n3, maxzeros, maxsize, type, symmetryflag, nrhs,
171 seed, msglvl, msgFile, &frontETree, &symbfacIVL, &mtxA,
172 &mtxX, &mtxB) ;
173 /*
174 -------------------------------
175 free the symbolic factorization
176 -------------------------------
177 */
178 IVL_free(symbfacIVL) ;
179 /*
180 -------------------------------------------
181 send the front tree to the other processors
182 -------------------------------------------
183 */
184 frontETree = ETree_MPI_Bcast(frontETree, 0,
185 msglvl, msgFile, MPI_COMM_WORLD) ;
186 } else {
187 /*
188 ---------------------------------------
189 receive the front tree from processor 0
190 ---------------------------------------
191 */
192 frontETree = ETree_MPI_Bcast(NULL, 0,
193 msglvl, msgFile, MPI_COMM_WORLD) ;
194 /*
195 ------------------------------
196 create the objects for X and B
197 ------------------------------
198 */
199 mtxX = DenseMtx_new() ;
200 mtxB = DenseMtx_new() ;
201 DenseMtx_init(mtxX, type, -1, -1, 0, nrhs, 1, 0) ;
202 DenseMtx_init(mtxB, type, -1, -1, 0, nrhs, 1, 0) ;
203 /*
204 -----------------------
205 create the object for A
206 -----------------------
207 */
208 mtxA = InpMtx_new() ;
209 InpMtx_init(mtxA, INPMTX_BY_CHEVRONS, type, 0, 0) ;
210 }
211 if ( msglvl > 1 ) {
212 fprintf(msgFile, "\n\n front ETree object") ;
213 ETree_writeForHumanEye(frontETree, msgFile) ;
214 fprintf(msgFile, "\n\n mtxX") ;
215 DenseMtx_writeForHumanEye(mtxX, msgFile) ;
216 fprintf(msgFile, "\n\n mtxB") ;
217 DenseMtx_writeForHumanEye(mtxB, msgFile) ;
218 fprintf(msgFile, "\n\n mtxA") ;
219 InpMtx_writeForHumanEye(mtxA, msgFile) ;
220 }
221 /*
222 ---------------------------------
223 generate the owners map IV object
224 ---------------------------------
225 */
226 cumopsDV = DV_new() ;
227 DV_init(cumopsDV, nproc, NULL) ;
228 DV_fill(cumopsDV, 0.0) ;
229 switch ( maptype ) {
230 case 1 :
231 frontOwnersIV = ETree_wrapMap(frontETree, type, symmetryflag,
232 cumopsDV) ;
233 break ;
234 case 2 :
235 frontOwnersIV = ETree_balancedMap(frontETree, type, symmetryflag,
236 cumopsDV) ;
237 break ;
238 case 3 :
239 frontOwnersIV = ETree_subtreeSubsetMap(frontETree, type,
240 symmetryflag, cumopsDV) ;
241 break ;
242 case 4 :
243 frontOwnersIV = ETree_ddMap(frontETree, type, symmetryflag,
244 cumopsDV, cutoff) ;
245 break ;
246 default :
247 fprintf(stderr, "\n fatal error, maptype = %d", maptype) ;
248 MPI_Finalize() ;
249 exit(-1) ;
250 break ;
251 }
252 if ( msglvl > 1 ) {
253 fprintf(msgFile, "\n\n owners map object") ;
254 IV_writeForHumanEye(frontOwnersIV, msgFile) ;
255 }
256 fprintf(msgFile, "\n\n upper bound on load balance = %4.2f\n",
257 DV_sum(cumopsDV)/DV_max(cumopsDV)) ;
258 DV_free(cumopsDV) ;
259 /*
260 ---------------------
261 create the vertex map
262 ---------------------
263 */
264 vtxmapIV = IV_new() ;
265 IV_init(vtxmapIV, neqns, NULL) ;
266 IVgather(neqns, IV_entries(vtxmapIV), IV_entries(frontOwnersIV),
267 ETree_vtxToFront(frontETree)) ;
268 if ( msglvl > 1 ) {
269 fprintf(msgFile, "\n\n vertex map object") ;
270 IV_writeForHumanEye(vtxmapIV, msgFile) ;
271 }
272 /*
273 ------------------
274 split the X matrix
275 ------------------
276 */
277 tag = 1 ;
278 stats[0] = stats[1] = stats[2] = stats[3] = 0 ;
279 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
280 MARKTIME(t1) ;
281 mtxkeep = DenseMtx_MPI_splitByRows(mtxX, vtxmapIV, stats, msglvl,
282 msgFile, tag, MPI_COMM_WORLD) ;
283 DenseMtx_free(mtxX) ;
284 mtxX = mtxkeep ;
285 MARKTIME(t2) ;
286 fprintf(msgFile, "\n CPU %8.3f : split mtxX", t2 - t1) ;
287 fprintf(msgFile,
288 "\n # sent bytes sent # recv bytes recv"
289 "\n local comm : %6d %12d %6d %12d",
290 stats[0], stats[2], stats[1], stats[3]) ;
291 fflush(msgFile) ;
292 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
293 MPI_SUM, 0, MPI_COMM_WORLD) ;
294 if ( myid == 0 ) {
295 fprintf(msgFile, "\n global comm : %6d %12d %6d %12d\n",
296 tstats[0], tstats[2], tstats[1], tstats[3]) ;
297 fflush(msgFile) ;
298 }
299 if ( msglvl > 1 ) {
300 fprintf(msgFile, "\n\n mtxX object") ;
301 DenseMtx_writeForHumanEye(mtxX, msgFile) ;
302 }
303 /*
304 ------------------
305 split the B matrix
306 ------------------
307 */
308 tag = 1 ;
309 stats[0] = stats[1] = stats[2] = stats[3] = 0 ;
310 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
311 MARKTIME(t1) ;
312 mtxkeep = DenseMtx_MPI_splitByRows(mtxB, vtxmapIV, stats, msglvl,
313 msgFile, tag, MPI_COMM_WORLD) ;
314 DenseMtx_free(mtxB) ;
315 mtxB = mtxkeep ;
316 MARKTIME(t2) ;
317 fprintf(msgFile, "\n CPU %8.3f : split mtxB", t2 - t1) ;
318 fprintf(msgFile,
319 "\n # sent bytes sent # recv bytes recv"
320 "\n local comm : %6d %12d %6d %12d",
321 stats[0], stats[2], stats[1], stats[3]) ;
322 fflush(msgFile) ;
323 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
324 MPI_SUM, 0, MPI_COMM_WORLD) ;
325 if ( myid == 0 ) {
326 fprintf(msgFile, "\n global comm : %6d %12d %6d %12d\n",
327 tstats[0], tstats[2], tstats[1], tstats[3]) ;
328 fflush(msgFile) ;
329 }
330 if ( msglvl > 1 ) {
331 fprintf(msgFile, "\n\n mtxB object") ;
332 DenseMtx_writeForHumanEye(mtxB, msgFile) ;
333 }
334 /*
335 ------------------
336 split the A matrix
337 ------------------
338 */
339 tag = 1 ;
340 stats[0] = stats[1] = stats[2] = stats[3] = 0 ;
341 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
342 MARKTIME(t1) ;
343 InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
344 mtxAkeep = InpMtx_MPI_split(mtxA, vtxmapIV, stats, msglvl,
345 msgFile, tag, MPI_COMM_WORLD) ;
346 InpMtx_free(mtxA) ;
347 mtxA = mtxAkeep ;
348 InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
349 MARKTIME(t2) ;
350 fprintf(msgFile, "\n CPU %8.3f : split mtxA", t2 - t1) ;
351 fprintf(msgFile,
352 "\n # sent bytes sent # recv bytes recv"
353 "\n local comm : %6d %12d %6d %12d",
354 stats[0], stats[2], stats[1], stats[3]) ;
355 fflush(msgFile) ;
356 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
357 MPI_SUM, 0, MPI_COMM_WORLD) ;
358 if ( myid == 0 ) {
359 fprintf(msgFile, "\n global comm : %6d %12d %6d %12d\n",
360 tstats[0], tstats[2], tstats[1], tstats[3]) ;
361 fflush(msgFile) ;
362 }
363 if ( msglvl > 1 ) {
364 fprintf(msgFile, "\n\n mtxA object") ;
365 InpMtx_writeForHumanEye(mtxA, msgFile) ;
366 }
367 /*
368 ----------------------------------
369 compute the symbolic factorization
370 ----------------------------------
371 */
372 stats[0] = stats[1] = stats[2] = stats[3] = 0 ;
373 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
374 MARKTIME(t1) ;
375 symbfacIVL = SymbFac_MPI_initFromInpMtx(frontETree, frontOwnersIV, mtxA,
376 stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
377 MARKTIME(t2) ;
378 fprintf(msgFile, "\n CPU %8.3f : symbolic factorization", t2 - t1) ;
379 fprintf(msgFile,
380 "\n # sent bytes sent # recv bytes recv"
381 "\n local comm : %6d %12d %6d %12d",
382 stats[0], stats[2], stats[1], stats[3]) ;
383 fflush(msgFile) ;
384 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
385 MPI_SUM, 0, MPI_COMM_WORLD) ;
386 if ( myid == 0 ) {
387 fprintf(msgFile, "\n global comm : %6d %12d %6d %12d\n",
388 tstats[0], tstats[2], tstats[1], tstats[3]) ;
389 fflush(msgFile) ;
390 }
391 if ( msglvl > 1 ) {
392 fprintf(msgFile, "\n\n symbolic factorization object") ;
393 IVL_writeForHumanEye(symbfacIVL, msgFile) ;
394 }
395 /*
396 ----------------------------------
397 initialize the front matrix object
398 ----------------------------------
399 */
400 factormanager = SubMtxManager_new() ;
401 SubMtxManager_init(factormanager, 0, 0) ;
402 MARKTIME(t1) ;
403 frontmtx = FrontMtx_new() ;
404 FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag,
405 sparsityflag, pivotingflag, NO_LOCK, myid, frontOwnersIV,
406 factormanager, msglvl, msgFile) ;
407 MARKTIME(t2) ;
408 fprintf(msgFile, "\n CPU %8.3f : initialize the front matrix",
409 t2 - t1) ;
410 if ( msglvl > 1 ) {
411 fprintf(msgFile, "\n front matrix initialized") ;
412 FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
413 fflush(msgFile) ;
414 }
415 /*
416 -----------------------
417 factor the front matrix
418 -----------------------
419 */
420 nzf = ETree_nFactorEntries(frontETree, symmetryflag) ;
421 factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ;
422 IVzero(17, stats) ;
423 DVzero(14, cpus) ;
424 chvmanager = ChvManager_new() ;
425 ChvManager_init(chvmanager, NO_LOCK, 0) ;
426 MARKTIME(t1) ;
427 rootchv = FrontMtx_MPI_factorInpMtx(frontmtx, mtxA, tau, droptol,
428 chvmanager, frontOwnersIV, lookahead, &factorerror,
429 cpus, stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
430 MARKTIME(t2) ;
431 fprintf(msgFile, "\n CPU %8.3f : factor matrix, %8.3f mflops",
432 t2 - t1, 1.e-6*factorops/(t2-t1)) ;
433 if ( factorerror >= 0 ) {
434 fprintf(msgFile, "\n\n error found in front %d", factorerror) ;
435 exit(-1) ;
436 }
437 if ( rootchv != NULL ) {
438 Chv *chv ;
439 fprintf(msgFile, "\n\n factorization did not complete") ;
440 for ( chv = rootchv ; chv != NULL ; chv = chv->next ) {
441 fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d",
442 chv->id, chv->nD, chv->nL, chv->nU) ;
443 Chv_writeForHumanEye(chv, msgFile) ;
444 }
445 }
446 DVzero(14, tcpus) ;
447 MPI_Reduce((void *) cpus, (void *) tcpus, 14, MPI_DOUBLE,
448 MPI_SUM, 0, MPI_COMM_WORLD) ;
449 cputotal = DVsum(14, tcpus) ;
450 if ( cputotal > 0.0 ) {
451 fprintf(msgFile,
452 "\n initialize fronts %8.3f %6.2f"
453 "\n load fronts %8.3f %6.2f"
454 "\n update fronts %8.3f %6.2f"
455 "\n aggregate insert %8.3f %6.2f"
456 "\n aggregate remove/add %8.3f %6.2f"
457 "\n assemble postponed data %8.3f %6.2f"
458 "\n factor fronts %8.3f %6.2f"
459 "\n extract postponed data %8.3f %6.2f"
460 "\n store factor entries %8.3f %6.2f"
461 "\n post initial receives %8.3f %6.2f"
462 "\n check for recv'd messages %8.3f %6.2f"
463 "\n post initial sends %8.3f %6.2f"
464 "\n check for sent messages %8.3f %6.2f"
465 "\n total time %8.3f",
466 tcpus[0], 100.*tcpus[0]/cputotal,
467 tcpus[1], 100.*tcpus[1]/cputotal,
468 tcpus[2], 100.*tcpus[2]/cputotal,
469 tcpus[3], 100.*tcpus[3]/cputotal,
470 tcpus[4], 100.*tcpus[4]/cputotal,
471 tcpus[5], 100.*tcpus[5]/cputotal,
472 tcpus[6], 100.*tcpus[6]/cputotal,
473 tcpus[7], 100.*tcpus[7]/cputotal,
474 tcpus[8], 100.*tcpus[8]/cputotal,
475 tcpus[9], 100.*tcpus[9]/cputotal,
476 tcpus[10], 100.*tcpus[10]/cputotal,
477 tcpus[11], 100.*tcpus[11]/cputotal,
478 tcpus[12], 100.*tcpus[12]/cputotal, cputotal) ;
479 }
480 fprintf(msgFile,
481 "\n\n Local statistics"
482 "\n %d pivots, %d pivot tests, %d delayed rows and columns"
483 "\n %d entries in D, %d entries in L, %d entries in U"
484 "\n %d active Chv objects, %d active bytes, %d requested bytes",
485 stats[0], stats[1], stats[2], stats[3], stats[4], stats[5],
486 stats[14], stats[15], stats[16]) ;
487 fprintf(msgFile,
488 "\n local comm : # sent bytes sent # recv bytes recv"
489 "\n aggregate %6d %12d %6d %12d"
490 "\n postponed %6d %12d %6d %12d",
491 stats[6], stats[7], stats[8], stats[9],
492 stats[10], stats[11], stats[12], stats[13]) ;
493 IVzero(11, tstats) ;
494 MPI_Reduce((void *) stats, (void *) tstats, 17, MPI_INT,
495 MPI_SUM, 0, MPI_COMM_WORLD) ;
496 if ( myid == 0 ) {
497 fprintf(msgFile,
498 "\n\n Global statistics"
499 "\n %d pivots, %d pivot tests, %d delayed rows and columns"
500 "\n %d entries in D, %d entries in L, %d entries in U"
501 "\n %d active Chv objects, %d active bytes, %d requested bytes",
502 tstats[0], tstats[1], tstats[2],
503 tstats[3], tstats[4], tstats[5],
504 tstats[14], tstats[15], tstats[16]) ;
505 fprintf(msgFile,
506 "\n global comm : # sent bytes sent # recv bytes recv"
507 "\n aggregate %6d %12d %6d %12d"
508 "\n postponed %6d %12d %6d %12d",
509 tstats[6], tstats[7], tstats[8], tstats[9],
510 tstats[10], tstats[11], tstats[12], tstats[13]) ;
511 if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
512 solveops = 2*(tstats[3] + tstats[4] + tstats[5]) ;
513 } else {
514 solveops = 2*(tstats[3] + 2*tstats[5]) ;
515 }
516 if ( FRONTMTX_IS_COMPLEX(frontmtx) ) {
517 solveops *= 4 ;
518 }
519 solveops *= nrhs ;
520 fprintf(msgFile, "\n\n solve operations = %.0f", solveops) ;
521 }
522 SubMtxManager_writeForHumanEye(factormanager, msgFile) ;
523 ChvManager_writeForHumanEye(chvmanager, msgFile) ;
524 if ( msglvl > 2 ) {
525 fprintf(msgFile, "\n\n front factor matrix") ;
526 FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
527 fflush(msgFile) ;
528 }
529 /*
530 ----------------------------------------------------------
531 redistribute the right hand side and solution if necessary
532 ----------------------------------------------------------
533 */
534 MARKTIME(t1) ;
535 if ( FRONTMTX_IS_PIVOTING(frontmtx) ) {
536 IV *colmapIV ;
537
538 colmapIV = FrontMtx_MPI_colmapIV(frontmtx, frontOwnersIV,
539 msglvl, msgFile, MPI_COMM_WORLD) ;
540 if ( msglvl > 2 ) {
541 fprintf(msgFile, "\n\n column map IV object") ;
542 IV_writeForHumanEye(colmapIV, msgFile) ;
543 fflush(msgFile) ;
544 }
545 stats[0] = stats[1] = stats[2] = stats[3] = 0 ;
546 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
547 MARKTIME(t1) ;
548 mtxkeep = DenseMtx_MPI_splitByRows(mtxX, colmapIV, stats, msglvl,
549 msgFile, tag, MPI_COMM_WORLD) ;
550 DenseMtx_free(mtxX) ;
551 mtxX = mtxkeep ;
552 MARKTIME(t2) ;
553 fprintf(msgFile,
554 "\n\n CPU %8.3f : solution matrix split ", t2 - t1) ;
555 fprintf(msgFile,
556 "\n # sent bytes sent # recv bytes recv"
557 "\n local comm : %6d %12d %6d %12d",
558 stats[0], stats[2], stats[1], stats[3]) ;
559 fflush(msgFile) ;
560 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
561 MPI_SUM, 0, MPI_COMM_WORLD) ;
562 if ( myid == 0 ) {
563 fprintf(msgFile, "\n global comm : %6d %12d %6d %12d\n",
564 tstats[0], tstats[2], tstats[1], tstats[3]) ;
565 fflush(msgFile) ;
566 }
567 stats[0] = stats[1] = stats[2] = stats[3] = 0 ;
568 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
569 if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
570 IV *rowmapIV ;
571
572 rowmapIV = FrontMtx_MPI_rowmapIV(frontmtx, frontOwnersIV, msglvl,
573 msgFile, MPI_COMM_WORLD) ;
574 if ( msglvl > 2 ) {
575 fprintf(msgFile, "\n\n row map IV object") ;
576 IV_writeForHumanEye(rowmapIV, msgFile) ;
577 fflush(msgFile) ;
578 }
579 MARKTIME(t1) ;
580 mtxkeep = DenseMtx_MPI_splitByRows(mtxB, rowmapIV, stats, msglvl,
581 msgFile, tag, MPI_COMM_WORLD) ;
582 DenseMtx_free(mtxB) ;
583 mtxB = mtxkeep ;
584 MARKTIME(t2) ;
585 fprintf(msgFile, "\n CPU %8.3f : rhs matrix split ", t2 - t1) ;
586 IV_free(rowmapIV) ;
587 } else {
588 IVfill(4, stats, 0) ;
589 MARKTIME(t1) ;
590 mtxkeep = DenseMtx_MPI_splitByRows(mtxB, colmapIV, stats, msglvl,
591 msgFile, tag, MPI_COMM_WORLD) ;
592 DenseMtx_free(mtxB) ;
593 mtxB = mtxkeep ;
594 MARKTIME(t2) ;
595 fprintf(msgFile, "\n CPU %8.3f : rhs matrix split ", t2 - t1) ;
596 }
597 fprintf(msgFile,
598 "\n\n Redistibute rhs and solution for this process"
599 "\n # sent bytes sent # recv bytes recv"
600 "\n local comm : %6d %12d %6d %12d",
601 stats[0], stats[2], stats[1], stats[3]) ;
602 fflush(msgFile) ;
603 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
604 MPI_SUM, 0, MPI_COMM_WORLD) ;
605 if ( myid == 0 ) {
606 fprintf(msgFile, "\n global comm : %6d %12d %6d %12d\n",
607 tstats[0], tstats[2], tstats[1], tstats[3]) ;
608 fflush(msgFile) ;
609 }
610 if ( msglvl > 2 ) {
611 fprintf(msgFile, "\n\n solution matrix") ;
612 DenseMtx_writeForHumanEye(mtxX, msgFile) ;
613 fprintf(msgFile, "\n\n right hand side matrix") ;
614 DenseMtx_writeForHumanEye(mtxB, msgFile) ;
615 fflush(msgFile) ;
616 }
617 IV_free(colmapIV) ;
618 }
619 MARKTIME(t2) ;
620 fprintf(msgFile, "\n\n CPU %8.3f : solution and rhs set up ", t2-t1);
621 /*
622 -----------------------------------------------------------------
623 post-process the matrix and convert to a submatrix representation
624 -----------------------------------------------------------------
625 */
626 stats[0] = stats[1] = stats[2] = stats[3] = 0 ;
627 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
628 MARKTIME(t1) ;
629 FrontMtx_MPI_postProcess(frontmtx, frontOwnersIV,
630 stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
631 MARKTIME(t2) ;
632 fprintf(msgFile, "\n CPU %8.3f : post-process the factor matrix",
633 t2 - t1) ;
634 fprintf(msgFile,
635 "\n Post-process communication for this process"
636 "\n # sent bytes sent # recv bytes recv"
637 "\n local comm : %6d %12d %6d %12d",
638 stats[0], stats[2], stats[1], stats[3]) ;
639 fflush(msgFile) ;
640 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
641 MPI_SUM, 0, MPI_COMM_WORLD) ;
642 if ( myid == 0 ) {
643 fprintf(msgFile, "\n global comm : %6d %12d %6d %12d\n",
644 tstats[0], tstats[2], tstats[1], tstats[3]) ;
645 fflush(msgFile) ;
646 }
647 if ( msglvl > 1 ) {
648 fprintf(msgFile, "\n\n after post-processing, front factor matrix") ;
649 FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
650 fflush(msgFile) ;
651 }
652 /*
653 ---------------------------
654 create the solve map object
655 ---------------------------
656 */
657 MARKTIME(t1) ;
658 solvemap = SolveMap_new() ;
659 if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx)
660 && FRONTMTX_IS_PIVOTING(frontmtx) ) {
661 SolveMap_ddMap(solvemap, SPOOLES_NONSYMMETRIC,
662 frontmtx->upperblockIVL, frontmtx->lowerblockIVL,
663 nproc, frontOwnersIV, frontmtx->tree,
664 seed, msglvl, msgFile);
665 } else {
666 SolveMap_ddMap(solvemap, SPOOLES_SYMMETRIC, frontmtx->upperblockIVL,
667 NULL, nproc, frontOwnersIV, frontmtx->tree,
668 seed, msglvl, msgFile) ;
669 }
670 MARKTIME(t2) ;
671 fprintf(msgFile, "\n CPU %8.3f : solve map created", t2 - t1) ;
672 if ( msglvl > 2 ) {
673 SolveMap_writeForHumanEye(solvemap, msgFile) ;
674 } else {
675 SolveMap_writeStats(solvemap, msgFile) ;
676 }
677 fflush(msgFile) ;
678 /*
679 --------------------------------------------------
680 split the front matrix to conform to the solve map
681 --------------------------------------------------
682 */
683 stats[0] = stats[1] = stats[2] = stats[3] = 0 ;
684 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
685 MARKTIME(t1) ;
686 FrontMtx_MPI_split(frontmtx, solvemap,
687 stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
688 MARKTIME(t2) ;
689 fprintf(msgFile, "\n\n CPU %8.3f : front matrix split", t2 - t1) ;
690 fprintf(msgFile,
691 "\n Split front matrix communication for this process"
692 "\n # sent bytes sent # recv bytes recv"
693 "\n local comm : %6d %12d %6d %12d",
694 stats[0], stats[2], stats[1], stats[3]) ;
695 fflush(msgFile) ;
696 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
697 MPI_SUM, 0, MPI_COMM_WORLD) ;
698 if ( myid == 0 ) {
699 fprintf(msgFile, "\n global comm : %6d %12d %6d %12d\n",
700 tstats[0], tstats[2], tstats[1], tstats[3]) ;
701 fflush(msgFile) ;
702 }
703 if ( msglvl > 1 ) {
704 fprintf(msgFile, "\n\n after splitting, front factor matrix") ;
705 FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
706 fflush(msgFile) ;
707 }
708 /*
709 -----------------------
710 solve the linear system
711 -----------------------
712 */
713 solvemanager = SubMtxManager_new() ;
714 SubMtxManager_init(solvemanager, 0, 0) ;
715 IVzero(8, stats) ;
716 IVzero(8, tstats) ;
717 DVzero(5, cpus) ;
718 mtxZ = DenseMtx_new() ;
719 DenseMtx_init(mtxZ, type, 0, 0, mtxX->nrow, mtxX->ncol, 1, mtxX->nrow) ;
720 DenseMtx_zero(mtxZ) ;
721 DenseMtx_rowIndices(mtxX, &nrow, &rowindX) ;
722 DenseMtx_rowIndices(mtxZ, &nrow, &rowindZ) ;
723 IVcopy(nrow, rowindZ, rowindX) ;
724 MARKTIME(t1) ;
725 FrontMtx_MPI_solve(frontmtx, mtxZ, mtxB, solvemanager, solvemap,
726 cpus, stats, msglvl, msgFile, tag, MPI_COMM_WORLD);
727 MARKTIME(t2) ;
728 solvecpu = t2 - t1 ;
729 fprintf(msgFile, "\n CPU %8.3f : solve done ", solvecpu) ;
730 /*
731 fprintf(msgFile, "\n\n cpus") ;
732 DVfprintf(msgFile, 6, cpus) ;
733 */
734 DVzero(6, tcpus) ;
735 MPI_Reduce((void *) cpus, (void *) tcpus, 6, MPI_DOUBLE,
736 MPI_SUM, 0, MPI_COMM_WORLD) ;
737 if ( myid == 0 ) {
738 fprintf(msgFile, ", %.3f mflops", 1.e-6*solveops/(t2-t1)) ;
739 /*
740 fprintf(msgFile, "\n\n tcpus") ;
741 DVfprintf(msgFile, 6, tcpus) ;
742 */
743 solvecpu = DVsum(6, tcpus) ;
744 fprintf(msgFile, "\n\n Solve CPU breakdown:") ;
745 if ( solvecpu > 0.0 ) {
746 fprintf(msgFile,
747 "\n set up solves %8.3f %6.2f"
748 "\n load rhs and store solution %8.3f %6.2f"
749 "\n forward solve %8.3f %6.2f"
750 "\n diagonal solve %8.3f %6.2f"
751 "\n backward solve %8.3f %6.2f"
752 "\n miscellaneous %8.3f %6.2f"
753 "\n total time %8.3f",
754 tcpus[0], 100.*tcpus[0]/solvecpu,
755 tcpus[1], 100.*tcpus[1]/solvecpu,
756 tcpus[2], 100.*tcpus[2]/solvecpu,
757 tcpus[3], 100.*tcpus[3]/solvecpu,
758 tcpus[4], 100.*tcpus[4]/solvecpu,
759 tcpus[5], 100.*tcpus[5]/solvecpu, solvecpu) ;
760 }
761 }
762 fprintf(msgFile,
763 "\n\n Solve communication for this processor"
764 "\n aggregates solution"
765 "\n # # bytes # #bytes"
766 "\n send %10d %10d %10d %10d"
767 "\n recv %10d %10d %10d %10d",
768 stats[1], stats[3], stats[0], stats[2],
769 stats[5], stats[7], stats[4], stats[6]) ;
770 fflush(msgFile) ;
771 MPI_Reduce((void *) stats, (void *) tstats, 8, MPI_INT,
772 MPI_SUM, 0, MPI_COMM_WORLD) ;
773 if ( myid == 0 ) {
774 fprintf(msgFile,
775 "\n\n Solve communication for all processors"
776 "\n aggregates solution"
777 "\n # # bytes # #bytes"
778 "\n send %10d %10d %10d %10d"
779 "\n recv %10d %10d %10d %10d",
780 tstats[1], tstats[3], tstats[0], tstats[2],
781 tstats[5], tstats[7], tstats[4], tstats[6]) ;
782 fflush(msgFile) ;
783 }
784 SubMtxManager_writeForHumanEye(solvemanager, msgFile) ;
785 if ( msglvl > 2 ) {
786 fprintf(msgFile, "\n\n computed solution") ;
787 DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
788 fflush(msgFile) ;
789 }
790 DenseMtx_sub(mtxZ, mtxX) ;
791 error = DenseMtx_maxabs(mtxZ) ;
792 fprintf(msgFile, "\n\n local error = %12.4e", error) ;
793 MPI_Reduce((void *) &error, (void *) &terror, 1, MPI_DOUBLE,
794 MPI_MAX, 0, MPI_COMM_WORLD) ;
795 if ( myid == 0 ) {
796 fprintf(msgFile, "\n global error = %12.4e", terror) ;
797 fflush(msgFile) ;
798 }
799 if ( msglvl > 2 ) {
800 fprintf(msgFile, "\n\n error matrix") ;
801 DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
802 fflush(msgFile) ;
803 }
804 /*
805 ------------------------
806 free the working storage
807 ------------------------
808 */
809 DenseMtx_free(mtxX) ;
810 DenseMtx_free(mtxB) ;
811 DenseMtx_free(mtxZ) ;
812 InpMtx_free(mtxA) ;
813 SubMtxManager_free(factormanager) ;
814 SubMtxManager_free(solvemanager) ;
815 ChvManager_free(chvmanager) ;
816 FrontMtx_free(frontmtx) ;
817 ETree_free(frontETree) ;
818 IVL_free(symbfacIVL) ;
819 IV_free(vtxmapIV) ;
820 IV_free(frontOwnersIV) ;
821 SolveMap_free(solvemap) ;
822
823 MPI_Barrier(MPI_COMM_WORLD) ;
824
825 fclose(msgFile) ;
826
827 MPI_Finalize() ;
828
829 return(1) ; }
830
831 /*--------------------------------------------------------------------*/
832