1 /*  testGridMPI.c  */
2 
3 #include "../spoolesMPI.h"
4 #include "../../timings.h"
5 
6 /*--------------------------------------------------------------------*/
7 void mkNDlinsys ( int n1, int n2, int n3, int maxzeros, int maxsize,
8    int type, int symmetryflag, int nrhs, int seed, int msglvl,
9    FILE *msgFile, ETree **pfrontETree, IVL **psymbfacIVL,
10    InpMtx **pmtxA, DenseMtx **pmtxX, DenseMtx **pmtxB ) ;
11 /*--------------------------------------------------------------------*/
12 int
main(int argc,char * argv[])13 main ( int argc, char *argv[] )
14 /*
15 */
16 {
17 Chv             *rootchv ;
18 ChvManager      *chvmanager ;
19 DenseMtx        *mtxB, *mtxkeep, *mtxX, *mtxZ ;
20 double          cputotal, cutoff, droptol, error, factorops,
21                 solvecpu, solveops, tau, terror, t1, t2 ;
22 double          cpus[14], tcpus[14] ;
23 DV              *cumopsDV ;
24 ETree           *frontETree ;
25 FILE            *msgFile ;
26 FrontMtx        *frontmtx ;
27 InpMtx          *mtxA, *mtxAkeep ;
28 int             factorerror, lookahead, maptype, maxsize, maxzeros,
29                 msglvl, myid, neqns, nproc, nrhs, nrow, nzf, n1, n2,
30                 n3, pivotingflag, seed, sparsityflag, symmetryflag,
31                 tag, type ;
32 int             *rowindX, *rowindZ ;
33 int             stats[17], tstats[17] ;
34 IV              *frontOwnersIV, *vtxmapIV ;
35 IVL             *symbfacIVL ;
36 SolveMap        *solvemap ;
37 SubMtxManager   *factormanager, *solvemanager ;
38 /*
39    ---------------------------------------------------------------
40    find out the identity of this process and the number of process
41    ---------------------------------------------------------------
42 */
43 MPI_Init(&argc, &argv) ;
44 MPI_Comm_rank(MPI_COMM_WORLD, &myid) ;
45 MPI_Comm_size(MPI_COMM_WORLD, &nproc) ;
46 fprintf(stdout, "\n process %d of %d, argc = %d", myid, nproc, argc) ;
47 fflush(stdout) ;
48 if ( argc != 19 ) {
49    fprintf(stdout,
50       "\n\n usage : %s "
51       "\n         msglvl msgFile n1 n2 n3 maxzeros maxsize seed "
52       "\n         type symmetryflag sparsityflag pivotingflag "
53       "\n         tau droptol lookahead nrhs maptype cutoff"
54       "\n    msglvl       -- message level"
55       "\n    msgFile      -- message file"
56       "\n    n1           -- # of points in first dimension"
57       "\n    n2           -- # of points in second dimension"
58       "\n    n3           -- # of points in third dimension"
59       "\n    maxzeros     -- max # of zeros in a front"
60       "\n    maxsize      -- max # of internal vertices in a front"
61       "\n    seed         -- random number seed"
62       "\n    type         -- type of entries"
63       "\n       1 --> real "
64       "\n       2 --> complex "
65       "\n    symmetryflag -- symmetry flag"
66       "\n       0 --> symmetric "
67       "\n       1 --> hermitian "
68       "\n       2 --> nonsymmetric "
69       "\n    sparsityflag -- sparsity flag"
70       "\n       0 --> store dense fronts"
71       "\n       1 --> store sparse fronts, use droptol to drop entries"
72       "\n    pivotingflag -- pivoting flag"
73       "\n       0 --> do not pivot"
74       "\n       1 --> enable pivoting"
75       "\n    tau     -- upper bound on factor entries"
76       "\n               used only with pivoting"
77       "\n    droptol -- lower bound on factor entries"
78       "\n               used only with sparse fronts"
79       "\n    lookahead -- parameter to schedule computation"
80       "\n       0 --> no lookahead"
81       "\n       nonzero --> look up the tree this many steps"
82       "\n    nrhs -- number of right hand sides"
83       "\n    maptype -- type of factorization map"
84       "\n       1 --> wrap map"
85       "\n       2 --> balanced map via post-order traversal"
86       "\n       3 --> subtree-subset map"
87       "\n       4 --> domain decomposition map"
88       "\n    cutoff -- used when maptype = 4"
89       "\n       0 < cutoff < 1 to define multisector"
90       "\n       try cutoff = 1/nproc or 1/(2*nproc)"
91       "\n", argv[0]) ;
92    { int ii ;
93    fprintf(stdout, "\n\n argc = %d", argc) ;
94    for ( ii = 0 ; ii < argc ; ii++ ) {
95       fprintf(stdout, "\n arg %d = %s", ii, argv[ii]) ;
96    }
97    }
98    MPI_Finalize() ;
99    return(0) ;
100 }
101 msglvl = atoi(argv[1]) ;
102 if ( strcmp(argv[2], "stdout") == 0 ) {
103    msgFile = stdout ;
104 } else {
105    int    length = strlen(argv[2]) + 1 + 4 ;
106    char   *buffer = CVinit(length, '\0') ;
107    sprintf(buffer, "%s.%d", argv[2], myid) ;
108    if ( (msgFile = fopen(buffer, "w")) == NULL ) {
109       fprintf(stderr, "\n fatal error in %s"
110               "\n unable to open file %s\n",
111               argv[0], buffer) ;
112       return(-1) ;
113    }
114    CVfree(buffer) ;
115 }
116 n1           = atoi(argv[ 3]) ;
117 n2           = atoi(argv[ 4]) ;
118 n3           = atoi(argv[ 5]) ;
119 maxzeros     = atoi(argv[ 6]) ;
120 maxsize      = atoi(argv[ 7]) ;
121 seed         = atoi(argv[ 8]) ;
122 type         = atoi(argv[ 9]) ;
123 symmetryflag = atoi(argv[10]) ;
124 sparsityflag = atoi(argv[11]) ;
125 pivotingflag = atoi(argv[12]) ;
126 tau          = atof(argv[13]) ;
127 droptol      = atof(argv[14]) ;
128 lookahead    = atoi(argv[15]) ;
129 nrhs         = atoi(argv[16]) ;
130 maptype      = atoi(argv[17]) ;
131 cutoff       = atof(argv[18]) ;
132 fprintf(msgFile, "\n %s :"
133         "\n   msglvl       = %d"
134         "\n   msgFile      = %s"
135         "\n   n1           = %d"
136         "\n   n2           = %d"
137         "\n   n3           = %d"
138         "\n   maxzeros     = %d"
139         "\n   maxsize      = %d"
140         "\n   seed         = %d"
141         "\n   type         = %d"
142         "\n   symmetryflag = %d"
143         "\n   sparsityflag = %d"
144         "\n   pivotingflag = %d"
145         "\n   tau          = %f"
146         "\n   droptol      = %f"
147         "\n   lookahead    = %d"
148         "\n   nrhs         = %d"
149         "\n   maptype      = %d"
150         "\n   cutoff       = %f"
151         "\n",
152         argv[0], msglvl, argv[2], n1, n2, n3, maxzeros, maxsize, seed,
153         type, symmetryflag, sparsityflag, pivotingflag, tau, droptol,
154         lookahead, nrhs, maptype, cutoff) ;
155 fflush(msgFile) ;
156 neqns = n1*n2*n3 ;
157 /*
158    ---------------------------------------------------------------
159    find out the identity of this process and the number of process
160    ---------------------------------------------------------------
161 */
162 MPI_Comm_rank(MPI_COMM_WORLD, &myid) ;
163 MPI_Comm_size(MPI_COMM_WORLD, &nproc) ;
164 if ( myid == 0 ) {
165 /*
166    --------------------------
167    generate the linear system
168    --------------------------
169 */
170    mkNDlinsys(n1, n2, n3, maxzeros, maxsize, type, symmetryflag, nrhs,
171               seed, msglvl, msgFile, &frontETree, &symbfacIVL, &mtxA,
172               &mtxX, &mtxB) ;
173 /*
174    -------------------------------
175    free the symbolic factorization
176    -------------------------------
177 */
178    IVL_free(symbfacIVL) ;
179 /*
180    -------------------------------------------
181    send the front tree to the other processors
182    -------------------------------------------
183 */
184    frontETree = ETree_MPI_Bcast(frontETree, 0,
185                                 msglvl, msgFile, MPI_COMM_WORLD) ;
186 } else {
187 /*
188    ---------------------------------------
189    receive the front tree from processor 0
190    ---------------------------------------
191 */
192    frontETree = ETree_MPI_Bcast(NULL, 0,
193                                 msglvl, msgFile, MPI_COMM_WORLD) ;
194 /*
195    ------------------------------
196    create the objects for X and B
197    ------------------------------
198 */
199    mtxX = DenseMtx_new() ;
200    mtxB = DenseMtx_new() ;
201    DenseMtx_init(mtxX, type, -1, -1, 0, nrhs, 1, 0) ;
202    DenseMtx_init(mtxB, type, -1, -1, 0, nrhs, 1, 0) ;
203 /*
204    -----------------------
205    create the object for A
206    -----------------------
207 */
208    mtxA = InpMtx_new() ;
209    InpMtx_init(mtxA, INPMTX_BY_CHEVRONS, type, 0, 0) ;
210 }
211 if ( msglvl > 1 ) {
212    fprintf(msgFile, "\n\n front ETree object") ;
213    ETree_writeForHumanEye(frontETree, msgFile) ;
214    fprintf(msgFile, "\n\n mtxX") ;
215    DenseMtx_writeForHumanEye(mtxX, msgFile) ;
216    fprintf(msgFile, "\n\n mtxB") ;
217    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
218    fprintf(msgFile, "\n\n mtxA") ;
219    InpMtx_writeForHumanEye(mtxA, msgFile) ;
220 }
221 /*
222    ---------------------------------
223    generate the owners map IV object
224    ---------------------------------
225 */
226 cumopsDV = DV_new() ;
227 DV_init(cumopsDV, nproc, NULL) ;
228 DV_fill(cumopsDV, 0.0) ;
229 switch ( maptype ) {
230 case 1 :
231    frontOwnersIV = ETree_wrapMap(frontETree, type, symmetryflag,
232                                  cumopsDV) ;
233    break ;
234 case 2 :
235    frontOwnersIV = ETree_balancedMap(frontETree, type, symmetryflag,
236                                      cumopsDV) ;
237    break ;
238 case 3 :
239    frontOwnersIV = ETree_subtreeSubsetMap(frontETree, type,
240                                           symmetryflag, cumopsDV) ;
241    break ;
242 case 4 :
243    frontOwnersIV = ETree_ddMap(frontETree, type, symmetryflag,
244                                cumopsDV, cutoff) ;
245    break ;
246 default :
247    fprintf(stderr, "\n fatal error, maptype = %d", maptype) ;
248    MPI_Finalize() ;
249    exit(-1) ;
250    break ;
251 }
252 if ( msglvl > 1 ) {
253    fprintf(msgFile, "\n\n owners map object") ;
254    IV_writeForHumanEye(frontOwnersIV, msgFile) ;
255 }
256 fprintf(msgFile, "\n\n upper bound on load balance = %4.2f\n",
257         DV_sum(cumopsDV)/DV_max(cumopsDV)) ;
258 DV_free(cumopsDV) ;
259 /*
260    ---------------------
261    create the vertex map
262    ---------------------
263 */
264 vtxmapIV = IV_new() ;
265 IV_init(vtxmapIV, neqns, NULL) ;
266 IVgather(neqns, IV_entries(vtxmapIV), IV_entries(frontOwnersIV),
267          ETree_vtxToFront(frontETree)) ;
268 if ( msglvl > 1 ) {
269    fprintf(msgFile, "\n\n vertex map object") ;
270    IV_writeForHumanEye(vtxmapIV, msgFile) ;
271 }
272 /*
273    ------------------
274    split the X matrix
275    ------------------
276 */
277 tag = 1 ;
278 stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
279 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
280 MARKTIME(t1) ;
281 mtxkeep = DenseMtx_MPI_splitByRows(mtxX, vtxmapIV, stats, msglvl,
282                                    msgFile, tag, MPI_COMM_WORLD) ;
283 DenseMtx_free(mtxX) ;
284 mtxX = mtxkeep ;
285 MARKTIME(t2) ;
286 fprintf(msgFile, "\n CPU %8.3f : split mtxX", t2 - t1) ;
287 fprintf(msgFile,
288         "\n                 # sent   bytes sent   # recv   bytes recv"
289         "\n local comm  : %6d %12d   %6d %12d",
290         stats[0],  stats[2],  stats[1],  stats[3]) ;
291 fflush(msgFile) ;
292 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
293           MPI_SUM, 0, MPI_COMM_WORLD) ;
294 if ( myid == 0 ) {
295    fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
296            tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
297    fflush(msgFile) ;
298 }
299 if ( msglvl > 1 ) {
300    fprintf(msgFile, "\n\n mtxX object") ;
301    DenseMtx_writeForHumanEye(mtxX, msgFile) ;
302 }
303 /*
304    ------------------
305    split the B matrix
306    ------------------
307 */
308 tag = 1 ;
309 stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
310 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
311 MARKTIME(t1) ;
312 mtxkeep = DenseMtx_MPI_splitByRows(mtxB, vtxmapIV, stats, msglvl,
313                                    msgFile, tag, MPI_COMM_WORLD) ;
314 DenseMtx_free(mtxB) ;
315 mtxB = mtxkeep ;
316 MARKTIME(t2) ;
317 fprintf(msgFile, "\n CPU %8.3f : split mtxB", t2 - t1) ;
318 fprintf(msgFile,
319         "\n                 # sent   bytes sent   # recv   bytes recv"
320         "\n local comm  : %6d %12d   %6d %12d",
321         stats[0],  stats[2],  stats[1],  stats[3]) ;
322 fflush(msgFile) ;
323 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
324           MPI_SUM, 0, MPI_COMM_WORLD) ;
325 if ( myid == 0 ) {
326    fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
327            tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
328    fflush(msgFile) ;
329 }
330 if ( msglvl > 1 ) {
331    fprintf(msgFile, "\n\n mtxB object") ;
332    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
333 }
334 /*
335    ------------------
336    split the A matrix
337    ------------------
338 */
339 tag = 1 ;
340 stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
341 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
342 MARKTIME(t1) ;
343 InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
344 mtxAkeep = InpMtx_MPI_split(mtxA, vtxmapIV, stats, msglvl,
345                             msgFile, tag, MPI_COMM_WORLD) ;
346 InpMtx_free(mtxA) ;
347 mtxA = mtxAkeep ;
348 InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
349 MARKTIME(t2) ;
350 fprintf(msgFile, "\n CPU %8.3f : split mtxA", t2 - t1) ;
351 fprintf(msgFile,
352         "\n                 # sent   bytes sent   # recv   bytes recv"
353         "\n local comm  : %6d %12d   %6d %12d",
354         stats[0],  stats[2],  stats[1],  stats[3]) ;
355 fflush(msgFile) ;
356 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
357           MPI_SUM, 0, MPI_COMM_WORLD) ;
358 if ( myid == 0 ) {
359    fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
360            tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
361    fflush(msgFile) ;
362 }
363 if ( msglvl > 1 ) {
364    fprintf(msgFile, "\n\n mtxA object") ;
365    InpMtx_writeForHumanEye(mtxA, msgFile) ;
366 }
367 /*
368    ----------------------------------
369    compute the symbolic factorization
370    ----------------------------------
371 */
372 stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
373 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
374 MARKTIME(t1) ;
375 symbfacIVL = SymbFac_MPI_initFromInpMtx(frontETree, frontOwnersIV, mtxA,
376                           stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
377 MARKTIME(t2) ;
378 fprintf(msgFile, "\n CPU %8.3f : symbolic factorization", t2 - t1) ;
379 fprintf(msgFile,
380         "\n                 # sent   bytes sent   # recv   bytes recv"
381         "\n local comm  : %6d %12d   %6d %12d",
382         stats[0],  stats[2],  stats[1],  stats[3]) ;
383 fflush(msgFile) ;
384 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
385           MPI_SUM, 0, MPI_COMM_WORLD) ;
386 if ( myid == 0 ) {
387    fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
388            tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
389    fflush(msgFile) ;
390 }
391 if ( msglvl > 1 ) {
392    fprintf(msgFile, "\n\n symbolic factorization object") ;
393    IVL_writeForHumanEye(symbfacIVL, msgFile) ;
394 }
395 /*
396    ----------------------------------
397    initialize the front matrix object
398    ----------------------------------
399 */
400 factormanager = SubMtxManager_new() ;
401 SubMtxManager_init(factormanager, 0, 0) ;
402 MARKTIME(t1) ;
403 frontmtx = FrontMtx_new() ;
404 FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag,
405               sparsityflag, pivotingflag, NO_LOCK, myid, frontOwnersIV,
406               factormanager, msglvl, msgFile) ;
407 MARKTIME(t2) ;
408 fprintf(msgFile, "\n CPU %8.3f : initialize the front matrix",
409         t2 - t1) ;
410 if ( msglvl > 1 ) {
411    fprintf(msgFile, "\n front matrix initialized") ;
412    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
413    fflush(msgFile) ;
414 }
415 /*
416    -----------------------
417    factor the front matrix
418    -----------------------
419 */
420 nzf       = ETree_nFactorEntries(frontETree, symmetryflag) ;
421 factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ;
422 IVzero(17, stats) ;
423 DVzero(14, cpus) ;
424 chvmanager = ChvManager_new() ;
425 ChvManager_init(chvmanager, NO_LOCK, 0) ;
426 MARKTIME(t1) ;
427 rootchv = FrontMtx_MPI_factorInpMtx(frontmtx, mtxA, tau, droptol,
428                     chvmanager, frontOwnersIV, lookahead, &factorerror,
429                     cpus, stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
430 MARKTIME(t2) ;
431 fprintf(msgFile, "\n CPU %8.3f : factor matrix, %8.3f mflops",
432         t2 - t1, 1.e-6*factorops/(t2-t1)) ;
433 if ( factorerror >= 0 ) {
434    fprintf(msgFile, "\n\n error found in front %d", factorerror) ;
435    exit(-1) ;
436 }
437 if ( rootchv != NULL ) {
438    Chv   *chv ;
439    fprintf(msgFile, "\n\n factorization did not complete") ;
440    for ( chv = rootchv ; chv != NULL ; chv = chv->next ) {
441       fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d",
442               chv->id, chv->nD, chv->nL, chv->nU) ;
443       Chv_writeForHumanEye(chv, msgFile) ;
444    }
445 }
446 DVzero(14, tcpus) ;
447 MPI_Reduce((void *) cpus, (void *) tcpus, 14, MPI_DOUBLE,
448           MPI_SUM, 0, MPI_COMM_WORLD) ;
449 cputotal = DVsum(14, tcpus) ;
450 if ( cputotal > 0.0 ) {
451    fprintf(msgFile,
452    "\n    initialize fronts         %8.3f %6.2f"
453    "\n    load fronts               %8.3f %6.2f"
454    "\n    update fronts             %8.3f %6.2f"
455    "\n    aggregate insert          %8.3f %6.2f"
456    "\n    aggregate remove/add      %8.3f %6.2f"
457    "\n    assemble postponed data   %8.3f %6.2f"
458    "\n    factor fronts             %8.3f %6.2f"
459    "\n    extract postponed data    %8.3f %6.2f"
460    "\n    store factor entries      %8.3f %6.2f"
461    "\n    post initial receives     %8.3f %6.2f"
462    "\n    check for recv'd messages %8.3f %6.2f"
463    "\n    post initial sends        %8.3f %6.2f"
464    "\n    check for sent messages   %8.3f %6.2f"
465    "\n    total time                %8.3f",
466    tcpus[0], 100.*tcpus[0]/cputotal,
467    tcpus[1], 100.*tcpus[1]/cputotal,
468    tcpus[2], 100.*tcpus[2]/cputotal,
469    tcpus[3], 100.*tcpus[3]/cputotal,
470    tcpus[4], 100.*tcpus[4]/cputotal,
471    tcpus[5], 100.*tcpus[5]/cputotal,
472    tcpus[6], 100.*tcpus[6]/cputotal,
473    tcpus[7], 100.*tcpus[7]/cputotal,
474    tcpus[8], 100.*tcpus[8]/cputotal,
475    tcpus[9], 100.*tcpus[9]/cputotal,
476    tcpus[10], 100.*tcpus[10]/cputotal,
477    tcpus[11], 100.*tcpus[11]/cputotal,
478    tcpus[12], 100.*tcpus[12]/cputotal, cputotal) ;
479 }
480 fprintf(msgFile,
481      "\n\n Local statistics"
482      "\n    %d pivots, %d pivot tests, %d delayed rows and columns"
483      "\n    %d entries in D, %d entries in L, %d entries in U"
484      "\n    %d active Chv objects, %d active bytes, %d requested bytes",
485      stats[0], stats[1], stats[2], stats[3], stats[4], stats[5],
486      stats[14], stats[15], stats[16]) ;
487 fprintf(msgFile,
488         "\n    local comm  : # sent   bytes sent   # recv   bytes recv"
489         "\n    aggregate     %6d %12d   %6d %12d"
490         "\n    postponed     %6d %12d   %6d %12d",
491         stats[6],  stats[7],  stats[8],  stats[9],
492         stats[10], stats[11], stats[12], stats[13]) ;
493 IVzero(11, tstats) ;
494 MPI_Reduce((void *) stats, (void *) tstats, 17, MPI_INT,
495           MPI_SUM, 0, MPI_COMM_WORLD) ;
496 if ( myid == 0 ) {
497    fprintf(msgFile,
498      "\n\n Global statistics"
499      "\n    %d pivots, %d pivot tests, %d delayed rows and columns"
500      "\n    %d entries in D, %d entries in L, %d entries in U"
501      "\n    %d active Chv objects, %d active bytes, %d requested bytes",
502         tstats[0], tstats[1], tstats[2],
503         tstats[3], tstats[4], tstats[5],
504         tstats[14], tstats[15], tstats[16]) ;
505    fprintf(msgFile,
506         "\n    global comm  : # sent   bytes sent   # recv   bytes recv"
507         "\n    aggregate      %6d %12d   %6d %12d"
508         "\n    postponed      %6d %12d   %6d %12d",
509         tstats[6],  tstats[7],  tstats[8],  tstats[9],
510         tstats[10], tstats[11], tstats[12], tstats[13]) ;
511    if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
512       solveops = 2*(tstats[3] + tstats[4] + tstats[5]) ;
513    } else {
514       solveops = 2*(tstats[3] + 2*tstats[5]) ;
515    }
516    if ( FRONTMTX_IS_COMPLEX(frontmtx) ) {
517       solveops *= 4 ;
518    }
519    solveops *= nrhs ;
520    fprintf(msgFile, "\n\n solve operations = %.0f", solveops) ;
521 }
522 SubMtxManager_writeForHumanEye(factormanager, msgFile) ;
523 ChvManager_writeForHumanEye(chvmanager, msgFile) ;
524 if ( msglvl > 2 ) {
525    fprintf(msgFile, "\n\n front factor matrix") ;
526    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
527    fflush(msgFile) ;
528 }
529 /*
530    ----------------------------------------------------------
531    redistribute the right hand side and solution if necessary
532    ----------------------------------------------------------
533 */
534 MARKTIME(t1) ;
535 if ( FRONTMTX_IS_PIVOTING(frontmtx) ) {
536    IV   *colmapIV ;
537 
538    colmapIV = FrontMtx_MPI_colmapIV(frontmtx, frontOwnersIV,
539                                     msglvl, msgFile, MPI_COMM_WORLD) ;
540    if ( msglvl > 2 ) {
541       fprintf(msgFile, "\n\n column map IV object") ;
542       IV_writeForHumanEye(colmapIV, msgFile) ;
543       fflush(msgFile) ;
544    }
545    stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
546    tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
547    MARKTIME(t1) ;
548    mtxkeep = DenseMtx_MPI_splitByRows(mtxX, colmapIV, stats, msglvl,
549                                       msgFile, tag, MPI_COMM_WORLD) ;
550    DenseMtx_free(mtxX) ;
551    mtxX = mtxkeep ;
552    MARKTIME(t2) ;
553    fprintf(msgFile,
554            "\n\n CPU %8.3f : solution matrix split ", t2 - t1) ;
555    fprintf(msgFile,
556         "\n                 # sent   bytes sent   # recv   bytes recv"
557         "\n local comm  : %6d %12d   %6d %12d",
558         stats[0],  stats[2],  stats[1],  stats[3]) ;
559    fflush(msgFile) ;
560    MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
561              MPI_SUM, 0, MPI_COMM_WORLD) ;
562    if ( myid == 0 ) {
563       fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
564               tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
565       fflush(msgFile) ;
566    }
567    stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
568    tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
569    if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
570       IV   *rowmapIV ;
571 
572       rowmapIV = FrontMtx_MPI_rowmapIV(frontmtx, frontOwnersIV, msglvl,
573                                        msgFile, MPI_COMM_WORLD) ;
574       if ( msglvl > 2 ) {
575          fprintf(msgFile, "\n\n row map IV object") ;
576          IV_writeForHumanEye(rowmapIV, msgFile) ;
577          fflush(msgFile) ;
578       }
579       MARKTIME(t1) ;
580       mtxkeep = DenseMtx_MPI_splitByRows(mtxB, rowmapIV, stats, msglvl,
581                                       msgFile, tag, MPI_COMM_WORLD) ;
582       DenseMtx_free(mtxB) ;
583       mtxB = mtxkeep ;
584       MARKTIME(t2) ;
585       fprintf(msgFile, "\n CPU %8.3f : rhs matrix split ", t2 - t1) ;
586       IV_free(rowmapIV) ;
587    } else {
588       IVfill(4, stats, 0) ;
589       MARKTIME(t1) ;
590       mtxkeep = DenseMtx_MPI_splitByRows(mtxB, colmapIV, stats, msglvl,
591                                       msgFile, tag, MPI_COMM_WORLD) ;
592       DenseMtx_free(mtxB) ;
593       mtxB = mtxkeep ;
594       MARKTIME(t2) ;
595       fprintf(msgFile, "\n CPU %8.3f : rhs matrix split ", t2 - t1) ;
596    }
597    fprintf(msgFile,
598         "\n\n Redistibute rhs and solution for this process"
599         "\n                 # sent   bytes sent   # recv   bytes recv"
600         "\n local comm  : %6d %12d   %6d %12d",
601         stats[0],  stats[2],  stats[1],  stats[3]) ;
602    fflush(msgFile) ;
603    MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
604              MPI_SUM, 0, MPI_COMM_WORLD) ;
605    if ( myid == 0 ) {
606       fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
607               tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
608       fflush(msgFile) ;
609    }
610    if ( msglvl > 2 ) {
611       fprintf(msgFile, "\n\n solution matrix") ;
612       DenseMtx_writeForHumanEye(mtxX, msgFile) ;
613       fprintf(msgFile, "\n\n right hand side matrix") ;
614       DenseMtx_writeForHumanEye(mtxB, msgFile) ;
615       fflush(msgFile) ;
616    }
617    IV_free(colmapIV) ;
618 }
619 MARKTIME(t2) ;
620 fprintf(msgFile, "\n\n CPU %8.3f : solution and rhs set up ", t2-t1);
621 /*
622    -----------------------------------------------------------------
623    post-process the matrix and convert to a submatrix representation
624    -----------------------------------------------------------------
625 */
626 stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
627 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
628 MARKTIME(t1) ;
629 FrontMtx_MPI_postProcess(frontmtx, frontOwnersIV,
630                          stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
631 MARKTIME(t2) ;
632 fprintf(msgFile, "\n CPU %8.3f : post-process the factor matrix",
633         t2 - t1) ;
634 fprintf(msgFile,
635         "\n Post-process communication for this process"
636         "\n                 # sent   bytes sent   # recv   bytes recv"
637         "\n local comm  : %6d %12d   %6d %12d",
638         stats[0],  stats[2],  stats[1],  stats[3]) ;
639 fflush(msgFile) ;
640 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
641           MPI_SUM, 0, MPI_COMM_WORLD) ;
642 if ( myid == 0 ) {
643    fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
644            tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
645    fflush(msgFile) ;
646 }
647 if ( msglvl > 1 ) {
648    fprintf(msgFile, "\n\n after post-processing, front factor matrix") ;
649    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
650    fflush(msgFile) ;
651 }
652 /*
653    ---------------------------
654    create the solve map object
655    ---------------------------
656 */
657 MARKTIME(t1) ;
658 solvemap = SolveMap_new() ;
659 if (  FRONTMTX_IS_NONSYMMETRIC(frontmtx)
660    && FRONTMTX_IS_PIVOTING(frontmtx) ) {
661    SolveMap_ddMap(solvemap, SPOOLES_NONSYMMETRIC,
662                   frontmtx->upperblockIVL, frontmtx->lowerblockIVL,
663                   nproc, frontOwnersIV, frontmtx->tree,
664                   seed, msglvl, msgFile);
665 } else {
666    SolveMap_ddMap(solvemap, SPOOLES_SYMMETRIC, frontmtx->upperblockIVL,
667                   NULL, nproc, frontOwnersIV, frontmtx->tree,
668                   seed, msglvl, msgFile) ;
669 }
670 MARKTIME(t2) ;
671 fprintf(msgFile, "\n CPU %8.3f : solve map created", t2 - t1) ;
672 if ( msglvl > 2 ) {
673    SolveMap_writeForHumanEye(solvemap, msgFile) ;
674 } else {
675    SolveMap_writeStats(solvemap, msgFile) ;
676 }
677 fflush(msgFile) ;
678 /*
679    --------------------------------------------------
680    split the front matrix to conform to the solve map
681    --------------------------------------------------
682 */
683 stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
684 tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
685 MARKTIME(t1) ;
686 FrontMtx_MPI_split(frontmtx, solvemap,
687                    stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
688 MARKTIME(t2) ;
689 fprintf(msgFile, "\n\n CPU %8.3f : front matrix split", t2 - t1) ;
690 fprintf(msgFile,
691         "\n Split front matrix communication for this process"
692         "\n                 # sent   bytes sent   # recv   bytes recv"
693         "\n local comm  : %6d %12d   %6d %12d",
694         stats[0],  stats[2],  stats[1],  stats[3]) ;
695 fflush(msgFile) ;
696 MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
697           MPI_SUM, 0, MPI_COMM_WORLD) ;
698 if ( myid == 0 ) {
699    fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
700            tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
701    fflush(msgFile) ;
702 }
703 if ( msglvl > 1 ) {
704    fprintf(msgFile, "\n\n after splitting, front factor matrix") ;
705    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
706    fflush(msgFile) ;
707 }
708 /*
709    -----------------------
710    solve the linear system
711    -----------------------
712 */
713 solvemanager = SubMtxManager_new() ;
714 SubMtxManager_init(solvemanager, 0, 0) ;
715 IVzero(8,  stats) ;
716 IVzero(8,  tstats) ;
717 DVzero(5, cpus) ;
718 mtxZ = DenseMtx_new() ;
719 DenseMtx_init(mtxZ, type, 0, 0, mtxX->nrow, mtxX->ncol, 1, mtxX->nrow) ;
720 DenseMtx_zero(mtxZ) ;
721 DenseMtx_rowIndices(mtxX, &nrow, &rowindX) ;
722 DenseMtx_rowIndices(mtxZ, &nrow, &rowindZ) ;
723 IVcopy(nrow, rowindZ, rowindX) ;
724 MARKTIME(t1) ;
725 FrontMtx_MPI_solve(frontmtx, mtxZ, mtxB, solvemanager, solvemap,
726                    cpus, stats, msglvl, msgFile, tag, MPI_COMM_WORLD);
727 MARKTIME(t2) ;
728 solvecpu = t2 - t1 ;
729 fprintf(msgFile, "\n CPU %8.3f : solve done ", solvecpu) ;
730 /*
731 fprintf(msgFile, "\n\n cpus") ;
732 DVfprintf(msgFile, 6, cpus) ;
733 */
734 DVzero(6, tcpus) ;
735 MPI_Reduce((void *) cpus, (void *) tcpus, 6, MPI_DOUBLE,
736            MPI_SUM, 0, MPI_COMM_WORLD) ;
737 if ( myid == 0 ) {
738    fprintf(msgFile, ", %.3f mflops", 1.e-6*solveops/(t2-t1)) ;
739 /*
740    fprintf(msgFile, "\n\n tcpus") ;
741    DVfprintf(msgFile, 6, tcpus) ;
742 */
743    solvecpu = DVsum(6, tcpus) ;
744    fprintf(msgFile, "\n\n Solve CPU breakdown:") ;
745    if ( solvecpu > 0.0 ) {
746       fprintf(msgFile,
747       "\n    set up solves               %8.3f %6.2f"
748       "\n    load rhs and store solution %8.3f %6.2f"
749       "\n    forward solve               %8.3f %6.2f"
750       "\n    diagonal solve              %8.3f %6.2f"
751       "\n    backward solve              %8.3f %6.2f"
752       "\n    miscellaneous               %8.3f %6.2f"
753       "\n    total time                  %8.3f",
754       tcpus[0], 100.*tcpus[0]/solvecpu,
755       tcpus[1], 100.*tcpus[1]/solvecpu,
756       tcpus[2], 100.*tcpus[2]/solvecpu,
757       tcpus[3], 100.*tcpus[3]/solvecpu,
758       tcpus[4], 100.*tcpus[4]/solvecpu,
759       tcpus[5], 100.*tcpus[5]/solvecpu, solvecpu) ;
760    }
761 }
762 fprintf(msgFile,
763         "\n\n Solve communication for this processor"
764         "\n                   aggregates              solution"
765         "\n                   #    # bytes          #     #bytes"
766         "\n   send   %10d %10d %10d %10d"
767         "\n   recv   %10d %10d %10d %10d",
768         stats[1], stats[3], stats[0], stats[2],
769         stats[5], stats[7], stats[4], stats[6]) ;
770 fflush(msgFile) ;
771 MPI_Reduce((void *) stats, (void *) tstats, 8, MPI_INT,
772           MPI_SUM, 0, MPI_COMM_WORLD) ;
773 if ( myid == 0 ) {
774    fprintf(msgFile,
775            "\n\n Solve communication for all processors"
776            "\n                   aggregates              solution"
777            "\n                   #    # bytes          #     #bytes"
778            "\n   send   %10d %10d %10d %10d"
779            "\n   recv   %10d %10d %10d %10d",
780            tstats[1], tstats[3], tstats[0], tstats[2],
781            tstats[5], tstats[7], tstats[4], tstats[6]) ;
782    fflush(msgFile) ;
783 }
784 SubMtxManager_writeForHumanEye(solvemanager, msgFile) ;
785 if ( msglvl > 2 ) {
786    fprintf(msgFile, "\n\n computed solution") ;
787    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
788    fflush(msgFile) ;
789 }
790 DenseMtx_sub(mtxZ, mtxX) ;
791 error  = DenseMtx_maxabs(mtxZ) ;
792 fprintf(msgFile, "\n\n local error  = %12.4e", error) ;
793 MPI_Reduce((void *) &error, (void *) &terror, 1, MPI_DOUBLE,
794            MPI_MAX, 0, MPI_COMM_WORLD) ;
795 if ( myid == 0 ) {
796    fprintf(msgFile, "\n global error = %12.4e", terror) ;
797    fflush(msgFile) ;
798 }
799 if ( msglvl > 2 ) {
800    fprintf(msgFile, "\n\n error matrix") ;
801    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
802    fflush(msgFile) ;
803 }
804 /*
805    ------------------------
806    free the working storage
807    ------------------------
808 */
809 DenseMtx_free(mtxX) ;
810 DenseMtx_free(mtxB) ;
811 DenseMtx_free(mtxZ) ;
812 InpMtx_free(mtxA) ;
813 SubMtxManager_free(factormanager) ;
814 SubMtxManager_free(solvemanager) ;
815 ChvManager_free(chvmanager) ;
816 FrontMtx_free(frontmtx) ;
817 ETree_free(frontETree) ;
818 IVL_free(symbfacIVL) ;
819 IV_free(vtxmapIV) ;
820 IV_free(frontOwnersIV) ;
821 SolveMap_free(solvemap) ;
822 
823 MPI_Barrier(MPI_COMM_WORLD) ;
824 
825 fclose(msgFile) ;
826 
827 MPI_Finalize() ;
828 
829 return(1) ; }
830 
831 /*--------------------------------------------------------------------*/
832