1 /*  testFactorMT.c  */
2 
3 #include "../spoolesMT.h"
4 #include "../../FrontMtx.h"
5 #include "../../Drand.h"
6 #include "../../SymbFac.h"
7 #include "../../timings.h"
8 #include "../../misc.h"
9 
10 /*--------------------------------------------------------------------*/
11 
12 int
main(int argc,char * argv[])13 main ( int argc, char *argv[] )
14 /*
15    -----------------------------------------------------
16    test the factor method for a grid matrix
17    (1) read in an InpMtx object
18    (2) read in an ETree object
19    (3) create a solution matrix object
20    (4) multiply the solution with the matrix
21        to get a right hand side matrix object
22    (5) factor the matrix
23    (6) solve the system
24 
25    created -- 98sep05, cca
26    -----------------------------------------------------
27 */
28 {
29 char            *etreeFileName, *mtxFileName ;
30 Chv             *chv, *rootchv ;
31 ChvManager      *chvmanager ;
32 DenseMtx        *mtxB, *mtxX, *mtxZ ;
33 double          one[2] = { 1.0, 0.0 }, cpus[11] ;
34 double          cputotal, cutoff, droptol, factorops ;
35 Drand           drand ;
36 double          nops, tau, t1, t2   ;
37 DV              *cumopsDV ;
38 ETree           *frontETree   ;
39 FILE            *msgFile ;
40 FrontMtx        *frontmtx ;
41 InpMtx          *mtxA ;
42 int             error, loc, lookahead, maptype, msglvl, neqns, nrhs,
43                 nthread, nzf, pivotingflag, rc, seed, sparsityflag,
44                 symmetryflag, type ;
45 int             stats[20] ;
46 IV              *frontOwnersIV, *newToOldIV, *oldToNewIV ;
47 IVL             *symbfacIVL ;
48 SolveMap        *solvemap ;
49 SubMtxManager   *mtxmanager ;
50 
51 if ( argc != 16 ) {
52    fprintf(stdout,
53 "\n\n usage : %s msglvl msgFile mtxFile etreeFile"
54 "\n         seed symmetryflag sparsityflag "
55 "\n         pivotingflag tau droptol nrhs "
56 "\n         nthread maptype cutoff lookahead"
57 "\n    msglvl    -- message level"
58 "\n    msgFile   -- message file"
59 "\n    mtxFile   -- file to read in InpMtx matrix object"
60 "\n    etreeFile -- file to read in ETree front tree object"
61 "\n    seed      -- random number seed"
62 "\n    symmetryflag -- symmetry flag"
63 "\n       0 --> symmetric "
64 "\n       1 --> hermitian"
65 "\n       2 --> nonsymmetric"
66 "\n    sparsityflag -- sparsity flag"
67 "\n       0 --> store dense fronts"
68 "\n       1 --> store sparse fronts, use droptol to drop entries"
69 "\n    pivotingflag -- pivoting flag"
70 "\n       0 --> do not pivot"
71 "\n       1 --> enable pivoting"
72 "\n    tau     -- upper bound on factor entries"
73 "\n               used only with pivoting"
74 "\n    droptol -- lower bound on factor entries"
75 "\n               used only with sparse fronts"
76 "\n    nrhs     -- # of right hand sides"
77 "\n    nthread -- number of threads"
78 "\n    maptype -- type of map from fronts to threads"
79 "\n       1 --> wrap map"
80 "\n       2 --> balanced map via a post-order traversal"
81 "\n       3 --> subtree-subset map"
82 "\n       4 --> domain decomposition map"
83 "\n    cutoff  -- cutoff used for domain decomposition map"
84 "\n       0 <= cutoff <= 1 used to define the multisector"
85 "\n lookahead -- lookahead for controlling the parallelism"
86 "\n", argv[0]) ;
87    return(-1) ;
88 }
89 msglvl = atoi(argv[1]) ;
90 if ( strcmp(argv[2], "stdout") == 0 ) {
91    msgFile = stdout ;
92 } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) {
93    fprintf(stderr, "\n fatal error in %s"
94            "\n unable to open file %s\n",
95            argv[0], argv[2]) ;
96    return(-1) ;
97 }
98 mtxFileName   = argv[3] ;
99 etreeFileName = argv[4] ;
100 seed          = atoi(argv[5]) ;
101 symmetryflag  = atoi(argv[6]) ;
102 sparsityflag  = atoi(argv[7]) ;
103 pivotingflag  = atoi(argv[8]) ;
104 tau           = atof(argv[9]) ;
105 droptol       = atof(argv[10]) ;
106 nrhs          = atoi(argv[11]) ;
107 nthread       = atoi(argv[12]) ;
108 maptype       = atoi(argv[13]) ;
109 cutoff        = atof(argv[14]) ;
110 lookahead     = atoi(argv[15]) ;
111 fprintf(msgFile,
112         "\n %s "
113         "\n msglvl        -- %d"
114         "\n msgFile       -- %s"
115         "\n mtxFileName   -- %s"
116         "\n etreeFileName -- %s"
117         "\n seed          -- %d"
118         "\n symmetryflag  -- %d"
119         "\n sparsityflag  -- %d"
120         "\n pivotingflag  -- %d"
121         "\n tau           -- %e"
122         "\n droptol       -- %e"
123         "\n nrhs          -- %d"
124         "\n nthread       -- %d"
125         "\n maptype       -- %d"
126         "\n cutoff        -- %f"
127         "\n lookahead     -- %d"
128         "\n",
129         argv[0], msglvl, argv[2], mtxFileName, etreeFileName,
130         seed, symmetryflag, sparsityflag, pivotingflag,
131         tau, droptol, nrhs, nthread, maptype, cutoff, lookahead) ;
132 fflush(msgFile) ;
133 /*
134    --------------------------------------
135    initialize the random number generator
136    --------------------------------------
137 */
138 Drand_setDefaultFields(&drand) ;
139 Drand_init(&drand) ;
140 Drand_setSeed(&drand, seed) ;
141 /*
142 Drand_setUniform(&drand, 0.0, 1.0) ;
143 */
144 Drand_setNormal(&drand, 0.0, 1.0) ;
145 /*
146    -------------------------
147    read in the InpMtx object
148    -------------------------
149 */
150 if ( strcmp(mtxFileName, "none") == 0 ) {
151    fprintf(msgFile, "\n no file to read from") ;
152    exit(0) ;
153 }
154 mtxA = InpMtx_new() ;
155 MARKTIME(t1) ;
156 rc = InpMtx_readFromFile(mtxA, mtxFileName) ;
157 MARKTIME(t2) ;
158 fprintf(msgFile, "\n CPU %8.3f : read in mtxA from file %s",
159         t2 - t1, mtxFileName) ;
160 if ( rc != 1 ) {
161    fprintf(msgFile,
162            "\n return value %d from InpMtx_readFromFile(%p,%s)",
163            rc, mtxA, mtxFileName) ;
164    exit(-1) ;
165 }
166 type = mtxA->inputMode ;
167 neqns = 1 + IVmax(mtxA->nent, InpMtx_ivec1(mtxA), &loc) ;
168 if ( msglvl > 1 ) {
169    fprintf(msgFile, "\n\n after reading InpMtx object from file %s",
170            mtxFileName) ;
171    if ( msglvl == 2 ) {
172       InpMtx_writeStats(mtxA, msgFile) ;
173    } else {
174       InpMtx_writeForHumanEye(mtxA, msgFile) ;
175    }
176    fflush(msgFile) ;
177 }
178 if ( INPMTX_IS_BY_ROWS(mtxA) ) {
179    fprintf(msgFile, "\n matrix coordinate type is rows") ;
180 } else if ( INPMTX_IS_BY_COLUMNS(mtxA) ) {
181    fprintf(msgFile, "\n matrix coordinate type is columns") ;
182 } else if ( INPMTX_IS_BY_CHEVRONS(mtxA) ) {
183    fprintf(msgFile, "\n matrix coordinate type is chevrons") ;
184 } else {
185    fprintf(msgFile, "\n\n, error, bad coordinate type") ;
186    exit(-1) ;
187 }
188 if ( INPMTX_IS_RAW_DATA(mtxA) ) {
189    fprintf(msgFile, "\n matrix storage mode is raw data\n") ;
190 } else if ( INPMTX_IS_SORTED(mtxA) ) {
191    fprintf(msgFile, "\n matrix storage mode is sorted\n") ;
192 } else if ( INPMTX_IS_BY_VECTORS(mtxA) ) {
193    fprintf(msgFile, "\n matrix storage mode is by vectors\n") ;
194 } else {
195    fprintf(msgFile, "\n\n, error, bad storage mode") ;
196    exit(-1) ;
197 }
198 /*
199    --------------------------------------------------------
200    generate the linear system
201    1. generate solution matrix and fill with random numbers
202    2. generate rhs matrix and fill with zeros
203    3. compute matrix-matrix multiply
204    --------------------------------------------------------
205 */
206 MARKTIME(t1) ;
207 mtxX = DenseMtx_new() ;
208 DenseMtx_init(mtxX, type, 0, -1, neqns, nrhs, 1, neqns) ;
209 DenseMtx_fillRandomEntries(mtxX, &drand) ;
210 mtxB = DenseMtx_new() ;
211 DenseMtx_init(mtxB, type, 1, -1, neqns, nrhs, 1, neqns) ;
212 DenseMtx_zero(mtxB) ;
213 switch ( symmetryflag ) {
214 case SPOOLES_SYMMETRIC :
215    InpMtx_sym_mmm(mtxA, mtxB, one, mtxX) ;
216    break ;
217 case SPOOLES_HERMITIAN :
218    InpMtx_herm_mmm(mtxA, mtxB, one, mtxX) ;
219    break ;
220 case SPOOLES_NONSYMMETRIC :
221    InpMtx_nonsym_mmm(mtxA, mtxB, one, mtxX) ;
222    break ;
223 default :
224    break ;
225 }
226 MARKTIME(t2) ;
227 fprintf(msgFile, "\n CPU %8.3f : set up the solution and rhs ",
228         t2 - t1) ;
229 if ( msglvl > 2 ) {
230    fprintf(msgFile, "\n\n original mtxX") ;
231    DenseMtx_writeForHumanEye(mtxX, msgFile) ;
232    fprintf(msgFile, "\n\n original mtxB") ;
233    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
234    fflush(msgFile) ;
235 }
236 /*
237    ------------------------
238    read in the ETree object
239    ------------------------
240 */
241 if ( strcmp(etreeFileName, "none") == 0 ) {
242    fprintf(msgFile, "\n no file to read from") ;
243    exit(0) ;
244 }
245 frontETree = ETree_new() ;
246 MARKTIME(t1) ;
247 rc = ETree_readFromFile(frontETree, etreeFileName) ;
248 MARKTIME(t2) ;
249 fprintf(msgFile, "\n CPU %8.3f : read in frontETree from file %s",
250         t2 - t1, etreeFileName) ;
251 if ( rc != 1 ) {
252    fprintf(msgFile, "\n return value %d from ETree_readFromFile(%p,%s)",
253            rc, frontETree, etreeFileName) ;
254    exit(-1) ;
255 }
256 ETree_leftJustify(frontETree) ;
257 if ( msglvl > 1 ) {
258    fprintf(msgFile, "\n\n after reading ETree object from file %s",
259            etreeFileName) ;
260    if ( msglvl == 2 ) {
261       ETree_writeStats(frontETree, msgFile) ;
262    } else {
263       ETree_writeForHumanEye(frontETree, msgFile) ;
264    }
265 }
266 fflush(msgFile) ;
267 /*
268    --------------------------------------------------
269    get the permutations, permute the matrix and the
270    front tree, and compute the symbolic factorization
271    --------------------------------------------------
272 */
273 MARKTIME(t1) ;
274 oldToNewIV = ETree_oldToNewVtxPerm(frontETree) ;
275 newToOldIV = ETree_newToOldVtxPerm(frontETree) ;
276 MARKTIME(t2) ;
277 fprintf(msgFile, "\n CPU %8.3f : get permutations", t2 - t1) ;
278 MARKTIME(t1) ;
279 ETree_permuteVertices(frontETree, oldToNewIV) ;
280 MARKTIME(t2) ;
281 fprintf(msgFile, "\n CPU %8.3f : permute front tree", t2 - t1) ;
282 MARKTIME(t1) ;
283 InpMtx_permute(mtxA, IV_entries(oldToNewIV), IV_entries(oldToNewIV)) ;
284 MARKTIME(t2) ;
285 fprintf(msgFile, "\n CPU %8.3f : permute mtxA", t2 - t1) ;
286 MARKTIME(t1) ;
287 InpMtx_mapToUpperTriangle(mtxA) ;
288 MARKTIME(t2) ;
289 fprintf(msgFile, "\n CPU %8.3f : map to upper triangle", t2 - t1) ;
290 if ( ! INPMTX_IS_BY_CHEVRONS(mtxA) ) {
291    MARKTIME(t1) ;
292    InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS) ;
293    MARKTIME(t2) ;
294    fprintf(msgFile, "\n CPU %8.3f : change coordinate type", t2 - t1) ;
295 }
296 if ( INPMTX_IS_RAW_DATA(mtxA) ) {
297    MARKTIME(t1) ;
298    InpMtx_changeStorageMode(mtxA, INPMTX_SORTED) ;
299    MARKTIME(t2) ;
300    fprintf(msgFile, "\n CPU %8.3f : sort entries ", t2 - t1) ;
301 }
302 if ( INPMTX_IS_SORTED(mtxA) ) {
303    MARKTIME(t1) ;
304    InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
305    MARKTIME(t2) ;
306    fprintf(msgFile, "\n CPU %8.3f : convert to vectors ", t2 - t1) ;
307 }
308 MARKTIME(t1) ;
309 symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA) ;
310 MARKTIME(t2) ;
311 fprintf(msgFile, "\n CPU %8.3f : symbolic factorization", t2 - t1) ;
312 MARKTIME(t1) ;
313 DenseMtx_permuteRows(mtxB, oldToNewIV) ;
314 MARKTIME(t2) ;
315 fprintf(msgFile, "\n CPU %8.3f : permute rhs", t2 - t1) ;
316 /*
317    --------------------------------------------------
318    initialize the cumulative operations metric object
319    --------------------------------------------------
320 */
321 cumopsDV = DV_new() ;
322 DV_init(cumopsDV, nthread, NULL) ;
323 DV_fill(cumopsDV, 0.0) ;
324 /*
325    -------------------------------
326    create the owners map IV object
327    -------------------------------
328 */
329 switch ( maptype ) {
330 case 1 :
331    frontOwnersIV = ETree_wrapMap(frontETree, type,
332                                  symmetryflag, cumopsDV) ;
333    break ;
334 case 2 :
335    frontOwnersIV = ETree_balancedMap(frontETree, type,
336                                  symmetryflag, cumopsDV) ;
337    break ;
338 case 3 :
339    frontOwnersIV = ETree_subtreeSubsetMap(frontETree, type,
340                                  symmetryflag, cumopsDV) ;
341    break ;
342 case 4 :
343    frontOwnersIV = ETree_ddMap(frontETree, type,
344                                symmetryflag, cumopsDV, cutoff) ;
345    break ;
346 }
347 if ( msglvl > 0 ) {
348    fprintf(msgFile, "\n\n totalOps = %.0f", DV_sum(cumopsDV)) ;
349    DVscale(DV_size(cumopsDV), DV_entries(cumopsDV),
350             nthread/DV_sum(cumopsDV)) ;
351    fprintf(msgFile, "\n\n cumopsDV") ;
352    DV_writeForHumanEye(cumopsDV, msgFile) ;
353 }
354 DV_free(cumopsDV) ;
355 if ( msglvl > 1 ) {
356    fprintf(msgFile, "\n\n frontOwnersIV") ;
357    IV_writeForHumanEye(frontOwnersIV, msgFile) ;
358    fflush(msgFile) ;
359 }
360 /*
361    ------------------------------
362    initialize the FrontMtx object
363    ------------------------------
364 */
365 MARKTIME(t1) ;
366 frontmtx   = FrontMtx_new() ;
367 mtxmanager = SubMtxManager_new() ;
368 SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ;
369 FrontMtx_init(frontmtx, frontETree, symbfacIVL,
370               type, symmetryflag, sparsityflag, pivotingflag,
371               LOCK_IN_PROCESS, 0, NULL, mtxmanager, msglvl, msgFile) ;
372 MARKTIME(t2) ;
373 fprintf(msgFile, "\n\n CPU %8.3f : initialize the front matrix",
374         t2 - t1) ;
375 if ( msglvl > 1 ) {
376    fprintf(msgFile,
377            "\n nendD  = %d, nentL = %d, nentU = %d",
378            frontmtx->nentD, frontmtx->nentL, frontmtx->nentU) ;
379 }
380 if ( msglvl > 2 ) {
381    fprintf(msgFile, "\n front matrix initialized") ;
382    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
383    fflush(msgFile) ;
384 }
385 SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ;
386 /*
387    -----------------
388    factor the matrix
389    -----------------
390 */
391 nzf       = ETree_nFactorEntries(frontETree, symmetryflag) ;
392 factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ;
393 fprintf(msgFile,
394         "\n %d factor entries, %.0f factor ops, %8.3f ratio",
395         nzf, factorops, factorops/nzf) ;
396 IVzero(16, stats) ;
397 DVzero(11, cpus) ;
398 chvmanager = ChvManager_new() ;
399 ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ;
400 MARKTIME(t1) ;
401 error = -1 ;
402 rootchv = FrontMtx_MT_factorInpMtx(frontmtx, mtxA, tau, droptol,
403                                 chvmanager, frontOwnersIV, lookahead,
404                                 &error, cpus, stats, msglvl, msgFile) ;
405 MARKTIME(t2) ;
406 fprintf(msgFile, "\n\n CPU %8.3f : factor matrix, %8.3f mflops",
407         t2 - t1, 1.e-6*factorops/(t2-t1)) ;
408 if ( rootchv != NULL ) {
409    fprintf(msgFile, "\n\n factorization did not complete") ;
410    for ( chv = rootchv ; chv != NULL ; chv = chv->next ) {
411       fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d",
412               chv->id, chv->nD, chv->nL, chv->nU) ;
413    }
414 }
415 if ( error >= 0 ) {
416    fprintf(msgFile, "\n\n error encountered at front %d\n", error) ;
417    exit(-1) ;
418 }
419 fprintf(msgFile,
420         "\n %8d pivots, %8d pivot tests, %8d delayed rows and columns",
421         stats[0], stats[1], stats[2]) ;
422 if ( frontmtx->rowadjIVL != NULL ) {
423    fprintf(msgFile,
424            "\n %d entries in rowadjIVL", frontmtx->rowadjIVL->tsize) ;
425 }
426 if ( frontmtx->coladjIVL != NULL ) {
427    fprintf(msgFile,
428            ", %d entries in coladjIVL", frontmtx->coladjIVL->tsize) ;
429 }
430 if ( frontmtx->upperblockIVL != NULL ) {
431    fprintf(msgFile,
432            "\n %d fronts, %d entries in upperblockIVL",
433            frontmtx->nfront, frontmtx->upperblockIVL->tsize) ;
434 }
435 if ( frontmtx->lowerblockIVL != NULL ) {
436    fprintf(msgFile,
437            ", %d entries in lowerblockIVL",
438            frontmtx->lowerblockIVL->tsize) ;
439 }
440 fprintf(msgFile,
441         "\n %d entries in D, %d entries in L, %d entries in U",
442         stats[3], stats[4], stats[5]) ;
443 fprintf(msgFile, "\n %d locks", frontmtx->nlocks) ;
444 cputotal = cpus[10] ;
445 if ( cputotal > 0.0 ) {
446    fprintf(msgFile,
447    "\n    manage working storage  %8.3f %6.2f"
448    "\n    initialize/load fronts  %8.3f %6.2f"
449    "\n    update fronts           %8.3f %6.2f"
450    "\n    aggregate insert        %8.3f %6.2f"
451    "\n    aggregate remove/add    %8.3f %6.2f"
452    "\n    assemble postponed data %8.3f %6.2f"
453    "\n    factor fronts           %8.3f %6.2f"
454    "\n    extract postponed data  %8.3f %6.2f"
455    "\n    store factor entries    %8.3f %6.2f"
456    "\n    miscellaneous           %8.3f %6.2f"
457    "\n    total time              %8.3f",
458    cpus[0], 100.*cpus[0]/cputotal,
459    cpus[1], 100.*cpus[1]/cputotal,
460    cpus[2], 100.*cpus[2]/cputotal,
461    cpus[3], 100.*cpus[3]/cputotal,
462    cpus[4], 100.*cpus[4]/cputotal,
463    cpus[5], 100.*cpus[5]/cputotal,
464    cpus[6], 100.*cpus[6]/cputotal,
465    cpus[7], 100.*cpus[7]/cputotal,
466    cpus[8], 100.*cpus[8]/cputotal,
467    cpus[9], 100.*cpus[9]/cputotal, cputotal) ;
468 }
469 SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ;
470 ChvManager_writeForHumanEye(chvmanager, msgFile) ;
471 if ( msglvl > 2 ) {
472    fprintf(msgFile, "\n\n front factor matrix") ;
473    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
474 }
475 /*
476    ------------------------------
477    post-process the factor matrix
478    ------------------------------
479 */
480 MARKTIME(t1) ;
481 FrontMtx_postProcess(frontmtx, msglvl, msgFile) ;
482 MARKTIME(t2) ;
483 fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ;
484 if ( msglvl > 2 ) {
485    fprintf(msgFile, "\n\n front factor matrix after post-processing") ;
486    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
487 }
488 fprintf(msgFile, "\n\n after post-processing") ;
489 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
490 /*
491    ----------------
492    solve the system
493    ----------------
494 */
495 neqns = mtxB->nrow ;
496 nrhs  = mtxB->ncol ;
497 mtxZ  = DenseMtx_new() ;
498 DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ;
499 DenseMtx_zero(mtxZ) ;
500 if ( type == SPOOLES_REAL ) {
501    nops = frontmtx->nentD + 2*frontmtx->nentU ;
502    if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
503       nops += 2*frontmtx->nentL ;
504    } else {
505       nops += 2*frontmtx->nentU ;
506    }
507 } else if ( type == SPOOLES_COMPLEX ) {
508    nops = 8*frontmtx->nentD + 8*frontmtx->nentU ;
509    if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
510       nops += 8*frontmtx->nentL ;
511    } else {
512       nops += 8*frontmtx->nentU ;
513    }
514 }
515 nops *= nrhs ;
516 if ( msglvl > 2 ) {
517    fprintf(msgFile, "\n\n rhs") ;
518    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
519    fflush(stdout) ;
520 }
521 DVzero(6, cpus) ;
522 MARKTIME(t1) ;
523 FrontMtx_solve(frontmtx, mtxZ, mtxB, mtxmanager,
524                cpus, msglvl, msgFile) ;
525 MARKTIME(t2) ;
526 fprintf(msgFile, "\n\n CPU %8.3f : solve the system, %.3f mflops",
527         t2 - t1, 1.e-6*nops/(t2 - t1)) ;
528 cputotal = t2 - t1 ;
529 if ( cputotal > 0.0 ) {
530    fprintf(msgFile,
531    "\n    set up solves               %8.3f %6.2f"
532    "\n    load rhs and store solution %8.3f %6.2f"
533    "\n    forward solve               %8.3f %6.2f"
534    "\n    diagonal solve              %8.3f %6.2f"
535    "\n    backward solve              %8.3f %6.2f"
536    "\n    total time                  %8.3f",
537    cpus[0], 100.*cpus[0]/cputotal,
538    cpus[1], 100.*cpus[1]/cputotal,
539    cpus[2], 100.*cpus[2]/cputotal,
540    cpus[3], 100.*cpus[3]/cputotal,
541    cpus[4], 100.*cpus[4]/cputotal, cputotal) ;
542 }
543 if ( msglvl > 2 ) {
544    fprintf(msgFile, "\n\n computed solution") ;
545    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
546    fflush(stdout) ;
547 }
548 /*
549    -------------------------------------------------------------
550    permute the computed solution back into the original ordering
551    -------------------------------------------------------------
552 */
553 MARKTIME(t1) ;
554 DenseMtx_permuteRows(mtxZ, newToOldIV) ;
555 MARKTIME(t2) ;
556 fprintf(msgFile, "\n CPU %8.3f : permute solution", t2 - t1) ;
557 /*
558    -----------------
559    compute the error
560    -----------------
561 */
562 DenseMtx_sub(mtxZ, mtxX) ;
563 fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ;
564 if ( msglvl > 2 ) {
565    fprintf(msgFile, "\n\n error") ;
566    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
567    fflush(stdout) ;
568 }
569 fprintf(msgFile, "\n\n after solve") ;
570 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
571 /*
572    ----------------------------
573    solve the system in parallel
574    ----------------------------
575 */
576 solvemap = SolveMap_new() ;
577 if (  FRONTMTX_IS_NONSYMMETRIC(frontmtx)
578    && FRONTMTX_IS_PIVOTING(frontmtx) ) {
579    SolveMap_ddMap(solvemap, SPOOLES_NONSYMMETRIC,
580                   FrontMtx_upperBlockIVL(frontmtx),
581                   FrontMtx_lowerBlockIVL(frontmtx),
582                   nthread, frontOwnersIV, frontmtx->tree,
583                   seed, msglvl, msgFile) ;
584 } else {
585    SolveMap_ddMap(solvemap, SPOOLES_SYMMETRIC,
586                   FrontMtx_upperBlockIVL(frontmtx), NULL,
587                   nthread, frontOwnersIV, frontmtx->tree,
588                   seed, msglvl, msgFile) ;
589 }
590 fprintf(msgFile, "\n solve map created") ;
591 fflush(msgFile) ;
592 neqns = mtxB->nrow ;
593 nrhs  = mtxB->ncol ;
594 DenseMtx_zero(mtxZ) ;
595 if ( type == SPOOLES_REAL ) {
596    nops = frontmtx->nentD + 2*frontmtx->nentU ;
597    if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
598       nops += 2*frontmtx->nentL ;
599    } else {
600       nops += 2*frontmtx->nentU ;
601    }
602 } else if ( type == SPOOLES_COMPLEX ) {
603    nops = 8*frontmtx->nentD + 8*frontmtx->nentU ;
604    if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
605       nops += 8*frontmtx->nentL ;
606    } else {
607       nops += 8*frontmtx->nentU ;
608    }
609 }
610 nops *= nrhs ;
611 if ( msglvl > 2 ) {
612    fprintf(msgFile, "\n\n rhs") ;
613    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
614    fflush(stdout) ;
615 }
616 DVzero(6, cpus) ;
617 MARKTIME(t1) ;
618 FrontMtx_MT_solve(frontmtx, mtxZ, mtxB, mtxmanager, solvemap,
619                   cpus, msglvl, msgFile) ;
620 MARKTIME(t2) ;
621 fprintf(msgFile, "\n\n CPU %8.3f : solve the system, %.3f mflops",
622         t2 - t1, 1.e-6*nops/(t2 - t1)) ;
623 cputotal = t2 - t1 ;
624 if ( cputotal > 0.0 ) {
625    fprintf(msgFile,
626    "\n    set up solves               %8.3f %6.2f"
627    "\n    load rhs and store solution %8.3f %6.2f"
628    "\n    forward solve               %8.3f %6.2f"
629    "\n    diagonal solve              %8.3f %6.2f"
630    "\n    backward solve              %8.3f %6.2f"
631    "\n    total time                  %8.3f",
632    cpus[0], 100.*cpus[0]/cputotal,
633    cpus[1], 100.*cpus[1]/cputotal,
634    cpus[2], 100.*cpus[2]/cputotal,
635    cpus[3], 100.*cpus[3]/cputotal,
636    cpus[4], 100.*cpus[4]/cputotal, cputotal) ;
637 }
638 if ( msglvl > 2 ) {
639    fprintf(msgFile, "\n\n computed solution") ;
640    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
641    fflush(stdout) ;
642 }
643 /*
644    -------------------------------------------------------------
645    permute the computed solution back into the original ordering
646    -------------------------------------------------------------
647 */
648 MARKTIME(t1) ;
649 DenseMtx_permuteRows(mtxZ, newToOldIV) ;
650 MARKTIME(t2) ;
651 fprintf(msgFile, "\n CPU %8.3f : permute solution", t2 - t1) ;
652 /*
653    -----------------
654    compute the error
655    -----------------
656 */
657 DenseMtx_sub(mtxZ, mtxX) ;
658 fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ;
659 if ( msglvl > 2 ) {
660    fprintf(msgFile, "\n\n error") ;
661    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
662    fflush(stdout) ;
663 }
664 fprintf(msgFile, "\n\n after solve") ;
665 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
666 /*
667    ------------------------
668    free the working storage
669    ------------------------
670 */
671 IV_free(oldToNewIV) ;
672 IV_free(newToOldIV) ;
673 IV_free(frontOwnersIV) ;
674 InpMtx_free(mtxA) ;
675 DenseMtx_free(mtxX) ;
676 DenseMtx_free(mtxB) ;
677 DenseMtx_free(mtxZ) ;
678 FrontMtx_free(frontmtx) ;
679 ETree_free(frontETree) ;
680 IVL_free(symbfacIVL) ;
681 ChvManager_free(chvmanager) ;
682 SubMtxManager_free(mtxmanager) ;
683 SolveMap_free(solvemap) ;
684 
685 fprintf(msgFile, "\n") ;
686 fclose(msgFile) ;
687 
688 return(1) ; }
689 
690 /*--------------------------------------------------------------------*/
691