1 /* testFactorMT.c */
2
3 #include "../spoolesMT.h"
4 #include "../../FrontMtx.h"
5 #include "../../Drand.h"
6 #include "../../SymbFac.h"
7 #include "../../timings.h"
8 #include "../../misc.h"
9
10 /*--------------------------------------------------------------------*/
11
12 int
main(int argc,char * argv[])13 main ( int argc, char *argv[] )
14 /*
15 -----------------------------------------------------
16 test the factor method for a grid matrix
17 (1) read in an InpMtx object
18 (2) read in an ETree object
19 (3) create a solution matrix object
20 (4) multiply the solution with the matrix
21 to get a right hand side matrix object
22 (5) factor the matrix
23 (6) solve the system
24
25 created -- 98sep05, cca
26 -----------------------------------------------------
27 */
28 {
29 char *etreeFileName, *mtxFileName ;
30 Chv *chv, *rootchv ;
31 ChvManager *chvmanager ;
32 DenseMtx *mtxB, *mtxX, *mtxZ ;
33 double one[2] = { 1.0, 0.0 }, cpus[11] ;
34 double cputotal, cutoff, droptol, factorops ;
35 Drand drand ;
36 double nops, tau, t1, t2 ;
37 DV *cumopsDV ;
38 ETree *frontETree ;
39 FILE *msgFile ;
40 FrontMtx *frontmtx ;
41 InpMtx *mtxA ;
42 int error, loc, lookahead, maptype, msglvl, neqns, nrhs,
43 nthread, nzf, pivotingflag, rc, seed, sparsityflag,
44 symmetryflag, type ;
45 int stats[20] ;
46 IV *frontOwnersIV, *newToOldIV, *oldToNewIV ;
47 IVL *symbfacIVL ;
48 SolveMap *solvemap ;
49 SubMtxManager *mtxmanager ;
50
51 if ( argc != 16 ) {
52 fprintf(stdout,
53 "\n\n usage : %s msglvl msgFile mtxFile etreeFile"
54 "\n seed symmetryflag sparsityflag "
55 "\n pivotingflag tau droptol nrhs "
56 "\n nthread maptype cutoff lookahead"
57 "\n msglvl -- message level"
58 "\n msgFile -- message file"
59 "\n mtxFile -- file to read in InpMtx matrix object"
60 "\n etreeFile -- file to read in ETree front tree object"
61 "\n seed -- random number seed"
62 "\n symmetryflag -- symmetry flag"
63 "\n 0 --> symmetric "
64 "\n 1 --> hermitian"
65 "\n 2 --> nonsymmetric"
66 "\n sparsityflag -- sparsity flag"
67 "\n 0 --> store dense fronts"
68 "\n 1 --> store sparse fronts, use droptol to drop entries"
69 "\n pivotingflag -- pivoting flag"
70 "\n 0 --> do not pivot"
71 "\n 1 --> enable pivoting"
72 "\n tau -- upper bound on factor entries"
73 "\n used only with pivoting"
74 "\n droptol -- lower bound on factor entries"
75 "\n used only with sparse fronts"
76 "\n nrhs -- # of right hand sides"
77 "\n nthread -- number of threads"
78 "\n maptype -- type of map from fronts to threads"
79 "\n 1 --> wrap map"
80 "\n 2 --> balanced map via a post-order traversal"
81 "\n 3 --> subtree-subset map"
82 "\n 4 --> domain decomposition map"
83 "\n cutoff -- cutoff used for domain decomposition map"
84 "\n 0 <= cutoff <= 1 used to define the multisector"
85 "\n lookahead -- lookahead for controlling the parallelism"
86 "\n", argv[0]) ;
87 return(-1) ;
88 }
89 msglvl = atoi(argv[1]) ;
90 if ( strcmp(argv[2], "stdout") == 0 ) {
91 msgFile = stdout ;
92 } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) {
93 fprintf(stderr, "\n fatal error in %s"
94 "\n unable to open file %s\n",
95 argv[0], argv[2]) ;
96 return(-1) ;
97 }
98 mtxFileName = argv[3] ;
99 etreeFileName = argv[4] ;
100 seed = atoi(argv[5]) ;
101 symmetryflag = atoi(argv[6]) ;
102 sparsityflag = atoi(argv[7]) ;
103 pivotingflag = atoi(argv[8]) ;
104 tau = atof(argv[9]) ;
105 droptol = atof(argv[10]) ;
106 nrhs = atoi(argv[11]) ;
107 nthread = atoi(argv[12]) ;
108 maptype = atoi(argv[13]) ;
109 cutoff = atof(argv[14]) ;
110 lookahead = atoi(argv[15]) ;
111 fprintf(msgFile,
112 "\n %s "
113 "\n msglvl -- %d"
114 "\n msgFile -- %s"
115 "\n mtxFileName -- %s"
116 "\n etreeFileName -- %s"
117 "\n seed -- %d"
118 "\n symmetryflag -- %d"
119 "\n sparsityflag -- %d"
120 "\n pivotingflag -- %d"
121 "\n tau -- %e"
122 "\n droptol -- %e"
123 "\n nrhs -- %d"
124 "\n nthread -- %d"
125 "\n maptype -- %d"
126 "\n cutoff -- %f"
127 "\n lookahead -- %d"
128 "\n",
129 argv[0], msglvl, argv[2], mtxFileName, etreeFileName,
130 seed, symmetryflag, sparsityflag, pivotingflag,
131 tau, droptol, nrhs, nthread, maptype, cutoff, lookahead) ;
132 fflush(msgFile) ;
133 /*
134 --------------------------------------
135 initialize the random number generator
136 --------------------------------------
137 */
138 Drand_setDefaultFields(&drand) ;
139 Drand_init(&drand) ;
140 Drand_setSeed(&drand, seed) ;
141 /*
142 Drand_setUniform(&drand, 0.0, 1.0) ;
143 */
144 Drand_setNormal(&drand, 0.0, 1.0) ;
145 /*
146 -------------------------
147 read in the InpMtx object
148 -------------------------
149 */
150 if ( strcmp(mtxFileName, "none") == 0 ) {
151 fprintf(msgFile, "\n no file to read from") ;
152 exit(0) ;
153 }
154 mtxA = InpMtx_new() ;
155 MARKTIME(t1) ;
156 rc = InpMtx_readFromFile(mtxA, mtxFileName) ;
157 MARKTIME(t2) ;
158 fprintf(msgFile, "\n CPU %8.3f : read in mtxA from file %s",
159 t2 - t1, mtxFileName) ;
160 if ( rc != 1 ) {
161 fprintf(msgFile,
162 "\n return value %d from InpMtx_readFromFile(%p,%s)",
163 rc, mtxA, mtxFileName) ;
164 exit(-1) ;
165 }
166 type = mtxA->inputMode ;
167 neqns = 1 + IVmax(mtxA->nent, InpMtx_ivec1(mtxA), &loc) ;
168 if ( msglvl > 1 ) {
169 fprintf(msgFile, "\n\n after reading InpMtx object from file %s",
170 mtxFileName) ;
171 if ( msglvl == 2 ) {
172 InpMtx_writeStats(mtxA, msgFile) ;
173 } else {
174 InpMtx_writeForHumanEye(mtxA, msgFile) ;
175 }
176 fflush(msgFile) ;
177 }
178 if ( INPMTX_IS_BY_ROWS(mtxA) ) {
179 fprintf(msgFile, "\n matrix coordinate type is rows") ;
180 } else if ( INPMTX_IS_BY_COLUMNS(mtxA) ) {
181 fprintf(msgFile, "\n matrix coordinate type is columns") ;
182 } else if ( INPMTX_IS_BY_CHEVRONS(mtxA) ) {
183 fprintf(msgFile, "\n matrix coordinate type is chevrons") ;
184 } else {
185 fprintf(msgFile, "\n\n, error, bad coordinate type") ;
186 exit(-1) ;
187 }
188 if ( INPMTX_IS_RAW_DATA(mtxA) ) {
189 fprintf(msgFile, "\n matrix storage mode is raw data\n") ;
190 } else if ( INPMTX_IS_SORTED(mtxA) ) {
191 fprintf(msgFile, "\n matrix storage mode is sorted\n") ;
192 } else if ( INPMTX_IS_BY_VECTORS(mtxA) ) {
193 fprintf(msgFile, "\n matrix storage mode is by vectors\n") ;
194 } else {
195 fprintf(msgFile, "\n\n, error, bad storage mode") ;
196 exit(-1) ;
197 }
198 /*
199 --------------------------------------------------------
200 generate the linear system
201 1. generate solution matrix and fill with random numbers
202 2. generate rhs matrix and fill with zeros
203 3. compute matrix-matrix multiply
204 --------------------------------------------------------
205 */
206 MARKTIME(t1) ;
207 mtxX = DenseMtx_new() ;
208 DenseMtx_init(mtxX, type, 0, -1, neqns, nrhs, 1, neqns) ;
209 DenseMtx_fillRandomEntries(mtxX, &drand) ;
210 mtxB = DenseMtx_new() ;
211 DenseMtx_init(mtxB, type, 1, -1, neqns, nrhs, 1, neqns) ;
212 DenseMtx_zero(mtxB) ;
213 switch ( symmetryflag ) {
214 case SPOOLES_SYMMETRIC :
215 InpMtx_sym_mmm(mtxA, mtxB, one, mtxX) ;
216 break ;
217 case SPOOLES_HERMITIAN :
218 InpMtx_herm_mmm(mtxA, mtxB, one, mtxX) ;
219 break ;
220 case SPOOLES_NONSYMMETRIC :
221 InpMtx_nonsym_mmm(mtxA, mtxB, one, mtxX) ;
222 break ;
223 default :
224 break ;
225 }
226 MARKTIME(t2) ;
227 fprintf(msgFile, "\n CPU %8.3f : set up the solution and rhs ",
228 t2 - t1) ;
229 if ( msglvl > 2 ) {
230 fprintf(msgFile, "\n\n original mtxX") ;
231 DenseMtx_writeForHumanEye(mtxX, msgFile) ;
232 fprintf(msgFile, "\n\n original mtxB") ;
233 DenseMtx_writeForHumanEye(mtxB, msgFile) ;
234 fflush(msgFile) ;
235 }
236 /*
237 ------------------------
238 read in the ETree object
239 ------------------------
240 */
241 if ( strcmp(etreeFileName, "none") == 0 ) {
242 fprintf(msgFile, "\n no file to read from") ;
243 exit(0) ;
244 }
245 frontETree = ETree_new() ;
246 MARKTIME(t1) ;
247 rc = ETree_readFromFile(frontETree, etreeFileName) ;
248 MARKTIME(t2) ;
249 fprintf(msgFile, "\n CPU %8.3f : read in frontETree from file %s",
250 t2 - t1, etreeFileName) ;
251 if ( rc != 1 ) {
252 fprintf(msgFile, "\n return value %d from ETree_readFromFile(%p,%s)",
253 rc, frontETree, etreeFileName) ;
254 exit(-1) ;
255 }
256 ETree_leftJustify(frontETree) ;
257 if ( msglvl > 1 ) {
258 fprintf(msgFile, "\n\n after reading ETree object from file %s",
259 etreeFileName) ;
260 if ( msglvl == 2 ) {
261 ETree_writeStats(frontETree, msgFile) ;
262 } else {
263 ETree_writeForHumanEye(frontETree, msgFile) ;
264 }
265 }
266 fflush(msgFile) ;
267 /*
268 --------------------------------------------------
269 get the permutations, permute the matrix and the
270 front tree, and compute the symbolic factorization
271 --------------------------------------------------
272 */
273 MARKTIME(t1) ;
274 oldToNewIV = ETree_oldToNewVtxPerm(frontETree) ;
275 newToOldIV = ETree_newToOldVtxPerm(frontETree) ;
276 MARKTIME(t2) ;
277 fprintf(msgFile, "\n CPU %8.3f : get permutations", t2 - t1) ;
278 MARKTIME(t1) ;
279 ETree_permuteVertices(frontETree, oldToNewIV) ;
280 MARKTIME(t2) ;
281 fprintf(msgFile, "\n CPU %8.3f : permute front tree", t2 - t1) ;
282 MARKTIME(t1) ;
283 InpMtx_permute(mtxA, IV_entries(oldToNewIV), IV_entries(oldToNewIV)) ;
284 MARKTIME(t2) ;
285 fprintf(msgFile, "\n CPU %8.3f : permute mtxA", t2 - t1) ;
286 MARKTIME(t1) ;
287 InpMtx_mapToUpperTriangle(mtxA) ;
288 MARKTIME(t2) ;
289 fprintf(msgFile, "\n CPU %8.3f : map to upper triangle", t2 - t1) ;
290 if ( ! INPMTX_IS_BY_CHEVRONS(mtxA) ) {
291 MARKTIME(t1) ;
292 InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS) ;
293 MARKTIME(t2) ;
294 fprintf(msgFile, "\n CPU %8.3f : change coordinate type", t2 - t1) ;
295 }
296 if ( INPMTX_IS_RAW_DATA(mtxA) ) {
297 MARKTIME(t1) ;
298 InpMtx_changeStorageMode(mtxA, INPMTX_SORTED) ;
299 MARKTIME(t2) ;
300 fprintf(msgFile, "\n CPU %8.3f : sort entries ", t2 - t1) ;
301 }
302 if ( INPMTX_IS_SORTED(mtxA) ) {
303 MARKTIME(t1) ;
304 InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
305 MARKTIME(t2) ;
306 fprintf(msgFile, "\n CPU %8.3f : convert to vectors ", t2 - t1) ;
307 }
308 MARKTIME(t1) ;
309 symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA) ;
310 MARKTIME(t2) ;
311 fprintf(msgFile, "\n CPU %8.3f : symbolic factorization", t2 - t1) ;
312 MARKTIME(t1) ;
313 DenseMtx_permuteRows(mtxB, oldToNewIV) ;
314 MARKTIME(t2) ;
315 fprintf(msgFile, "\n CPU %8.3f : permute rhs", t2 - t1) ;
316 /*
317 --------------------------------------------------
318 initialize the cumulative operations metric object
319 --------------------------------------------------
320 */
321 cumopsDV = DV_new() ;
322 DV_init(cumopsDV, nthread, NULL) ;
323 DV_fill(cumopsDV, 0.0) ;
324 /*
325 -------------------------------
326 create the owners map IV object
327 -------------------------------
328 */
329 switch ( maptype ) {
330 case 1 :
331 frontOwnersIV = ETree_wrapMap(frontETree, type,
332 symmetryflag, cumopsDV) ;
333 break ;
334 case 2 :
335 frontOwnersIV = ETree_balancedMap(frontETree, type,
336 symmetryflag, cumopsDV) ;
337 break ;
338 case 3 :
339 frontOwnersIV = ETree_subtreeSubsetMap(frontETree, type,
340 symmetryflag, cumopsDV) ;
341 break ;
342 case 4 :
343 frontOwnersIV = ETree_ddMap(frontETree, type,
344 symmetryflag, cumopsDV, cutoff) ;
345 break ;
346 }
347 if ( msglvl > 0 ) {
348 fprintf(msgFile, "\n\n totalOps = %.0f", DV_sum(cumopsDV)) ;
349 DVscale(DV_size(cumopsDV), DV_entries(cumopsDV),
350 nthread/DV_sum(cumopsDV)) ;
351 fprintf(msgFile, "\n\n cumopsDV") ;
352 DV_writeForHumanEye(cumopsDV, msgFile) ;
353 }
354 DV_free(cumopsDV) ;
355 if ( msglvl > 1 ) {
356 fprintf(msgFile, "\n\n frontOwnersIV") ;
357 IV_writeForHumanEye(frontOwnersIV, msgFile) ;
358 fflush(msgFile) ;
359 }
360 /*
361 ------------------------------
362 initialize the FrontMtx object
363 ------------------------------
364 */
365 MARKTIME(t1) ;
366 frontmtx = FrontMtx_new() ;
367 mtxmanager = SubMtxManager_new() ;
368 SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ;
369 FrontMtx_init(frontmtx, frontETree, symbfacIVL,
370 type, symmetryflag, sparsityflag, pivotingflag,
371 LOCK_IN_PROCESS, 0, NULL, mtxmanager, msglvl, msgFile) ;
372 MARKTIME(t2) ;
373 fprintf(msgFile, "\n\n CPU %8.3f : initialize the front matrix",
374 t2 - t1) ;
375 if ( msglvl > 1 ) {
376 fprintf(msgFile,
377 "\n nendD = %d, nentL = %d, nentU = %d",
378 frontmtx->nentD, frontmtx->nentL, frontmtx->nentU) ;
379 }
380 if ( msglvl > 2 ) {
381 fprintf(msgFile, "\n front matrix initialized") ;
382 FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
383 fflush(msgFile) ;
384 }
385 SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ;
386 /*
387 -----------------
388 factor the matrix
389 -----------------
390 */
391 nzf = ETree_nFactorEntries(frontETree, symmetryflag) ;
392 factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ;
393 fprintf(msgFile,
394 "\n %d factor entries, %.0f factor ops, %8.3f ratio",
395 nzf, factorops, factorops/nzf) ;
396 IVzero(16, stats) ;
397 DVzero(11, cpus) ;
398 chvmanager = ChvManager_new() ;
399 ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ;
400 MARKTIME(t1) ;
401 error = -1 ;
402 rootchv = FrontMtx_MT_factorInpMtx(frontmtx, mtxA, tau, droptol,
403 chvmanager, frontOwnersIV, lookahead,
404 &error, cpus, stats, msglvl, msgFile) ;
405 MARKTIME(t2) ;
406 fprintf(msgFile, "\n\n CPU %8.3f : factor matrix, %8.3f mflops",
407 t2 - t1, 1.e-6*factorops/(t2-t1)) ;
408 if ( rootchv != NULL ) {
409 fprintf(msgFile, "\n\n factorization did not complete") ;
410 for ( chv = rootchv ; chv != NULL ; chv = chv->next ) {
411 fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d",
412 chv->id, chv->nD, chv->nL, chv->nU) ;
413 }
414 }
415 if ( error >= 0 ) {
416 fprintf(msgFile, "\n\n error encountered at front %d\n", error) ;
417 exit(-1) ;
418 }
419 fprintf(msgFile,
420 "\n %8d pivots, %8d pivot tests, %8d delayed rows and columns",
421 stats[0], stats[1], stats[2]) ;
422 if ( frontmtx->rowadjIVL != NULL ) {
423 fprintf(msgFile,
424 "\n %d entries in rowadjIVL", frontmtx->rowadjIVL->tsize) ;
425 }
426 if ( frontmtx->coladjIVL != NULL ) {
427 fprintf(msgFile,
428 ", %d entries in coladjIVL", frontmtx->coladjIVL->tsize) ;
429 }
430 if ( frontmtx->upperblockIVL != NULL ) {
431 fprintf(msgFile,
432 "\n %d fronts, %d entries in upperblockIVL",
433 frontmtx->nfront, frontmtx->upperblockIVL->tsize) ;
434 }
435 if ( frontmtx->lowerblockIVL != NULL ) {
436 fprintf(msgFile,
437 ", %d entries in lowerblockIVL",
438 frontmtx->lowerblockIVL->tsize) ;
439 }
440 fprintf(msgFile,
441 "\n %d entries in D, %d entries in L, %d entries in U",
442 stats[3], stats[4], stats[5]) ;
443 fprintf(msgFile, "\n %d locks", frontmtx->nlocks) ;
444 cputotal = cpus[10] ;
445 if ( cputotal > 0.0 ) {
446 fprintf(msgFile,
447 "\n manage working storage %8.3f %6.2f"
448 "\n initialize/load fronts %8.3f %6.2f"
449 "\n update fronts %8.3f %6.2f"
450 "\n aggregate insert %8.3f %6.2f"
451 "\n aggregate remove/add %8.3f %6.2f"
452 "\n assemble postponed data %8.3f %6.2f"
453 "\n factor fronts %8.3f %6.2f"
454 "\n extract postponed data %8.3f %6.2f"
455 "\n store factor entries %8.3f %6.2f"
456 "\n miscellaneous %8.3f %6.2f"
457 "\n total time %8.3f",
458 cpus[0], 100.*cpus[0]/cputotal,
459 cpus[1], 100.*cpus[1]/cputotal,
460 cpus[2], 100.*cpus[2]/cputotal,
461 cpus[3], 100.*cpus[3]/cputotal,
462 cpus[4], 100.*cpus[4]/cputotal,
463 cpus[5], 100.*cpus[5]/cputotal,
464 cpus[6], 100.*cpus[6]/cputotal,
465 cpus[7], 100.*cpus[7]/cputotal,
466 cpus[8], 100.*cpus[8]/cputotal,
467 cpus[9], 100.*cpus[9]/cputotal, cputotal) ;
468 }
469 SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ;
470 ChvManager_writeForHumanEye(chvmanager, msgFile) ;
471 if ( msglvl > 2 ) {
472 fprintf(msgFile, "\n\n front factor matrix") ;
473 FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
474 }
475 /*
476 ------------------------------
477 post-process the factor matrix
478 ------------------------------
479 */
480 MARKTIME(t1) ;
481 FrontMtx_postProcess(frontmtx, msglvl, msgFile) ;
482 MARKTIME(t2) ;
483 fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ;
484 if ( msglvl > 2 ) {
485 fprintf(msgFile, "\n\n front factor matrix after post-processing") ;
486 FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
487 }
488 fprintf(msgFile, "\n\n after post-processing") ;
489 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
490 /*
491 ----------------
492 solve the system
493 ----------------
494 */
495 neqns = mtxB->nrow ;
496 nrhs = mtxB->ncol ;
497 mtxZ = DenseMtx_new() ;
498 DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ;
499 DenseMtx_zero(mtxZ) ;
500 if ( type == SPOOLES_REAL ) {
501 nops = frontmtx->nentD + 2*frontmtx->nentU ;
502 if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
503 nops += 2*frontmtx->nentL ;
504 } else {
505 nops += 2*frontmtx->nentU ;
506 }
507 } else if ( type == SPOOLES_COMPLEX ) {
508 nops = 8*frontmtx->nentD + 8*frontmtx->nentU ;
509 if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
510 nops += 8*frontmtx->nentL ;
511 } else {
512 nops += 8*frontmtx->nentU ;
513 }
514 }
515 nops *= nrhs ;
516 if ( msglvl > 2 ) {
517 fprintf(msgFile, "\n\n rhs") ;
518 DenseMtx_writeForHumanEye(mtxB, msgFile) ;
519 fflush(stdout) ;
520 }
521 DVzero(6, cpus) ;
522 MARKTIME(t1) ;
523 FrontMtx_solve(frontmtx, mtxZ, mtxB, mtxmanager,
524 cpus, msglvl, msgFile) ;
525 MARKTIME(t2) ;
526 fprintf(msgFile, "\n\n CPU %8.3f : solve the system, %.3f mflops",
527 t2 - t1, 1.e-6*nops/(t2 - t1)) ;
528 cputotal = t2 - t1 ;
529 if ( cputotal > 0.0 ) {
530 fprintf(msgFile,
531 "\n set up solves %8.3f %6.2f"
532 "\n load rhs and store solution %8.3f %6.2f"
533 "\n forward solve %8.3f %6.2f"
534 "\n diagonal solve %8.3f %6.2f"
535 "\n backward solve %8.3f %6.2f"
536 "\n total time %8.3f",
537 cpus[0], 100.*cpus[0]/cputotal,
538 cpus[1], 100.*cpus[1]/cputotal,
539 cpus[2], 100.*cpus[2]/cputotal,
540 cpus[3], 100.*cpus[3]/cputotal,
541 cpus[4], 100.*cpus[4]/cputotal, cputotal) ;
542 }
543 if ( msglvl > 2 ) {
544 fprintf(msgFile, "\n\n computed solution") ;
545 DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
546 fflush(stdout) ;
547 }
548 /*
549 -------------------------------------------------------------
550 permute the computed solution back into the original ordering
551 -------------------------------------------------------------
552 */
553 MARKTIME(t1) ;
554 DenseMtx_permuteRows(mtxZ, newToOldIV) ;
555 MARKTIME(t2) ;
556 fprintf(msgFile, "\n CPU %8.3f : permute solution", t2 - t1) ;
557 /*
558 -----------------
559 compute the error
560 -----------------
561 */
562 DenseMtx_sub(mtxZ, mtxX) ;
563 fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ;
564 if ( msglvl > 2 ) {
565 fprintf(msgFile, "\n\n error") ;
566 DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
567 fflush(stdout) ;
568 }
569 fprintf(msgFile, "\n\n after solve") ;
570 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
571 /*
572 ----------------------------
573 solve the system in parallel
574 ----------------------------
575 */
576 solvemap = SolveMap_new() ;
577 if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx)
578 && FRONTMTX_IS_PIVOTING(frontmtx) ) {
579 SolveMap_ddMap(solvemap, SPOOLES_NONSYMMETRIC,
580 FrontMtx_upperBlockIVL(frontmtx),
581 FrontMtx_lowerBlockIVL(frontmtx),
582 nthread, frontOwnersIV, frontmtx->tree,
583 seed, msglvl, msgFile) ;
584 } else {
585 SolveMap_ddMap(solvemap, SPOOLES_SYMMETRIC,
586 FrontMtx_upperBlockIVL(frontmtx), NULL,
587 nthread, frontOwnersIV, frontmtx->tree,
588 seed, msglvl, msgFile) ;
589 }
590 fprintf(msgFile, "\n solve map created") ;
591 fflush(msgFile) ;
592 neqns = mtxB->nrow ;
593 nrhs = mtxB->ncol ;
594 DenseMtx_zero(mtxZ) ;
595 if ( type == SPOOLES_REAL ) {
596 nops = frontmtx->nentD + 2*frontmtx->nentU ;
597 if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
598 nops += 2*frontmtx->nentL ;
599 } else {
600 nops += 2*frontmtx->nentU ;
601 }
602 } else if ( type == SPOOLES_COMPLEX ) {
603 nops = 8*frontmtx->nentD + 8*frontmtx->nentU ;
604 if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
605 nops += 8*frontmtx->nentL ;
606 } else {
607 nops += 8*frontmtx->nentU ;
608 }
609 }
610 nops *= nrhs ;
611 if ( msglvl > 2 ) {
612 fprintf(msgFile, "\n\n rhs") ;
613 DenseMtx_writeForHumanEye(mtxB, msgFile) ;
614 fflush(stdout) ;
615 }
616 DVzero(6, cpus) ;
617 MARKTIME(t1) ;
618 FrontMtx_MT_solve(frontmtx, mtxZ, mtxB, mtxmanager, solvemap,
619 cpus, msglvl, msgFile) ;
620 MARKTIME(t2) ;
621 fprintf(msgFile, "\n\n CPU %8.3f : solve the system, %.3f mflops",
622 t2 - t1, 1.e-6*nops/(t2 - t1)) ;
623 cputotal = t2 - t1 ;
624 if ( cputotal > 0.0 ) {
625 fprintf(msgFile,
626 "\n set up solves %8.3f %6.2f"
627 "\n load rhs and store solution %8.3f %6.2f"
628 "\n forward solve %8.3f %6.2f"
629 "\n diagonal solve %8.3f %6.2f"
630 "\n backward solve %8.3f %6.2f"
631 "\n total time %8.3f",
632 cpus[0], 100.*cpus[0]/cputotal,
633 cpus[1], 100.*cpus[1]/cputotal,
634 cpus[2], 100.*cpus[2]/cputotal,
635 cpus[3], 100.*cpus[3]/cputotal,
636 cpus[4], 100.*cpus[4]/cputotal, cputotal) ;
637 }
638 if ( msglvl > 2 ) {
639 fprintf(msgFile, "\n\n computed solution") ;
640 DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
641 fflush(stdout) ;
642 }
643 /*
644 -------------------------------------------------------------
645 permute the computed solution back into the original ordering
646 -------------------------------------------------------------
647 */
648 MARKTIME(t1) ;
649 DenseMtx_permuteRows(mtxZ, newToOldIV) ;
650 MARKTIME(t2) ;
651 fprintf(msgFile, "\n CPU %8.3f : permute solution", t2 - t1) ;
652 /*
653 -----------------
654 compute the error
655 -----------------
656 */
657 DenseMtx_sub(mtxZ, mtxX) ;
658 fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ;
659 if ( msglvl > 2 ) {
660 fprintf(msgFile, "\n\n error") ;
661 DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
662 fflush(stdout) ;
663 }
664 fprintf(msgFile, "\n\n after solve") ;
665 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
666 /*
667 ------------------------
668 free the working storage
669 ------------------------
670 */
671 IV_free(oldToNewIV) ;
672 IV_free(newToOldIV) ;
673 IV_free(frontOwnersIV) ;
674 InpMtx_free(mtxA) ;
675 DenseMtx_free(mtxX) ;
676 DenseMtx_free(mtxB) ;
677 DenseMtx_free(mtxZ) ;
678 FrontMtx_free(frontmtx) ;
679 ETree_free(frontETree) ;
680 IVL_free(symbfacIVL) ;
681 ChvManager_free(chvmanager) ;
682 SubMtxManager_free(mtxmanager) ;
683 SolveMap_free(solvemap) ;
684
685 fprintf(msgFile, "\n") ;
686 fclose(msgFile) ;
687
688 return(1) ; }
689
690 /*--------------------------------------------------------------------*/
691