1 /*  testGridMT.c  */
2 
3 #include "../spoolesMT.h"
4 #include "../../FrontMtx.h"
5 #include "../../SolveMap.h"
6 #include "../../SymbFac.h"
7 #include "../../Drand.h"
8 #include "../../timings.h"
9 #include "../../misc.h"
10 
11 /*--------------------------------------------------------------------*/
12 /*
13 void mkNDlinsys ( int n1, int n2, int n3, int maxzeros, int maxsize,
14    int type, int symmetryflag, int nrhs, int seed, int msglvl,
15    FILE *msgFile, ETree **pfrontETree, IVL **psymbfacIVL,
16    InpMtx **pmtxA, DenseMtx **pmtxX, DenseMtx **pmtxB ) ;
17 */
18 /*--------------------------------------------------------------------*/
19 
20 int
main(int argc,char * argv[])21 main ( int argc, char *argv[] )
22 /*
23    -----------------------------------------------------
24    test the factor method for a grid matrix
25    (1) construct a linear system for a nested dissection
26        ordering on a regular grid
27    (2) create a solution matrix object
28    (3) multiply the solution with the matrix
29        to get a right hand side matrix object
30    (4) factor the matrix
31    (5) solve the system
32 
33    created -- 98may16, cca
34    -----------------------------------------------------
35 */
36 {
37 Chv             *chv, *rootchv ;
38 ChvManager      *chvmanager ;
39 DenseMtx        *mtxB, *mtxX, *mtxZ ;
40 FrontMtx        *frontmtx ;
41 InpMtx          *mtxA ;
42 SubMtxManager   *mtxmanager ;
43 double          cputotal, cutoff, droptol, factorops ;
44 double          cpus[11] ;
45 Drand           drand ;
46 double          nops, tau, t1, t2   ;
47 DV              *cumopsDV ;
48 ETree           *frontETree ;
49 FILE            *msgFile ;
50 int             error, lookahead, maptype, maxsize, maxzeros, msglvl,
51                 neqns, n1, n2, n3, nrhs, nthread, nzf, pivotingflag,
52                 seed, sparsityflag, symmetryflag,
53                 type ;
54 int             stats[16] ;
55 IV              *frontOwnersIV ;
56 IVL             *symbfacIVL ;
57 SolveMap        *solvemap ;
58 
59 if ( argc != 20 ) {
60    fprintf(stdout,
61 "\n\n usage : %s msglvl msgFile n1 n2 n3 maxzeros maxsize"
62 "\n         seed type symmetryflag sparsityflag pivotingflag "
63 "\n         tau droptol nrhs nthread maptype cutoff lookahead"
64 "\n    msglvl       -- message level"
65 "\n    msgFile      -- message file"
66 "\n    n1           -- number of grid points in the first direction"
67 "\n    n2           -- number of grid points in the second direction"
68 "\n    n3           -- number of grid points in the third direction"
69 "\n    maxzeros     -- max number of zeroes in a front"
70 "\n    maxsize      -- max number of internal nodes in a front"
71 "\n    seed         -- random number seed"
72 "\n    type         -- type of entries"
73 "\n       1 --> real entries"
74 "\n       2 --> complex entries"
75 "\n    symmetryflag -- symmetry flag"
76 "\n       0 --> symmetric"
77 "\n       1 --> hermitian"
78 "\n       2 --> nonsymmetric"
79 "\n    sparsityflag -- sparsity flag"
80 "\n       0 --> store dense fronts"
81 "\n       1 --> store sparse fronts, use droptol to drop entries"
82 "\n    pivotingflag -- pivoting flag"
83 "\n       0 --> do not pivot"
84 "\n       1 --> enable pivoting"
85 "\n    tau     -- upper bound on factor entries"
86 "\n               used only with pivoting"
87 "\n    droptol -- lower bound on factor entries"
88 "\n               used only with sparse fronts"
89 "\n    nrhs    -- # of right hand sides"
90 "\n    nthread -- number of threads"
91 "\n    maptype -- type of map from fronts to threads"
92 "\n       1 --> wrap map"
93 "\n       2 --> balanced map via a post-order traversal"
94 "\n       3 --> subtree-subset map"
95 "\n       4 --> domain decomposition map"
96 "\n    cutoff  -- cutoff used for domain decomposition map"
97 "\n       0 <= cutoff <= 1 used to define the multisector"
98 "\n lookahead -- lookahead for controlling the parallelism"
99 "\n", argv[0]) ;
100    return(-1) ;
101 }
102 msglvl = atoi(argv[1]) ;
103 if ( strcmp(argv[2], "stdout") == 0 ) {
104    msgFile = stdout ;
105 } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) {
106    fprintf(stderr, "\n fatal error in %s"
107            "\n unable to open file %s\n",
108            argv[0], argv[2]) ;
109    return(-1) ;
110 }
111 n1            = atoi(argv[3]) ;
112 n2            = atoi(argv[4]) ;
113 n3            = atoi(argv[5]) ;
114 maxzeros      = atoi(argv[6]) ;
115 maxsize       = atoi(argv[7]) ;
116 seed          = atoi(argv[8]) ;
117 type          = atoi(argv[9]) ;
118 symmetryflag  = atoi(argv[10]) ;
119 sparsityflag  = atoi(argv[11]) ;
120 pivotingflag  = atoi(argv[12]) ;
121 tau           = atof(argv[13]) ;
122 droptol       = atof(argv[14]) ;
123 nrhs          = atoi(argv[15]) ;
124 nthread       = atoi(argv[16]) ;
125 maptype       = atoi(argv[17]) ;
126 cutoff        = atof(argv[18]) ;
127 lookahead     = atoi(argv[19]) ;
128 fprintf(msgFile,
129         "\n %s "
130         "\n msglvl        -- %d"
131         "\n msgFile       -- %s"
132         "\n n1            -- %d"
133         "\n n2            -- %d"
134         "\n n3            -- %d"
135         "\n maxzeros      -- %d"
136         "\n maxsize       -- %d"
137         "\n seed          -- %d"
138         "\n type          -- %d"
139         "\n symmetryflag  -- %d"
140         "\n sparsityflag  -- %d"
141         "\n pivotingflag  -- %d"
142         "\n tau           -- %e"
143         "\n droptol       -- %e"
144         "\n nrhs          -- %d"
145         "\n nthread       -- %d"
146         "\n maptype       -- %d"
147         "\n cutoff        -- %f"
148         "\n lookahead     -- %d"
149         "\n",
150         argv[0], msglvl, argv[2], n1, n2, n3, maxzeros, maxsize, seed,
151         type, symmetryflag, sparsityflag, pivotingflag, tau, droptol,
152         nrhs, nthread, maptype, cutoff, lookahead);
153 fflush(msgFile) ;
154 neqns = n1 * n2 * n3 ;
155 /*
156    --------------------------------------
157    initialize the random number generator
158    --------------------------------------
159 */
160 Drand_setDefaultFields(&drand) ;
161 Drand_init(&drand) ;
162 Drand_setSeed(&drand, seed) ;
163 /*
164 Drand_setUniform(&drand, 0.0, 1.0) ;
165 */
166 Drand_setNormal(&drand, 0.0, 1.0) ;
167 /*
168    --------------------------
169    generate the linear system
170    --------------------------
171 */
172 mkNDlinsys(n1, n2, n3, maxzeros, maxsize, type,
173            symmetryflag, nrhs, seed, msglvl, msgFile,
174            &frontETree, &symbfacIVL, &mtxA, &mtxX, &mtxB) ;
175 /*
176    --------------------------------------------------
177    initialize the cumulative operations metric object
178    --------------------------------------------------
179 */
180 cumopsDV = DV_new() ;
181 DV_init(cumopsDV, nthread, NULL) ;
182 DV_fill(cumopsDV, 0.0) ;
183 /*
184    -------------------------------
185    create the owners map IV object
186    -------------------------------
187 */
188 switch ( maptype ) {
189 case 1 :
190    frontOwnersIV = ETree_wrapMap(frontETree, type,
191                                  symmetryflag, cumopsDV) ;
192    break ;
193 case 2 :
194    frontOwnersIV = ETree_balancedMap(frontETree, type,
195                                  symmetryflag, cumopsDV) ;
196    break ;
197 case 3 :
198    frontOwnersIV = ETree_subtreeSubsetMap(frontETree, type,
199                                  symmetryflag, cumopsDV) ;
200    break ;
201 case 4 :
202    frontOwnersIV = ETree_ddMap(frontETree, type,
203                                symmetryflag, cumopsDV, cutoff) ;
204    break ;
205 }
206 if ( msglvl > 0 ) {
207    fprintf(msgFile, "\n\n totalOps = %.0f", DV_sum(cumopsDV)) ;
208    DVscale(DV_size(cumopsDV), DV_entries(cumopsDV),
209             nthread/DV_sum(cumopsDV)) ;
210    fprintf(msgFile, "\n\n cumopsDV") ;
211    DV_writeForHumanEye(cumopsDV, msgFile) ;
212 }
213 DV_free(cumopsDV) ;
214 if ( msglvl > 1 ) {
215    fprintf(msgFile, "\n\n frontOwnersIV") ;
216    IV_writeForHumanEye(frontOwnersIV, msgFile) ;
217    fflush(msgFile) ;
218 }
219 /*
220    -------------------------------
221    initialize the FrontMtx object
222    -------------------------------
223 */
224 MARKTIME(t1) ;
225 frontmtx = FrontMtx_new() ;
226 mtxmanager = SubMtxManager_new() ;
227 SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ;
228 FrontMtx_init(frontmtx, frontETree, symbfacIVL,
229               type, symmetryflag, sparsityflag, pivotingflag,
230               LOCK_IN_PROCESS, 0, NULL, mtxmanager, msglvl, msgFile) ;
231 MARKTIME(t2) ;
232 fprintf(msgFile, "\n CPU %8.3f : initialize the front matrix",
233         t2 - t1) ;
234 if ( msglvl > 1 ) {
235    fprintf(msgFile, "\n nentD  = %d, nentL = %d, nentU = %d",
236         frontmtx->nentD, frontmtx->nentL, frontmtx->nentU) ;
237 }
238 if ( msglvl > 2 ) {
239    fprintf(msgFile, "\n front matrix initialized") ;
240    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
241    fflush(msgFile) ;
242 }
243 /*
244    -----------------
245    factor the matrix
246    -----------------
247 */
248 nzf       = ETree_nFactorEntries(frontETree, symmetryflag) ;
249 factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ;
250 fprintf(msgFile,
251         "\n %d factor entries, %.0f factor ops, %8.3f ratio",
252         nzf, factorops, factorops/nzf) ;
253 IVzero(16, stats) ;
254 DVzero(11, cpus) ;
255 fprintf(msgFile, "\n\n starting the parallel factor") ;
256 fflush(msgFile) ;
257 chvmanager = ChvManager_new() ;
258 ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ;
259 MARKTIME(t1) ;
260 rootchv = FrontMtx_MT_factorInpMtx(frontmtx, mtxA, tau, droptol,
261                                  chvmanager, frontOwnersIV, lookahead,
262                                  &error, cpus, stats, msglvl, msgFile) ;
263 MARKTIME(t2) ;
264 fprintf(msgFile, "\n CPU %8.3f : factor matrix, %8.3f mflops",
265         t2 - t1, 1.e-6*factorops/(t2-t1)) ;
266 if ( rootchv != NULL ) {
267    fprintf(msgFile, "\n\n factorization did not complete") ;
268    for ( chv = rootchv ; chv != NULL ; chv = chv->next ) {
269       fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d",
270               chv->id, chv->nD, chv->nL, chv->nU) ;
271    }
272 }
273 if ( error >= 0 ) {
274    fprintf(msgFile, "\n\n fatal error encountered at front %d", error) ;
275    exit(-1) ;
276 }
277 fprintf(msgFile,
278         "\n %8d pivots, %8d pivot tests, %8d delayed rows and columns",
279         stats[0], stats[1], stats[2]) ;
280 if ( frontmtx->rowadjIVL != NULL ) {
281    fprintf(msgFile,
282            "\n %d entries in rowadjIVL", frontmtx->rowadjIVL->tsize) ;
283 }
284 if ( frontmtx->coladjIVL != NULL ) {
285    fprintf(msgFile,
286            "\n %d entries in coladjIVL", frontmtx->coladjIVL->tsize) ;
287 }
288 if ( frontmtx->upperblockIVL != NULL ) {
289    fprintf(msgFile,
290            "\n %d fronts, %d entries in upperblockIVL",
291            frontmtx->nfront, frontmtx->upperblockIVL->tsize) ;
292 }
293 if ( frontmtx->lowerblockIVL != NULL ) {
294    fprintf(msgFile,
295            ", %d entries in lowerblockIVL",
296            frontmtx->lowerblockIVL->tsize) ;
297 }
298 fprintf(msgFile,
299         "\n %d entries in D, %d entries in L, %d entries in U",
300         stats[3], stats[4], stats[5]) ;
301 fprintf(msgFile, "\n %d locks", frontmtx->nlocks) ;
302 cputotal = cpus[10] ;
303 if ( cputotal > 0.0 ) {
304    fprintf(msgFile,
305    "\n    manage working storage  %8.3f %6.2f"
306    "\n    initialize/load fronts  %8.3f %6.2f"
307    "\n    update fronts           %8.3f %6.2f"
308    "\n    aggregate insert        %8.3f %6.2f"
309    "\n    aggregate remove/add    %8.3f %6.2f"
310    "\n    assemble postponed data %8.3f %6.2f"
311    "\n    factor fronts           %8.3f %6.2f"
312    "\n    extract postponed data  %8.3f %6.2f"
313    "\n    store factor entries    %8.3f %6.2f"
314    "\n    miscellaneous           %8.3f %6.2f"
315    "\n    total time              %8.3f",
316    cpus[0], 100.*cpus[0]/cputotal,
317    cpus[1], 100.*cpus[1]/cputotal,
318    cpus[2], 100.*cpus[2]/cputotal,
319    cpus[3], 100.*cpus[3]/cputotal,
320    cpus[4], 100.*cpus[4]/cputotal,
321    cpus[5], 100.*cpus[5]/cputotal,
322    cpus[6], 100.*cpus[6]/cputotal,
323    cpus[7], 100.*cpus[7]/cputotal,
324    cpus[8], 100.*cpus[8]/cputotal,
325    cpus[9], 100.*cpus[9]/cputotal, cputotal) ;
326 }
327 SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ;
328 ChvManager_writeForHumanEye(chvmanager, msgFile) ;
329 if ( msglvl > 2 ) {
330    fprintf(msgFile, "\n\n front factor matrix") ;
331    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
332 }
333 /*
334    ------------------------------
335    post-process the factor matrix
336    ------------------------------
337 */
338 MARKTIME(t1) ;
339 FrontMtx_postProcess(frontmtx, msglvl, msgFile) ;
340 MARKTIME(t2) ;
341 fprintf(msgFile, "\n CPU %8.3f : post-process the matrix", t2 - t1) ;
342 if ( msglvl > 2 ) {
343    fprintf(msgFile, "\n\n front factor matrix after post-processing") ;
344    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
345 }
346 fprintf(msgFile, "\n\n after post-processing") ;
347 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
348 /*
349    ----------------
350    solve the system
351    ----------------
352 */
353 neqns = mtxB->nrow ;
354 nrhs  = mtxB->ncol ;
355 mtxZ  = DenseMtx_new() ;
356 DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ;
357 DenseMtx_zero(mtxZ) ;
358 if ( type == SPOOLES_REAL ) {
359    nops = frontmtx->nentD + 2*frontmtx->nentU ;
360    if ( frontmtx->symmetryflag == 2 ) {
361       nops += 2*frontmtx->nentL ;
362    } else {
363       nops += 2*frontmtx->nentU ;
364    }
365 } if ( type == SPOOLES_COMPLEX ) {
366    nops = 8*frontmtx->nentD + 8*frontmtx->nentU ;
367    if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
368       nops += 8*frontmtx->nentL ;
369    } else {
370       nops += 8*frontmtx->nentU ;
371    }
372 }
373 nops *= nrhs ;
374 if ( msglvl > 2 ) {
375    fprintf(msgFile, "\n\n rhs") ;
376    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
377    fflush(stdout) ;
378 }
379 DVzero(6, cpus) ;
380 MARKTIME(t1) ;
381 FrontMtx_solve(frontmtx, mtxZ, mtxB, mtxmanager,
382                cpus, msglvl, msgFile) ;
383 MARKTIME(t2) ;
384 fprintf(msgFile, "\n CPU %8.3f : solve the system, %.3f mflops",
385         t2 - t1, 1.e-6*nops/(t2 - t1)) ;
386 cputotal = cpus[5] ;
387 if ( cputotal > 0.0 ) {
388    fprintf(msgFile,
389    "\n    set up solves               %8.3f %6.2f"
390    "\n    load rhs and store solution %8.3f %6.2f"
391    "\n    forward solve               %8.3f %6.2f"
392    "\n    diagonal solve              %8.3f %6.2f"
393    "\n    backward solve              %8.3f %6.2f"
394    "\n    total time              %8.3f",
395    cpus[0], 100.*cpus[0]/cputotal,
396    cpus[1], 100.*cpus[1]/cputotal,
397    cpus[2], 100.*cpus[2]/cputotal,
398    cpus[3], 100.*cpus[3]/cputotal,
399    cpus[4], 100.*cpus[4]/cputotal, cputotal) ;
400 }
401 if ( msglvl > 2 ) {
402    fprintf(msgFile, "\n\n computed solution") ;
403    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
404    fflush(stdout) ;
405 }
406 DenseMtx_sub(mtxZ, mtxX) ;
407 fprintf(msgFile, "\n\n serial maxabs error   = %12.4e",
408         DenseMtx_maxabs(mtxZ)) ;
409 if ( msglvl > 2 ) {
410    fprintf(msgFile, "\n\n error") ;
411    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
412    fflush(stdout) ;
413 }
414 fprintf(msgFile, "\n\n after solve") ;
415 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
416 /*
417    --------------------------------
418    now solve the system in parallel
419    --------------------------------
420 */
421 solvemap = SolveMap_new() ;
422 if (  FRONTMTX_IS_NONSYMMETRIC(frontmtx)
423    && FRONTMTX_IS_PIVOTING(frontmtx) ) {
424    SolveMap_ddMap(solvemap, SPOOLES_NONSYMMETRIC,
425                   FrontMtx_upperBlockIVL(frontmtx),
426                   FrontMtx_lowerBlockIVL(frontmtx),
427                   nthread, frontOwnersIV, frontmtx->tree,
428                   seed, msglvl, msgFile) ;
429 } else {
430    SolveMap_ddMap(solvemap, SPOOLES_SYMMETRIC,
431                   FrontMtx_upperBlockIVL(frontmtx), NULL,
432                   nthread, frontOwnersIV, frontmtx->tree,
433                   seed, msglvl, msgFile) ;
434 }
435 fprintf(msgFile, "\n solve map created") ;
436 fflush(msgFile) ;
437 DVzero(6, cpus) ;
438 if ( msglvl > 2 ) {
439    fprintf(msgFile, "\n\n rhs") ;
440    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
441    fflush(stdout) ;
442 }
443 DenseMtx_zero(mtxZ) ;
444 MARKTIME(t1) ;
445 FrontMtx_MT_solve(frontmtx, mtxZ, mtxB, mtxmanager, solvemap,
446                   cpus, msglvl, msgFile) ;
447 MARKTIME(t2) ;
448 fprintf(msgFile, "\n CPU %8.3f : solve the system, %.3f mflops",
449         t2 - t1, 1.e-6*nops/(t2 - t1)) ;
450 cputotal = cpus[5] ;
451 if ( cputotal > 0.0 ) {
452    fprintf(msgFile,
453    "\n    set up solves               %8.3f %6.2f"
454    "\n    load rhs and store solution %8.3f %6.2f"
455    "\n    forward solve               %8.3f %6.2f"
456    "\n    diagonal solve              %8.3f %6.2f"
457    "\n    backward solve              %8.3f %6.2f"
458    "\n    total time              %8.3f",
459    cpus[0], 100.*cpus[0]/cputotal,
460    cpus[1], 100.*cpus[1]/cputotal,
461    cpus[2], 100.*cpus[2]/cputotal,
462    cpus[3], 100.*cpus[3]/cputotal,
463    cpus[4], 100.*cpus[4]/cputotal, cputotal) ;
464 }
465 if ( msglvl > 2 ) {
466    fprintf(msgFile, "\n\n computed solution") ;
467    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
468    fflush(stdout) ;
469 }
470 DenseMtx_sub(mtxZ, mtxX) ;
471 fprintf(msgFile, "\n\n parallel maxabs error   = %12.4e",
472         DenseMtx_maxabs(mtxZ)) ;
473 if ( msglvl > 2 ) {
474    fprintf(msgFile, "\n\n error") ;
475    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
476    fflush(stdout) ;
477 }
478 fprintf(msgFile, "\n\n after solve") ;
479 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
480 /*
481    ------------------------
482    free the working storage
483    ------------------------
484 */
485 FrontMtx_free(frontmtx) ;
486 ETree_free(frontETree) ;
487 IV_free(frontOwnersIV) ;
488 IVL_free(symbfacIVL) ;
489 InpMtx_free(mtxA) ;
490 DenseMtx_free(mtxX) ;
491 DenseMtx_free(mtxB) ;
492 DenseMtx_free(mtxZ) ;
493 ChvManager_free(chvmanager) ;
494 SubMtxManager_free(mtxmanager) ;
495 SolveMap_free(solvemap) ;
496 
497 fprintf(msgFile, "\n") ;
498 fclose(msgFile) ;
499 
500 return(0) ; }
501 
502 /*--------------------------------------------------------------------*/
503