1 /*  testGrid.c  */
2 
3 #include "../SemiImplMtx.h"
4 #include "../../Drand.h"
5 #include "../../timings.h"
6 #include "../../misc.h"
7 
8 /*--------------------------------------------------------------------*/
9 IV * get_frontmapIV ( Tree *tree, int depth ) ;
10 /*--------------------------------------------------------------------*/
11 
12 int
main(int argc,char * argv[])13 main ( int argc, char *argv[] )
14 /*
15    -----------------------------------------------------
16    test the factor method for a grid matrix
17    (1) construct a linear system for a nested dissection
18        ordering on a regular grid
19    (2) create a solution matrix object
20    (3) multiply the solution with the matrix
21        to get a right hand side matrix object
22    (4) factor the matrix
23    (5) solve the system
24 
25    created -- 98may16, cca
26    -----------------------------------------------------
27 */
28 {
29 Chv             *chv, *rootchv ;
30 ChvManager      *chvmanager ;
31 DenseMtx        *mtxB, *mtxX, *mtxZ ;
32 FrontMtx        *frontmtx ;
33 InpMtx          *mtxA ;
34 SemiImplMtx     *semimtx ;
35 SubMtxManager   *mtxmanager ;
36 double          cputotal, droptol, factorops, initCPU ;
37 double          cpus[10] ;
38 Drand           drand ;
39 double          nsolveops, tau, t1, t2   ;
40 ETree           *frontETree   ;
41 FILE            *msgFile ;
42 int             depth, error, maxsize, maxzeros, msglvl,
43                 neqns, n1, n2, n3, nrhs, nzf, pivotingflag, rc,
44                 seed, sparsityflag, symmetryflag, type, v ;
45 int             stats[13] ;
46 int             *vtxToFront ;
47 IV              *frontmapIV ;
48 IVL             *symbfacIVL ;
49 
50 if ( argc != 17 ) {
51    fprintf(stdout,
52 "\n\n usage : %s msglvl msgFile n1 n2 n3 maxzeros maxsize"
53 "\n         seed type symmetryflag sparsityflag "
54 "\n         pivotingflag tau droptol nrhs depth"
55 "\n    msglvl   -- message level"
56 "\n    msgFile  -- message file"
57 "\n    n1       -- number of grid points in the first direction"
58 "\n    n2       -- number of grid points in the second direction"
59 "\n    n3       -- number of grid points in the third direction"
60 "\n    maxzeros -- max number of zeroes in a front"
61 "\n    maxsize  -- max number of internal nodes in a front"
62 "\n    seed     -- random number seed"
63 "\n    type     -- type of entries"
64 "\n       1 --> real"
65 "\n       2 --> complex"
66 "\n    symmetryflag -- symmetry flag"
67 "\n       0 --> symmetric "
68 "\n       1 --> hermitian"
69 "\n       2 --> nonsymmetric"
70 "\n    sparsityflag -- sparsity flag"
71 "\n       0 --> store dense fronts"
72 "\n       1 --> store sparse fronts, use droptol to drop entries"
73 "\n    pivotingflag -- pivoting flag"
74 "\n       0 --> do not pivot"
75 "\n       1 --> enable pivoting"
76 "\n    tau     -- upper bound on factor entries"
77 "\n               used only with pivoting"
78 "\n    droptol -- lower bound on factor entries"
79 "\n               used only with sparse fronts"
80 "\n    nrhs     -- # of right hand sides"
81 "\n    depth    -- depth for multisector"
82 "\n", argv[0]) ;
83    return(-1) ;
84 }
85 msglvl = atoi(argv[1]) ;
86 if ( strcmp(argv[2], "stdout") == 0 ) {
87    msgFile = stdout ;
88 } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) {
89    fprintf(stderr, "\n fatal error in %s"
90            "\n unable to open file %s\n",
91            argv[0], argv[2]) ;
92    return(-1) ;
93 }
94 n1            = atoi(argv[3]) ;
95 n2            = atoi(argv[4]) ;
96 n3            = atoi(argv[5]) ;
97 maxzeros      = atoi(argv[6]) ;
98 maxsize       = atoi(argv[7]) ;
99 seed          = atoi(argv[8]) ;
100 type          = atoi(argv[9]) ;
101 symmetryflag  = atoi(argv[10]) ;
102 sparsityflag  = atoi(argv[11]) ;
103 pivotingflag  = atoi(argv[12]) ;
104 tau           = atof(argv[13]) ;
105 droptol       = atof(argv[14]) ;
106 nrhs          = atoi(argv[15]) ;
107 depth         = atoi(argv[16]) ;
108 fprintf(msgFile,
109         "\n %s "
110         "\n msglvl        -- %d"
111         "\n msgFile       -- %s"
112         "\n n1            -- %d"
113         "\n n2            -- %d"
114         "\n n3            -- %d"
115         "\n maxzeros      -- %d"
116         "\n maxsize       -- %d"
117         "\n seed          -- %d"
118         "\n type          -- %d"
119         "\n symmetryflag  -- %d"
120         "\n sparsityflag  -- %d"
121         "\n pivotingflag  -- %d"
122         "\n tau           -- %e"
123         "\n droptol       -- %e"
124         "\n nrhs          -- %d"
125         "\n depth         -- %d"
126         "\n",
127         argv[0], msglvl, argv[2], n1, n2, n3, maxzeros, maxsize,
128         seed, type, symmetryflag, sparsityflag, pivotingflag,
129         tau, droptol, nrhs, depth) ;
130 fflush(msgFile) ;
131 neqns = n1 * n2 * n3 ;
132 /*
133    --------------------------------------
134    initialize the random number generator
135    --------------------------------------
136 */
137 Drand_setDefaultFields(&drand) ;
138 Drand_init(&drand) ;
139 Drand_setSeed(&drand, seed) ;
140 /*
141 Drand_setUniform(&drand, 0.0, 1.0) ;
142 */
143 Drand_setNormal(&drand, 0.0, 1.0) ;
144 /*
145    --------------------------
146    generate the linear system
147    --------------------------
148 */
149 mkNDlinsys(n1, n2, n3, maxzeros, maxsize, type,
150            symmetryflag, nrhs, seed, msglvl, msgFile,
151            &frontETree, &symbfacIVL, &mtxA, &mtxX, &mtxB) ;
152 if ( msglvl > 3 ) {
153    fprintf(msgFile, "\n mtxA") ;
154    InpMtx_writeForHumanEye(mtxA, msgFile) ;
155    fprintf(msgFile, "\n mtxX") ;
156    DenseMtx_writeForHumanEye(mtxX, msgFile) ;
157    fprintf(msgFile, "\n mtxB") ;
158    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
159    fflush(msgFile) ;
160 }
161 if ( msglvl > 3 ) {
162    fprintf(msgFile, "\n\n %% MATLAB file: linear system") ;
163    fprintf(msgFile, "\n A = zeros(%d,%d) ;", neqns, neqns) ;
164    fprintf(msgFile, "\n X = zeros(%d,1) ;", neqns) ;
165    fprintf(msgFile, "\n B = zeros(%d,1) ;", neqns) ;
166    InpMtx_writeForMatlab(mtxA, "A", msgFile) ;
167    DenseMtx_writeForMatlab(mtxX, "X", msgFile) ;
168    DenseMtx_writeForMatlab(mtxB, "B", msgFile) ;
169    fflush(msgFile) ;
170 }
171 /*
172    ------------------------------
173    initialize the FrontMtx object
174    ------------------------------
175 */
176 MARKTIME(t1) ;
177 frontmtx   = FrontMtx_new() ;
178 mtxmanager = SubMtxManager_new() ;
179 SubMtxManager_init(mtxmanager, NO_LOCK, 0) ;
180 FrontMtx_init(frontmtx, frontETree, symbfacIVL,
181               type, symmetryflag, sparsityflag, pivotingflag,
182               NO_LOCK, 0, NULL, mtxmanager, msglvl, msgFile) ;
183 MARKTIME(t2) ;
184 fprintf(msgFile, "\n\n CPU %8.3f : initialize the front matrix",
185         t2 - t1) ;
186 if ( msglvl > 0 ) {
187    fprintf(msgFile,
188            "\n nendD  = %d, nentL = %d, nentU = %d",
189            frontmtx->nentD, frontmtx->nentL, frontmtx->nentU) ;
190 }
191 if ( msglvl > 3 ) {
192    fprintf(msgFile, "\n front matrix initialized") ;
193    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
194    fflush(msgFile) ;
195 }
196 SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ;
197 /*
198    -----------------
199    factor the matrix
200    -----------------
201 */
202 nzf       = ETree_nFactorEntries(frontETree, symmetryflag) ;
203 factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ;
204 fprintf(msgFile,
205         "\n %d factor entries, %.0f factor ops, %8.3f ratio",
206         nzf, factorops, factorops/nzf) ;
207 IVzero(6, stats) ;
208 DVzero(9, cpus) ;
209 chvmanager = ChvManager_new() ;
210 ChvManager_init(chvmanager, NO_LOCK, 1) ;
211 MARKTIME(t1) ;
212 rootchv = FrontMtx_factorInpMtx(frontmtx, mtxA, tau, droptol,
213                                 chvmanager, &error, cpus,
214                                 stats, msglvl, msgFile) ;
215 MARKTIME(t2) ;
216 fprintf(msgFile, "\n\n CPU %8.3f : factor matrix, %8.3f mflops",
217         t2 - t1, 1.e-6*factorops/(t2-t1)) ;
218 if ( rootchv != NULL ) {
219    fprintf(msgFile, "\n\n factorization did not complete") ;
220    for ( chv = rootchv ; chv != NULL ; chv = chv->next ) {
221       fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d",
222               chv->id, chv->nD, chv->nL, chv->nU) ;
223    }
224 }
225 if ( error >= 0 ) {
226    fprintf(msgFile, "\n\n error encountered at front %d\n", error) ;
227    exit(-1) ;
228 }
229 fprintf(msgFile,
230         "\n %8d pivots, %8d pivot tests, %8d delayed rows and columns",
231         stats[0], stats[1], stats[2]) ;
232 if ( frontmtx->rowadjIVL != NULL ) {
233    fprintf(msgFile,
234            "\n %d entries in rowadjIVL", frontmtx->rowadjIVL->tsize) ;
235 }
236 if ( frontmtx->coladjIVL != NULL ) {
237    fprintf(msgFile,
238            ", %d entries in coladjIVL", frontmtx->coladjIVL->tsize) ;
239 }
240 if ( frontmtx->upperblockIVL != NULL ) {
241    fprintf(msgFile,
242            "\n %d fronts, %d entries in upperblockIVL",
243            frontmtx->nfront, frontmtx->upperblockIVL->tsize) ;
244 }
245 if ( frontmtx->lowerblockIVL != NULL ) {
246    fprintf(msgFile,
247            ", %d entries in lowerblockIVL",
248            frontmtx->lowerblockIVL->tsize) ;
249 }
250 fprintf(msgFile,
251         "\n %d entries in D, %d entries in L, %d entries in U",
252         stats[3], stats[4], stats[5]) ;
253 fprintf(msgFile, "\n %d locks", frontmtx->nlocks) ;
254 cputotal = cpus[8] ;
255 if ( cputotal > 0.0 ) {
256    fprintf(msgFile,
257    "\n    initialize fronts       %8.3f %6.2f"
258    "\n    load original entries   %8.3f %6.2f"
259    "\n    update fronts           %8.3f %6.2f"
260    "\n    assemble postponed data %8.3f %6.2f"
261    "\n    factor fronts           %8.3f %6.2f"
262    "\n    extract postponed data  %8.3f %6.2f"
263    "\n    store factor entries    %8.3f %6.2f"
264    "\n    miscellaneous           %8.3f %6.2f"
265    "\n    total time              %8.3f",
266    cpus[0], 100.*cpus[0]/cputotal,
267    cpus[1], 100.*cpus[1]/cputotal,
268    cpus[2], 100.*cpus[2]/cputotal,
269    cpus[3], 100.*cpus[3]/cputotal,
270    cpus[4], 100.*cpus[4]/cputotal,
271    cpus[5], 100.*cpus[5]/cputotal,
272    cpus[6], 100.*cpus[6]/cputotal,
273    cpus[7], 100.*cpus[7]/cputotal, cputotal) ;
274 }
275 SubMtxManager_writeForHumanEye(mtxmanager, msgFile) ;
276 ChvManager_writeForHumanEye(chvmanager, msgFile) ;
277 if ( msglvl > 3 ) {
278    fprintf(msgFile, "\n\n front factor matrix") ;
279    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
280    fflush(msgFile) ;
281 }
282 if ( msglvl > 3 ) {
283    fprintf(msgFile, "\n\n %% MATLAB file: front factor matrix") ;
284    FrontMtx_writeForMatlab(frontmtx, "L", "D", "U", msgFile) ;
285    fflush(msgFile) ;
286 }
287 /*
288    ------------------------------
289    post-process the factor matrix
290    ------------------------------
291 */
292 MARKTIME(t1) ;
293 FrontMtx_postProcess(frontmtx, msglvl, msgFile) ;
294 MARKTIME(t2) ;
295 fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ;
296 if ( msglvl > 4 ) {
297    fprintf(msgFile, "\n\n front factor matrix after post-processing") ;
298    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
299 }
300 fprintf(msgFile, "\n\n after post-processing") ;
301 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
302 /*
303    ----------------
304    solve the system
305    ----------------
306 */
307 neqns = mtxB->nrow ;
308 nrhs  = mtxB->ncol ;
309 mtxZ  = DenseMtx_new() ;
310 DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ;
311 DenseMtx_zero(mtxZ) ;
312 nsolveops = FrontMtx_nSolveOps(frontmtx) ;
313 nsolveops *= nrhs ;
314 if ( msglvl > 3 ) {
315    fprintf(msgFile, "\n\n rhs") ;
316    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
317    fflush(stdout) ;
318 }
319 DVzero(6, cpus) ;
320 MARKTIME(t1) ;
321 FrontMtx_solve(frontmtx, mtxZ, mtxB, mtxmanager,
322                cpus, msglvl, msgFile) ;
323 MARKTIME(t2) ;
324 fprintf(msgFile,
325      "\n\n CPU %8.3f : direct solve the system, %.0f ops, %.3f mflops",
326      t2 - t1, nsolveops, 1.e-6*nsolveops/(t2 - t1)) ;
327 cputotal = t2 - t1 ;
328 if ( cputotal > 0.0 ) {
329    fprintf(msgFile,
330    "\n    set up solves               %8.3f %6.2f"
331    "\n    load rhs and store solution %8.3f %6.2f"
332    "\n    forward solve               %8.3f %6.2f"
333    "\n    diagonal solve              %8.3f %6.2f"
334    "\n    backward solve              %8.3f %6.2f"
335    "\n    total time                  %8.3f",
336    cpus[0], 100.*cpus[0]/cputotal,
337    cpus[1], 100.*cpus[1]/cputotal,
338    cpus[2], 100.*cpus[2]/cputotal,
339    cpus[3], 100.*cpus[3]/cputotal,
340    cpus[4], 100.*cpus[4]/cputotal, cputotal) ;
341 }
342 if ( msglvl > 3 ) {
343    fprintf(msgFile, "\n\n computed solution") ;
344    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
345    fflush(stdout) ;
346 }
347 DenseMtx_sub(mtxZ, mtxX) ;
348 fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ;
349 if ( msglvl > 3 ) {
350    fprintf(msgFile, "\n\n error") ;
351    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
352    fflush(stdout) ;
353 }
354 fprintf(msgFile, "\n\n after solve") ;
355 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
356 /*
357    ------------------------
358    get the front matrix map
359    ------------------------
360 */
361 MARKTIME(t1) ;
362 frontmapIV = get_frontmapIV(frontmtx->tree, depth) ;
363 MARKTIME(t2) ;
364 fprintf(msgFile, "\n\n CPU %8.3f : get the front map", t2 - t1) ;
365 if ( msglvl > 1 ) {
366    fprintf(msgFile, "\n\n frontmapIV") ;
367    IV_writeForHumanEye(frontmapIV, msgFile) ;
368    fflush(msgFile) ;
369 }
370 /*
371    --------------------------------------
372    create the semi-implicit matrix object
373    --------------------------------------
374 */
375 MARKTIME(t1) ;
376 semimtx = SemiImplMtx_new() ;
377 rc = SemiImplMtx_initFromFrontMtx(semimtx, frontmtx, mtxA,
378                                   frontmapIV, msglvl, msgFile) ;
379 MARKTIME(t2) ;
380 initCPU = t2 - t1 ;
381 fprintf(msgFile, "\n\n CPU %8.3f : initialize the SemiImplMtx",
382         t2 - t1) ;
383 if ( msglvl > 3 ) {
384    fprintf(msgFile, "\n\n Semi-implicit matrix") ;
385    SemiImplMtx_writeForHumanEye(semimtx, msgFile) ;
386    fflush(msgFile) ;
387 }
388 SemiImplMtx_stats(semimtx, stats) ;
389 fprintf(msgFile, "\n stats[11] = %d", stats[11]) ;
390 fprintf(msgFile,
391         "\n %d eqns, %d domain eqns, %d schur eqns"
392         "\n |L11| = %d, |D11| = %d, |U11| = %d"
393         "\n |L22| = %d, |D22| = %d, |U22| = %d"
394         "\n |A12| = %d, |A22| = %d,"
395         "\n %d total matrix entries, %d solve operations",
396         stats[0], stats[1], stats[2], stats[3], stats[4], stats[5],
397         stats[6], stats[7], stats[8], stats[9], stats[10], stats[11],
398         stats[12]) ;
399 fprintf(msgFile, "\n STATS2 %2d %8d %8d %10d %10d %10d",
400         depth, stats[1], stats[2], stats[3] + stats[4] + stats[5],
401         stats[6] + stats[7] + stats[8], stats[9] + stats[10]) ;
402 /*
403    ------------------------------------------------------
404    solve the system using the semi-implicit factorization
405    ------------------------------------------------------
406 */
407 DenseMtx_zero(mtxZ) ;
408 nsolveops = 2*FrontMtx_nSolveOps(semimtx->domainMtx) ;
409 nsolveops += FrontMtx_nSolveOps(semimtx->schurMtx) ;
410 nsolveops += 2*semimtx->A12->nent ;
411 if ( symmetryflag == SPOOLES_NONSYMMETRIC ) {
412    nsolveops += 2*semimtx->A21->nent ;
413 } else {
414    nsolveops += 2*semimtx->A12->nent ;
415 }
416 nsolveops *= nrhs ;
417 DVzero(9, cpus) ;
418 MARKTIME(t1) ;
419 rc = SemiImplMtx_solve(semimtx, mtxZ, mtxB,
420                        mtxmanager, cpus, msglvl, msgFile) ;
421 MARKTIME(t2) ;
422 fprintf(msgFile,
423         "\n\n CPU %8.3f : semi solve the system, %.0f ops, %.3f mflops",
424         t2 - t1, nsolveops, 1.e-6*nsolveops/(t2 - t1)) ;
425 fprintf(msgFile, "\n STATS1 %2d %8.3f %11d %16d %10.3f",
426         depth, initCPU, stats[11], stats[12], t2 - t1) ;
427 cputotal = t2 - t1 ;
428 if ( cputotal > 0.0 ) {
429    fprintf(msgFile,
430    "\n    init working matrices       %8.3f %6.2f"
431    "\n    load rhs                    %8.3f %6.2f"
432    "\n    first solve with domains    %8.3f %6.2f"
433    "\n    compute schur rhs           %8.3f %6.2f"
434    "\n    solve with schur complement %8.3f %6.2f"
435    "\n    compute domains' rhs        %8.3f %6.2f"
436    "\n    second solve with domains   %8.3f %6.2f"
437    "\n    store solution              %8.3f %6.2f"
438    "\n    miscellaneous time          %8.3f %6.2f"
439    "\n    total time                  %8.3f",
440    cpus[0], 100.*cpus[0]/cputotal,
441    cpus[1], 100.*cpus[1]/cputotal,
442    cpus[2], 100.*cpus[2]/cputotal,
443    cpus[3], 100.*cpus[3]/cputotal,
444    cpus[4], 100.*cpus[4]/cputotal,
445    cpus[5], 100.*cpus[5]/cputotal,
446    cpus[6], 100.*cpus[6]/cputotal,
447    cpus[7], 100.*cpus[7]/cputotal,
448    cpus[8], 100.*cpus[8]/cputotal, cputotal) ;
449 }
450 if ( msglvl > 3 ) {
451    fprintf(msgFile, "\n\n computed solution") ;
452    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
453    fflush(stdout) ;
454 }
455 DenseMtx_sub(mtxZ, mtxX) ;
456 fprintf(msgFile, "\n\n maxabs error = %12.4e", DenseMtx_maxabs(mtxZ)) ;
457 if ( msglvl > 2 ) {
458    fprintf(msgFile, "\n\n error") ;
459    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
460    fflush(stdout) ;
461 }
462 fprintf(msgFile, "\n\n after solve") ;
463 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
464 /*
465    ------------------------
466    free the working storage
467    ------------------------
468 */
469 InpMtx_free(mtxA) ;
470 DenseMtx_free(mtxX) ;
471 DenseMtx_free(mtxB) ;
472 DenseMtx_free(mtxZ) ;
473 SemiImplMtx_free(semimtx) ;
474 FrontMtx_free(frontmtx) ;
475 ETree_free(frontETree) ;
476 IVL_free(symbfacIVL) ;
477 ChvManager_free(chvmanager) ;
478 SubMtxManager_free(mtxmanager) ;
479 IV_free(frontmapIV) ;
480 
481 fprintf(msgFile, "\n") ;
482 fclose(msgFile) ;
483 
484 return(1) ; }
485 
486 /*--------------------------------------------------------------------*/
487 IV *
get_frontmapIV(Tree * tree,int depth)488 get_frontmapIV (
489    Tree   *tree,
490    int    depth
491 ) {
492 int   J, K, nfront ;
493 int   *fch, *frontmap, *levels, *par, *sib ;
494 IV    *frontmapIV ;
495 
496 nfront = tree->n ;
497 par    = tree->par ;
498 fch    = tree->fch ;
499 sib    = tree->sib ;
500 frontmapIV = IV_new() ;
501 IV_init(frontmapIV, nfront, NULL) ;
502 frontmap = levels = IV_entries(frontmapIV) ;
503 for ( J = Tree_preOTfirst(tree) ;
504       J != -1 ;
505       J = Tree_preOTnext(tree, J) ) {
506    if ( (K = par[J]) == -1 ) {
507       levels[J] = 0 ;
508    } else {
509       if ( J == fch[K] && sib[J] == -1 ) {
510          levels[J] = levels[K] ;
511       } else {
512          levels[J] = levels[K] + 1 ;
513       }
514    }
515 }
516 for ( J = 0 ; J < nfront ; J++ ) {
517    if ( levels[J] < depth ) {
518       frontmap[J] = 0 ;
519    } else {
520       frontmap[J] = 1 ;
521    }
522 }
523 return(frontmapIV) ; }
524 
525 /*--------------------------------------------------------------------*/
526