1 /*  testQRgridMT.c  */
2 
3 #include "../spoolesMT.h"
4 #include "../../FrontMtx.h"
5 #include "../../Drand.h"
6 #include "../../timings.h"
7 
8 /*--------------------------------------------------------------------*/
9 void mkNDlinsysQR ( int n1, int n2, int n3, int type, int nrhs,
10    int seed, int msglvl, FILE *msgFile, ETree **pfrontETree,
11    IVL **psymbfacIVL, InpMtx **pmtxA, DenseMtx **pmtxX,
12    DenseMtx **pmtxB) ;
13 /*--------------------------------------------------------------------*/
14 int
main(int argc,char * argv[])15 main ( int argc, char *argv[] )
16 /*
17    ---------------------------------------------------
18    test the QR factor method for a FrontMtx object
19    on an n1 x n2 x n3 grid
20 
21    (1) generate an overdetermined system AX = B
22    (2) factor the matrix
23    (3) solve the systems
24 
25    created  -- 98may29, cca
26    ---------------------------------------------------
27 */
28 {
29 ChvManager      *chvmanager ;
30 DenseMtx        *mtxB, *mtxX, *mtxZ ;
31 double          cputotal, cutoff, factorops ;
32 double          cpus[10] ;
33 double          nops, t1, t2 ;
34 DV              *cumopsDV ;
35 ETree           *frontETree ;
36 FILE            *msgFile ;
37 FrontMtx        *frontmtx ;
38 InpMtx          *mtxA ;
39 int             maptype, msglvl, neqns, nrhs, nthread,
40                 n1, n2, n3, seed, type ;
41 IV              *frontOwnersIV ;
42 IVL             *symbfacIVL ;
43 SolveMap        *solvemap ;
44 SubMtxManager   *mtxmanager ;
45 
46 if ( argc != 12 ) {
47    fprintf(stdout,
48       "\n\n usage : %s msglvl msgFile n1 n2 n3 seed nrhs type "
49       "\n         nthread maptype cutoff"
50       "\n    msglvl  -- message level"
51       "\n    msgFile -- message file"
52       "\n    n1      -- # of points in the first direction"
53       "\n    n2      -- # of points in the second direction"
54       "\n    n3      -- # of points in the third direction"
55       "\n    seed    -- random number seed"
56       "\n    nrhs    -- # of right hand sides"
57       "\n    type    -- type of linear system"
58       "\n      1 -- real"
59       "\n      2 -- complex"
60       "\n    nthread -- number of threads"
61       "\n    maptype -- type of map from fronts to threads"
62       "\n       1 --> wrap map"
63       "\n       2 --> balanced map via a post-order traversal"
64       "\n       3 --> subtree-subset map"
65       "\n       4 --> domain decomposition map"
66       "\n    cutoff  -- cutoff used for domain decomposition map"
67       "\n       0 <= cutoff <= 1 used to define the multisector"
68       "\n", argv[0]) ;
69    return(0) ;
70 }
71 msglvl = atoi(argv[1]) ;
72 if ( strcmp(argv[2], "stdout") == 0 ) {
73    msgFile = stdout ;
74 } else if ( (msgFile = fopen(argv[2], "a")) == NULL ) {
75    fprintf(stderr, "\n fatal error in %s"
76            "\n unable to open file %s\n",
77            argv[0], argv[2]) ;
78    return(-1) ;
79 }
80 n1      = atoi(argv[3]) ;
81 n2      = atoi(argv[4]) ;
82 n3      = atoi(argv[5]) ;
83 seed    = atoi(argv[6]) ;
84 nrhs    = atoi(argv[7]) ;
85 type    = atoi(argv[8]) ;
86 nthread = atoi(argv[8]) ;
87 maptype = atoi(argv[8]) ;
88 cutoff  = atoi(argv[8]) ;
89 fprintf(msgFile,
90         "\n %s "
91         "\n msglvl  -- %d"
92         "\n msgFile -- %s"
93         "\n n1      -- %d"
94         "\n n2      -- %d"
95         "\n n3      -- %d"
96         "\n seed    -- %d"
97         "\n nrhs    -- %d"
98         "\n type    -- %d"
99         "\n nthread -- %d"
100         "\n maptype -- %d"
101         "\n cutoff  -- %f"
102         "\n",
103         argv[0], msglvl, argv[2], n1, n2, n3, seed, nrhs, type,
104         nthread, maptype, cutoff) ;
105 fflush(msgFile) ;
106 neqns = n1*n2*n3 ;
107 if ( type != SPOOLES_REAL && type != SPOOLES_COMPLEX ) {
108    fprintf(stderr, "\n fatal error, type must be real or complex") ;
109    exit(-1) ;
110 }
111 /*
112    ------------------------------------------
113    generate the A X = B overdetermined system
114    ------------------------------------------
115 */
116 mkNDlinsysQR(n1, n2, n3, type, nrhs, seed, msglvl, msgFile,
117              &frontETree, &symbfacIVL, &mtxA, &mtxX, &mtxB) ;
118 fprintf(msgFile, "\n\n %d entries in A", InpMtx_nent(mtxA)) ;
119 /*
120    --------------------------------------------------
121    initialize the cumulative operations metric object
122    --------------------------------------------------
123 */
124 cumopsDV = DV_new() ;
125 DV_init(cumopsDV, nthread, NULL) ;
126 DV_fill(cumopsDV, 0.0) ;
127 /*
128    -------------------------------
129    create the owners map IV object
130    -------------------------------
131 */
132 switch ( maptype ) {
133 case 1 :
134    frontOwnersIV = ETree_wrapMap(frontETree, type,
135                                  SPOOLES_SYMMETRIC, cumopsDV) ;
136    break ;
137 case 2 :
138    frontOwnersIV = ETree_balancedMap(frontETree, type,
139                                  SPOOLES_SYMMETRIC, cumopsDV) ;
140    break ;
141 case 3 :
142    frontOwnersIV = ETree_subtreeSubsetMap(frontETree, type,
143                                  SPOOLES_SYMMETRIC, cumopsDV) ;
144    break ;
145 case 4 :
146    frontOwnersIV = ETree_ddMap(frontETree, type,
147                                  SPOOLES_SYMMETRIC, cumopsDV, cutoff) ;
148    break ;
149 }
150 if ( msglvl > 1 ) {
151    fprintf(msgFile, "\n\n totalOps = %.0f", DV_sum(cumopsDV)) ;
152    DVscale(DV_size(cumopsDV), DV_entries(cumopsDV),
153             nthread/DV_sum(cumopsDV)) ;
154    fprintf(msgFile, "\n\n cumopsDV") ;
155    DV_writeForHumanEye(cumopsDV, msgFile) ;
156 }
157 if ( msglvl > 1 ) {
158    fprintf(msgFile, "\n\n frontOwnersIV") ;
159    IV_writeForHumanEye(frontOwnersIV, msgFile) ;
160    fflush(msgFile) ;
161 }
162 /*
163    ------------------------------
164    initialize the FrontMtx object
165    ------------------------------
166 */
167 MARKTIME(t1) ;
168 mtxmanager = SubMtxManager_new() ;
169 SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ;
170 frontmtx = FrontMtx_new() ;
171 if ( type == SPOOLES_REAL ) {
172    FrontMtx_init(frontmtx, frontETree, symbfacIVL, type,
173                  SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS,
174                  SPOOLES_NO_PIVOTING, LOCK_IN_PROCESS,
175                  0, NULL, mtxmanager, msglvl, msgFile) ;
176 } else if ( type == SPOOLES_COMPLEX ) {
177    FrontMtx_init(frontmtx, frontETree, symbfacIVL, type,
178                  SPOOLES_HERMITIAN, FRONTMTX_DENSE_FRONTS,
179                  SPOOLES_NO_PIVOTING, LOCK_IN_PROCESS,
180                  0, NULL, mtxmanager, msglvl, msgFile) ;
181 }
182 MARKTIME(t2) ;
183 fprintf(msgFile, "\n CPU %8.3f : initialize the front matrix",
184         t2 - t1) ;
185 fprintf(msgFile, "\n\n %d entries in D, %d entries in U",
186         frontmtx->nentD, frontmtx->nentU) ;
187 /*
188    -----------------
189    factor the matrix
190    -----------------
191 */
192 DVzero(10, cpus) ;
193 InpMtx_changeCoordType(mtxA, INPMTX_BY_ROWS) ;
194 InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
195 chvmanager = ChvManager_new() ;
196 ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ;
197 MARKTIME(t1) ;
198 FrontMtx_MT_QR_factor(frontmtx, mtxA, chvmanager, frontOwnersIV,
199                       cpus, &factorops, msglvl, msgFile) ;
200 MARKTIME(t2) ;
201 fprintf(msgFile,
202         "\n CPU %8.3f : FrontMtx_MT_QR_factor, %.0f ops, %.2f mflops",
203         t2 - t1, factorops, 1.e-6*factorops/(t2-t1)) ;
204 cputotal = cpus[6] ;
205 if ( cputotal > 0.0 ) {
206    fprintf(msgFile, "\n"
207    "\n                              CPU    %%"
208    "\n    setup factorization  %8.3f %6.2f"
209    "\n    setup fronts         %8.3f %6.2f"
210    "\n    factor fronts        %8.3f %6.2f"
211    "\n    store factor         %8.3f %6.2f"
212    "\n    store update         %8.3f %6.2f"
213    "\n    miscellaneous        %8.3f %6.2f"
214    "\n    total time           %8.3f"
215    "\n    wall clock time      %8.3f",
216    cpus[0], 100.*cpus[0]/cputotal,
217    cpus[1], 100.*cpus[1]/cputotal,
218    cpus[2], 100.*cpus[2]/cputotal,
219    cpus[3], 100.*cpus[3]/cputotal,
220    cpus[4], 100.*cpus[4]/cputotal,
221    cpus[5], 100.*cpus[5]/cputotal,
222    cpus[6], t2 - t1) ;
223 }
224 fprintf(msgFile, "\n\n ChvManager statistics") ;
225 ChvManager_writeForHumanEye(chvmanager, msgFile) ;
226 /*
227    ------------------------------
228    post-process the factor matrix
229    ------------------------------
230 */
231 MARKTIME(t1) ;
232 FrontMtx_postProcess(frontmtx, msglvl, msgFile) ;
233 MARKTIME(t2) ;
234 fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ;
235 if ( msglvl > 2 ) {
236    fprintf(msgFile, "\n\n front factor matrix after post-processing") ;
237    FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
238 }
239 fprintf(msgFile, "\n\n after post-processing") ;
240 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
241 /*
242    ----------------
243    solve the system
244    ----------------
245 */
246 mtxZ = DenseMtx_new() ;
247 DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ;
248 DenseMtx_zero(mtxZ) ;
249 if ( type == SPOOLES_REAL ) {
250    nops = frontmtx->nentD + 2*frontmtx->nentU ;
251    if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
252       nops += 2*frontmtx->nentL ;
253    } else {
254       nops += 2*frontmtx->nentU ;
255    }
256 } else if ( type == SPOOLES_COMPLEX ) {
257    nops = 8*frontmtx->nentD + 8*frontmtx->nentU ;
258    if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
259       nops += 8*frontmtx->nentL ;
260    } else {
261       nops += 8*frontmtx->nentU ;
262    }
263 }
264 nops *= nrhs ;
265 if ( msglvl > 2 ) {
266    fprintf(msgFile, "\n\n rhs") ;
267    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
268    fflush(stdout) ;
269 }
270 DVzero(7, cpus) ;
271 MARKTIME(t1) ;
272 FrontMtx_QR_solve(frontmtx, mtxA, mtxZ, mtxB, mtxmanager,
273                   cpus, msglvl, msgFile) ;
274 MARKTIME(t2) ;
275 fprintf(msgFile,
276      "\n\n CPU %8.3f : serial solve, %d rhs, %.0f ops, %.3f mflops",
277      t2 - t1, nrhs, nops, 1.e-6*nops/(t2 - t1)) ;
278 cputotal = t2 - t1 ;
279 if ( cputotal > 0.0 ) {
280    fprintf(msgFile, "\n"
281    "\n                                        CPU    %%"
282    "\n    A^TB matrix-matrix multiply    %8.3f %6.2f"
283    "\n    total solve time               %8.3f %6.2f"
284    "\n       set up solves                 %8.3f %6.2f"
285    "\n       load rhs and store solution   %8.3f %6.2f"
286    "\n       forward solve                 %8.3f %6.2f"
287    "\n       diagonal solve                %8.3f %6.2f"
288    "\n       backward solve                %8.3f %6.2f"
289    "\n    total QR solve time            %8.3f",
290    cpus[6], 100.*cpus[6]/cputotal,
291    cpus[5], 100.*cpus[5]/cputotal,
292    cpus[0], 100.*cpus[0]/cputotal,
293    cpus[1], 100.*cpus[1]/cputotal,
294    cpus[2], 100.*cpus[2]/cputotal,
295    cpus[3], 100.*cpus[3]/cputotal,
296    cpus[4], 100.*cpus[4]/cputotal,
297    cputotal) ;
298 }
299 if ( msglvl > 3 ) {
300    fprintf(msgFile, "\n\n serial solve computed solution") ;
301    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
302    fflush(stdout) ;
303 }
304 /*
305    -----------------
306    compute the error
307    -----------------
308 */
309 DenseMtx_sub(mtxZ, mtxX) ;
310 fprintf(msgFile, "\n\n serial solve: maxabs error = %12.4e",
311 DenseMtx_maxabs(mtxZ)) ;
312 if ( msglvl > 2 ) {
313    fprintf(msgFile, "\n\n error") ;
314    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
315    fflush(stdout) ;
316 }
317 fprintf(msgFile, "\n\n after serial solve") ;
318 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
319 /*
320    --------------------------------
321    now solve the system in parallel
322    --------------------------------
323 */
324 solvemap = SolveMap_new() ;
325 SolveMap_ddMap(solvemap, SPOOLES_SYMMETRIC,
326                FrontMtx_upperBlockIVL(frontmtx), NULL,
327                nthread, frontOwnersIV, frontmtx->tree,
328                seed, msglvl, msgFile) ;
329 fprintf(msgFile, "\n solve map created") ;
330 fflush(msgFile) ;
331 if ( msglvl > 2 ) {
332    fprintf(msgFile, "\n\n rhs") ;
333    DenseMtx_writeForHumanEye(mtxB, msgFile) ;
334    fflush(stdout) ;
335 }
336 DenseMtx_zero(mtxZ) ;
337 MARKTIME(t1) ;
338 FrontMtx_MT_QR_solve(frontmtx, mtxA, mtxZ, mtxB, mtxmanager,
339                      solvemap, cpus, msglvl, msgFile) ;
340 MARKTIME(t2) ;
341 fprintf(msgFile,
342      "\n\n CPU %8.3f : parallel solve, %d rhs, %.0f ops, %.3f mflops",
343      t2 - t1, nrhs, nops, 1.e-6*nops/(t2 - t1)) ;
344 cputotal = t2 - t1 ;
345 if ( cputotal > 0.0 ) {
346    fprintf(msgFile, "\n"
347    "\n                                        CPU    %%"
348    "\n    A^TB matrix-matrix multiply    %8.3f %6.2f"
349    "\n    total solve time               %8.3f %6.2f"
350    "\n       set up solves                 %8.3f %6.2f"
351    "\n       load rhs and store solution   %8.3f %6.2f"
352    "\n       forward solve                 %8.3f %6.2f"
353    "\n       diagonal solve                %8.3f %6.2f"
354    "\n       backward solve                %8.3f %6.2f"
355    "\n    total QR solve time            %8.3f",
356    cpus[6], 100.*cpus[6]/cputotal,
357    cpus[5], 100.*cpus[5]/cputotal,
358    cpus[0], 100.*cpus[0]/cputotal,
359    cpus[1], 100.*cpus[1]/cputotal,
360    cpus[2], 100.*cpus[2]/cputotal,
361    cpus[3], 100.*cpus[3]/cputotal,
362    cpus[4], 100.*cpus[4]/cputotal,
363    cputotal) ;
364 }
365 if ( msglvl > 3 ) {
366    fprintf(msgFile, "\n\n computed solution from parallel solve") ;
367    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
368    fflush(stdout) ;
369 }
370 /*
371    -----------------
372    compute the error
373    -----------------
374 */
375 DenseMtx_sub(mtxZ, mtxX) ;
376 fprintf(msgFile, "\n\n parallel solve: maxabs error = %12.4e",
377 DenseMtx_maxabs(mtxZ)) ;
378 if ( msglvl > 2 ) {
379    fprintf(msgFile, "\n\n error") ;
380    DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
381    fflush(stdout) ;
382 }
383 fprintf(msgFile, "\n\n after parallel solve") ;
384 SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
385 /*
386    ------------------------
387    free the working storage
388    ------------------------
389 */
390 InpMtx_free(mtxA) ;
391 DenseMtx_free(mtxX) ;
392 DenseMtx_free(mtxZ) ;
393 DenseMtx_free(mtxB) ;
394 FrontMtx_free(frontmtx) ;
395 IVL_free(symbfacIVL) ;
396 ETree_free(frontETree) ;
397 SubMtxManager_free(mtxmanager) ;
398 ChvManager_free(chvmanager) ;
399 DV_free(cumopsDV) ;
400 IV_free(frontOwnersIV) ;
401 SolveMap_free(solvemap) ;
402 
403 fprintf(msgFile, "\n") ;
404 fclose(msgFile) ;
405 
406 return(1) ; }
407 
408 /*--------------------------------------------------------------------*/
409