1 /*****************************************************************************
2 /
3 / SPACE (SPArse Cholesky Elimination) Library: interface.c
4 /
5 / author J"urgen Schulze, University of Paderborn
6 / created 01jan26
7 /
8 / This file contains some high level interface functions (only these
9 / functions should be called by a user).
10 /
11 ******************************************************************************/
12
13
14 #include <space.h>
15
16 /*****************************************************************************
17 o Input:
18 undirected graph G
19 options -- if NULL, default options are used
20 option[0] holds OPTION_ORDTYPE
21 option[1] holds OPTION_NODE_SELECTION1
22 option[2] holds OPTION_NODE_SELECTION2
23 option[3] holds OPTION_NODE_SELECTION3
24 option[4] holds OPTION_DOMAIN_SIZE
25 option[5] holds OPTION_MSGLVL
26 o Output:
27 elimination/front tree T reflecting the ordering of G
28 cpus -- if NULL, no timing information is pulled back
29 cpus[0] holds TIME_COMPRESS
30 cpus[1] holds TIME_MS
31 cpus[2] holds TIME_MULTILEVEL
32 cpus[3] holds TIME_INITDOMDEC
33 cpus[4] holds TIME_COARSEDOMDEC
34 cpus[5] holds TIME_INITSEP
35 cpus[6] holds TIME_REFINESEP
36 cpus[7] holds TIME_SMOOTH
37 cpus[8] holds TIME_BOTTOMUP
38 cpus[9] holds TIME_UPDADJNCY
39 cpus[10] holds TIME_FINDINODES
40 cpus[11] holds TIME_UPDSCORE
41 o Comments:
42 this function computes an ordering for G; it returns an elimination
43 tree T; permutation vectors perm, invp can be extracted from T by
44 calling function permFromElimTree(T, perm, invp)
45 ******************************************************************************/
46 elimtree_t*
SPACE_ordering(graph_t * G,options_t * options,timings_t * cpus)47 SPACE_ordering(graph_t *G, options_t *options, timings_t *cpus)
48 { graph_t *Gc;
49 multisector_t *ms;
50 minprior_t *minprior;
51 elimtree_t *T, *T2;
52 timings_t cpusOrd[ORD_TIME_SLOTS];
53 options_t default_options[] = { SPACE_ORDTYPE, SPACE_NODE_SELECTION1,
54 SPACE_NODE_SELECTION2, SPACE_NODE_SELECTION3,
55 SPACE_DOMAIN_SIZE, SPACE_MSGLVL };
56 int *vtxmap, istage, totnstep, totnzf;
57 FLOAT totops;
58
59 /* --------------------------------------------------
60 set default options, if no other options specified
61 -------------------------------------------------- */
62 if (options == NULL)
63 options = default_options;
64
65 /* ----------------
66 reset all timers
67 ---------------- */
68 resettimer(cpusOrd[TIME_COMPRESS]);
69 resettimer(cpusOrd[TIME_MS]);
70 resettimer(cpusOrd[TIME_MULTILEVEL]);
71 resettimer(cpusOrd[TIME_INITDOMDEC]);
72 resettimer(cpusOrd[TIME_COARSEDOMDEC]);
73 resettimer(cpusOrd[TIME_INITSEP]);
74 resettimer(cpusOrd[TIME_REFINESEP]);
75 resettimer(cpusOrd[TIME_SMOOTH]);
76 resettimer(cpusOrd[TIME_BOTTOMUP]);
77 resettimer(cpusOrd[TIME_UPDADJNCY]);
78 resettimer(cpusOrd[TIME_FINDINODES]);
79 resettimer(cpusOrd[TIME_UPDSCORE]);
80
81 /* ------------------
82 compress the graph
83 ------------------ */
84 starttimer(cpusOrd[TIME_COMPRESS]);
85 mymalloc(vtxmap, G->nvtx, int);
86 Gc = compressGraph(G, vtxmap);
87 stoptimer(cpusOrd[TIME_COMPRESS]);
88
89 if (Gc != NULL)
90 { if (options[OPTION_MSGLVL] > 0)
91 printf("compressed graph constructed (#nodes %d, #edges %d)\n",
92 Gc->nvtx, Gc->nedges >> 1);
93 }
94 else
95 { Gc = G;
96 free(vtxmap);
97 if (options[OPTION_MSGLVL] > 0)
98 printf("no compressed graph constructed\n");
99 }
100
101 /* -------------------
102 compute multisector
103 ------------------- */
104
105
106 starttimer(cpusOrd[TIME_MS]);
107 ms = constructMultisector(Gc, options, cpusOrd);
108 stoptimer(cpusOrd[TIME_MS]);
109
110
111 if (options[OPTION_MSGLVL] > 0)
112 printf("quality of multisector: #stages %d, #nodes %d, weight %d\n",
113 ms->nstages, ms->nnodes, ms->totmswght);
114
115 /* ---------------------------------
116 compute minimum priority ordering
117 --------------------------------- */
118 starttimer(cpusOrd[TIME_BOTTOMUP])
119 minprior = setupMinPriority(ms);
120 T = orderMinPriority(minprior, options, cpusOrd);
121 stoptimer(cpusOrd[TIME_BOTTOMUP]);
122
123 if (options[OPTION_MSGLVL] > 0)
124 { totnstep = totnzf = 0;
125 totops = 0.0;
126 for (istage = 0; istage < ms->nstages; istage++)
127 { totnstep += minprior->stageinfo[istage].nstep;
128 totnzf += minprior->stageinfo[istage].nzf;
129 totops += minprior->stageinfo[istage].ops;
130 }
131 printf("quality of ordering: #steps %d, nzl %d, ops %e\n", totnstep,
132 totnzf, totops);
133 }
134
135 /* -----------------------
136 expand elimination tree
137 ----------------------- */
138 if (Gc != G)
139 { T2 = expandElimTree(T, vtxmap, G->nvtx);
140 freeElimTree(T);
141 freeGraph(Gc);
142 free(vtxmap);
143 }
144 else T2 = T;
145
146 /* --------------------------------------------------
147 pull back timing results, if vector cpus available
148 -------------------------------------------------- */
149 if (cpus != NULL)
150 { cpus[0] = cpusOrd[TIME_COMPRESS];
151 cpus[1] = cpusOrd[TIME_MS];
152 cpus[2] = cpusOrd[TIME_MULTILEVEL];
153 cpus[3] = cpusOrd[TIME_INITDOMDEC];
154 cpus[4] = cpusOrd[TIME_COARSEDOMDEC];
155 cpus[5] = cpusOrd[TIME_INITSEP];
156 cpus[6] = cpusOrd[TIME_REFINESEP];
157 cpus[7] = cpusOrd[TIME_SMOOTH];
158 cpus[8] = cpusOrd[TIME_BOTTOMUP];
159 cpus[9] = cpusOrd[TIME_UPDADJNCY];
160 cpus[10] = cpusOrd[TIME_FINDINODES];
161 cpus[11] = cpusOrd[TIME_UPDSCORE];
162 }
163
164 /* ----------------------
165 free memory and return
166 ---------------------- */
167 freeMultisector(ms);
168 freeMinPriority(minprior);
169 return(T2);
170 }
171
172
173 #if defined(cleaned_version)
174 /*****************************************************************************
175 o Input:
176 elimination/front tree T
177 max. number of zeros that is allowed to be introduced in front
178 o Output:
179 transformed elimination/front tree T'
180 o Comments:
181 the goal is to make T (obtained by orderMinPriority or
182 setupElimTree) more appropiate for the multifrontal algorithm
183 ******************************************************************************/
184 elimtree_t*
SPACE_transformElimTree(elimtree_t * T,int maxzeros)185 SPACE_transformElimTree(elimtree_t *T, int maxzeros)
186 { elimtree_t *T2, *T3;
187
188 /* -----------------------------------------------------
189 1st: determine the fundamental fronts
190 this step significantly improves the cache reuse
191 ----------------------------------------------------- */
192 T2 = fundamentalFronts(T);
193
194 /* -----------------------------------------------------------------
195 2nd: group together small subtrees into one front
196 this step reduces the number of fronts and thus the overhead
197 associated with them; the expense is added storage for the
198 logically zero entries and the factor operations on them
199 ------------------------------------------------------------------ */
200 T3 = mergeFronts(T2, maxzeros);
201 freeElimTree(T2);
202
203 /* --------------------------------------------------------------
204 3rd: order the children of a front so that the working storage
205 in the multifrontal algorithm is minimized
206 -------------------------------------------------------------- */
207 (void)justifyFronts(T3);
208 return(T3);
209 }
210
211 /*****************************************************************************
212 o Input:
213 transformed elimination/front tree T, input matrix A
214 o Output:
215 initial factor matrix L of the permuted input matrix PAP
216 o Comments: L contains nonzeros of PAP; all other entries are set to 0.0
217 ******************************************************************************/
218 factorMtx_t*
SPACE_symbFac(elimtree_t * T,inputMtx_t * A)219 SPACE_symbFac(elimtree_t *T, inputMtx_t *A)
220 { factorMtx_t *L;
221 frontsub_t *frontsub;
222 css_t *css;
223 inputMtx_t *PAP;
224 elimtree_t *PTP;
225 int *perm, neqs, nelem;
226
227 /* ------------------------------------------------------
228 extract permutation vectors from T and permute T and A
229 ------------------------------------------------------ */
230 neqs = A->neqs;
231 mymalloc(perm, neqs, int);
232 permFromElimTree(T, perm);
233 PTP = permuteElimTree(T, perm);
234 PAP = permuteInputMtx(A, perm);
235
236 /* -------------------------------------------------------------------
237 create factor matrix L of PAP, i.e.
238 (1) create the subscript structure of the fronts, i.e. frontsub
239 (2) use frontsub to create the compressed subscript structure of L
240 (3) allocate memory for L and the nonzeros of L, i.e. L->nzl
241 (4) init. L with the nonzeros of PAP
242 ------------------------------------------------------------------- */
243 frontsub = setupFrontSubscripts(PTP, PAP);
244 css = setupCSSFromFrontSubscripts(frontsub);
245
246 nelem = css->xnzl[neqs];
247 L = newFactorMtx(nelem);
248 L->perm = perm;
249 L->frontsub = frontsub;
250 L->css = css;
251
252 initFactorMtx(L, PAP);
253
254 /* -----------------------------------------------------
255 free permuted input matrix and return
256 note: PTP and perm have been inherited by frontsub, L
257 ----------------------------------------------------- */
258 freeInputMtx(PAP);
259 return(L);
260 }
261
262
263 /*****************************************************************************
264 o Input:
265 transformed elimination/front tree
266 initial factor matrix L of the permuted input matrix PAP
267 o Output:
268 factor matrix L of the permuted input matrix PAP
269 cpus -- if NULL no timing information is pulled back
270 cpus[0] holds TIME_INITFRONT
271 cpus[1] holds TIME_EXPAND
272 cpus[2] holds TIME_KERNEL
273 cpus[3] holds TIME_INITUPD
274 o Comments:
275 this function does the actual numerical factorization; to
276 improve register and cache reuse it uses a kernel of size 3x3
277 ******************************************************************************/
278 void
SPACE_numFac(factorMtx_t * L,timings_t * cpus)279 SPACE_numFac(factorMtx_t *L, timings_t *cpus)
280 { timings_t cpusFactor[NUMFAC_TIME_SLOTS];
281
282 /* ----------------
283 reset all timers
284 ---------------- */
285 resettimer(cpusFactor[TIME_INITFRONT]);
286 resettimer(cpusFactor[TIME_EXADD]);
287 resettimer(cpusFactor[TIME_KERNEL]);
288 resettimer(cpusFactor[TIME_INITUPD]);
289
290 /* -------------------------
291 compute Cholesky factor L
292 ------------------------- */
293 numfac(L, cpusFactor);
294
295 /* --------------------------------------------------
296 pull back timing results, if vector cpus available
297 -------------------------------------------------- */
298 if (cpus != NULL)
299 { cpus[0] = cpusFactor[TIME_INITFRONT];
300 cpus[1] = cpusFactor[TIME_EXADD];
301 cpus[2] = cpusFactor[TIME_KERNEL];
302 cpus[3] = cpusFactor[TIME_INITUPD];
303 }
304 }
305
306
307 /*****************************************************************************
308 o Input:
309 transformed elimination/front tree
310 factor matrix L of the permuted input matrix PAP
311 right hand side vector rhs of the original system Ax = b
312 o Output:
313 solution vector xvec of the original system Ax = b
314 o Comments:
315 this function solves the remaining triangular systems;
316 ******************************************************************************/
317 void
SPACE_solveTriangular(factorMtx_t * L,FLOAT * rhs,FLOAT * xvec)318 SPACE_solveTriangular(factorMtx_t *L, FLOAT *rhs, FLOAT *xvec)
319 { FLOAT *yvec;
320 int *perm;
321 int neqs, k;
322
323 perm = L->perm;
324 neqs = L->css->neqs;
325
326 /* -------------------------------------------
327 set up permuted right hand side vector yvec
328 ------------------------------------------- */
329 mymalloc(yvec, neqs, FLOAT);
330 for (k = 0; k < neqs; k++)
331 yvec[perm[k]] = rhs[k];
332
333 /* -------------------------
334 solve Ly = b and L^Tz = y
335 ------------------------- */
336 forwardSubst1x1(L, yvec);
337 backwardSubst1x1(L, yvec);
338
339 /* ---------------------------------------------------------------
340 extract from yvec the solution vector of the un-permuted system
341 --------------------------------------------------------------- */
342 for (k = 0; k < neqs; k++)
343 xvec[k] = yvec[perm[k]];
344 free(yvec);
345 }
346
347
348 /*****************************************************************************
349 o Input:
350 sparse matrix A, right hand side vector rhs
351 options -- if NULL, default options are used
352 option[0] holds OPTION_ORDTYPE
353 option[1] holds OPTION_NODE_SELECTION1
354 option[2] holds OPTION_NODE_SELECTION2
355 option[3] holds OPTION_NODE_SELECTION3
356 option[4] holds OPTION_DOMAIN_SIZE
357 option[5] holds OPTION_MSGLVL
358 option[6] holds OPTION_ETREE_NONZ
359 o Output:
360 solution vector xvec of the original system Ax = b
361 cpus -- if NULL, no timing information is pulled back
362 cpus[0] holds time to construct the graph
363 cpus[1] holds time to compute the ordering
364 cpus[2] holds TIME_COMPRESS
365 cpus[3] holds TIME_MS
366 cpus[4] holds TIME_MULTILEVEL
367 cpus[5] holds TIME_INITDOMDEC
368 cpus[6] holds TIME_COARSEDOMDEC
369 cpus[7] holds TIME_INITSEP
370 cpus[8] holds TIME_REFINESEP
371 cpus[9] holds TIME_SMOOTH
372 cpus[10] holds TIME_BOTTOMUP
373 cpus[11] holds TIME_UPDADJNCY;
374 cpus[12] holds TIME_FINDINODES
375 cpus[13] holds TIME_UPDSCORE
376 cpus[14] holds time to transform the elimination tree
377 cpus[15] holds time to compute the symbolical factorization
378 cpus[16] holds time to compute the numerical factorization
379 cpus[17] holds TIME_INITFRONT
380 cpus[18] holds TIME_EXADD
381 cpus[19] holds TIME_KERNEL
382 cpus[20] holds TIME_INITUPD
383 cpus[21] holds time to solve the triangular systems
384 o Comments:
385 this is the final topmost function that can be used as a black
386 box in other algorithm; it provides a general purpose direct
387 solver for large sparse positive definite systems
388 ******************************************************************************/
389 void
SPACE_solve(inputMtx_t * A,FLOAT * rhs,FLOAT * xvec,options_t * options,timings_t * cpus)390 SPACE_solve(inputMtx_t *A, FLOAT *rhs, FLOAT *xvec, options_t *options,
391 timings_t *cpus)
392 { graph_t *G;
393 elimtree_t *T, *T2;
394 factorMtx_t *L;
395 timings_t cpusOrd[ORD_TIME_SLOTS], cpusFactor[NUMFAC_TIME_SLOTS];
396 timings_t t_graph, t_ord, t_etree, t_symb, t_num, t_solvetri;
397 options_t default_options[] = { SPACE_ORDTYPE, SPACE_NODE_SELECTION1,
398 SPACE_NODE_SELECTION2, SPACE_NODE_SELECTION3,
399 SPACE_DOMAIN_SIZE, SPACE_MSGLVL, SPACE_ETREE_NONZ };
400
401 /* --------------------------------------------------
402 set default options, if no other options specified
403 -------------------------------------------------- */
404 if (options == NULL)
405 options = default_options;
406
407 /* ----------------
408 reset all timers
409 ---------------- */
410 resettimer(t_graph);
411 resettimer(t_ord);
412 resettimer(t_etree);
413 resettimer(t_symb);
414 resettimer(t_num);
415 resettimer(t_solvetri);
416
417 /* -----------------
418 set up graph G(A)
419 ----------------- */
420 starttimer(t_graph);
421 G = setupGraphFromMtx(A);
422 stoptimer(t_graph);
423
424 if (options[OPTION_MSGLVL] > 0)
425 printf("\ninduced graph constructed: #vertices %d, #edges %d, #components "
426 "%d\n", G->nvtx, G->nedges >> 1, connectedComponents(G));
427
428 /* --------------------------------------------
429 construct ordering/elimination tree for G(A)
430 -------------------------------------------- */
431 starttimer(t_ord);
432 T = SPACE_ordering(G, options, cpusOrd);
433 stoptimer(t_ord);
434 freeGraph(G);
435
436 if (options[OPTION_MSGLVL] > 0)
437 printf("quality of initial elim. tree: #fronts %d, #indices %d\n\t"
438 "nzl %d, ops %e, wspace %d\n", T->nfronts, nFactorIndices(T),
439 nFactorEntries(T), nFactorOps(T), nWorkspace(T));
440
441 /* -------------------------------
442 elimination tree transformation
443 ------------------------------- */
444 starttimer(t_etree);
445 T2 = SPACE_transformElimTree(T, options[OPTION_ETREE_NONZ]);
446 stoptimer(t_etree);
447 freeElimTree(T);
448
449 if (options[OPTION_MSGLVL] > 0)
450 printf("quality of transformed elim. tree: #fronts %d, #indices %d\n\t"
451 "nzl %d, ops %e, wspace %d\n", T2->nfronts, nFactorIndices(T2),
452 nFactorEntries(T2), nFactorOps(T2), nWorkspace(T2));
453
454 /* ------------------------
455 symbolical factorization
456 ------------------------ */
457 starttimer(t_symb);
458 L = SPACE_symbFac(T2, A);
459 stoptimer(t_symb);
460
461 if (options[OPTION_MSGLVL] > 0)
462 printf("quality of factor matrix:\n\tneqs %d, #indices %d, nzl %d\n",
463 L->css->neqs, L->css->nind, L->nelem);
464
465 /* -----------------------
466 numerical factorization
467 ----------------------- */
468 starttimer(t_num);
469 SPACE_numFac(L, cpusFactor);
470 stoptimer(t_num);
471
472 if (options[OPTION_MSGLVL] > 0)
473 printf("performance of numerical factorization: %6.2f mflops\n",
474 (double)nFactorOps(T2) / t_num / 1000000);
475
476 /* ------------------------------
477 solution of triangular systems
478 ------------------------------ */
479 starttimer(t_solvetri);
480 SPACE_solveTriangular(L, rhs, xvec);
481 stoptimer(t_solvetri);
482
483 if (options[OPTION_MSGLVL] > 0)
484 printf("performance of forward/backward solve: %6.2f mflops\n",
485 (double)nTriangularOps(T2) / t_solvetri / 1000000);
486
487 freeElimTree(T2);
488 freeFactorMtx(L);
489
490 /* --------------------------------------------------
491 pull back timing results, if vector cpus available
492 -------------------------------------------------- */
493 if (cpus != NULL)
494 { cpus[0] = t_graph;
495 cpus[1] = t_ord;
496 cpus[2] = cpusOrd[TIME_COMPRESS];
497 cpus[3] = cpusOrd[TIME_MS];
498 cpus[4] = cpusOrd[TIME_MULTILEVEL];
499 cpus[5] = cpusOrd[TIME_INITDOMDEC];
500 cpus[6] = cpusOrd[TIME_COARSEDOMDEC];
501 cpus[7] = cpusOrd[TIME_INITSEP];
502 cpus[8] = cpusOrd[TIME_REFINESEP];
503 cpus[9] = cpusOrd[TIME_SMOOTH];
504 cpus[10] = cpusOrd[TIME_BOTTOMUP];
505 cpus[11] = cpusOrd[TIME_UPDADJNCY];
506 cpus[12] = cpusOrd[TIME_FINDINODES];
507 cpus[13] = cpusOrd[TIME_UPDSCORE];
508 cpus[14] = t_etree;
509 cpus[15] = t_symb;
510 cpus[16] = t_num;
511 cpus[17] = cpusFactor[TIME_INITFRONT];
512 cpus[18] = cpusFactor[TIME_EXADD];
513 cpus[19] = cpusFactor[TIME_KERNEL];
514 cpus[20] = cpusFactor[TIME_INITUPD];
515 cpus[21] = t_solvetri;
516 }
517 }
518
519
520 /*****************************************************************************
521 o Input:
522 sparse matrix A with permutation vector perm
523 right hand side vector rhs
524 options -- if NULL, default options are used
525 option[0] holds OPTION_MSGLVL
526 option[1] holds OPTION_ETREE_NONZ
527 o Output:
528 solution vector xvec of the original system Ax = b
529 cpus -- if NULL, no timing information is pulled back
530 cpus[0] holds time to construct the graph
531 cpus[1] holds time to construct the elimination tree
532 cpus[2] holds time to transform the elimination tree
533 cpus[3] holds time to compute the symbolical factorization
534 cpus[4] holds time to compute the numerical factorization
535 cpus[5] holds TIME_INITFRONT
536 cpus[6] holds TIME_EXADD
537 cpus[7] holds TIME_KERNEL
538 cpus[8] holds TIME_INITUPD
539 cpus[9] holds time to solve the triangular systems
540 o Comments:
541 this function can be used to solve an equation system
542 using an externally computed permutation vector
543 ******************************************************************************/
544 void
SPACE_solveWithPerm(inputMtx_t * A,int * perm,FLOAT * rhs,FLOAT * xvec,options_t * options,timings_t * cpus)545 SPACE_solveWithPerm(inputMtx_t *A, int *perm, FLOAT *rhs, FLOAT *xvec,
546 options_t *options, timings_t *cpus)
547 { graph_t *G;
548 elimtree_t *T, *T2;
549 factorMtx_t *L;
550 timings_t cpusFactor[NUMFAC_TIME_SLOTS], t_graph, t_etree_construct;
551 timings_t t_etree_merge, t_symb, t_num, t_solvetri;
552 options_t default_options[] = { SPACE_MSGLVL, SPACE_ETREE_NONZ };
553 int *invp, i, msglvl, maxzeros;
554
555 /* --------------------------------------------------
556 set default options, if no other options specified
557 -------------------------------------------------- */
558 if (options == NULL)
559 options = default_options;
560 msglvl = options[0];
561 maxzeros = options[1];
562
563 /* ----------------
564 reset all timers
565 ---------------- */
566 resettimer(t_graph);
567 resettimer(t_etree_construct);
568 resettimer(t_etree_merge);
569 resettimer(t_symb);
570 resettimer(t_num);
571 resettimer(t_solvetri);
572
573 /* -----------------
574 set up graph G(A)
575 ----------------- */
576 starttimer(t_graph);
577 G = setupGraphFromMtx(A);
578 stoptimer(t_graph);
579
580 if (msglvl > 0)
581 printf("\ninduced graph constructed: #vertices %d, #edges %d, #components "
582 "%d\n", G->nvtx, G->nedges >> 1, connectedComponents(G));
583
584 /* ---------------------------------------------------
585 construct inital elimination tree according to perm
586 --------------------------------------------------- */
587 starttimer(t_etree_construct);
588 mymalloc(invp, G->nvtx, int);
589 for (i = 0; i < G->nvtx; i++)
590 invp[perm[i]] = i;
591 T = setupElimTree(G, perm, invp);
592 stoptimer(t_etree_construct);
593 freeGraph(G);
594 free(invp);
595
596 if (msglvl > 0)
597 printf("quality of initial elim. tree: #fronts %d, #indices %d\n\t"
598 "nzl %d, ops %e, wspace %d\n", T->nfronts, nFactorIndices(T),
599 nFactorEntries(T), nFactorOps(T), nWorkspace(T));
600
601 /* -------------------------------
602 elimination tree transformation
603 ------------------------------- */
604 starttimer(t_etree_merge);
605 T2 = SPACE_transformElimTree(T, maxzeros);
606 stoptimer(t_etree_merge);
607 freeElimTree(T);
608
609 if (msglvl > 0)
610 printf("quality of transformed elim. tree: #fronts %d, #indices %d\n\t"
611 "nzl %d, ops %e, wspace %d\n", T2->nfronts, nFactorIndices(T2),
612 nFactorEntries(T2), nFactorOps(T2), nWorkspace(T2));
613
614 /* ------------------------
615 symbolical factorization
616 ------------------------ */
617 starttimer(t_symb);
618 L = SPACE_symbFac(T2, A);
619 stoptimer(t_symb);
620
621 if (msglvl > 0)
622 printf("quality of factor matrix:\n\tneqs %d, #indices %d, nzl %d\n",
623 L->css->neqs, L->css->nind, L->nelem);
624
625 /* -----------------------
626 numerical factorization
627 ----------------------- */
628 starttimer(t_num);
629 SPACE_numFac(L, cpusFactor);
630 stoptimer(t_num);
631
632 if (msglvl > 0)
633 printf("performance of numerical factorization: %6.2f mflops\n",
634 (double)nFactorOps(T2) / t_num / 1000000);
635
636 /* ------------------------------
637 solution of triangular systems
638 ------------------------------ */
639 starttimer(t_solvetri);
640 SPACE_solveTriangular(L, rhs, xvec);
641 stoptimer(t_solvetri);
642
643 if (msglvl > 0)
644 printf("performance of forward/backward solve: %6.2f mflops\n",
645 (double)nTriangularOps(T2) / t_solvetri / 1000000);
646
647 freeElimTree(T2);
648 freeFactorMtx(L);
649
650 /* --------------------------------------------------
651 pull back timing results, if vector cpus available
652 -------------------------------------------------- */
653 if (cpus != NULL)
654 { cpus[0] = t_graph;
655 cpus[1] = t_etree_construct;
656 cpus[2] = t_etree_merge;
657 cpus[3] = t_symb;
658 cpus[4] = t_num;
659 cpus[5] = cpusFactor[TIME_INITFRONT];
660 cpus[6] = cpusFactor[TIME_EXADD];
661 cpus[7] = cpusFactor[TIME_KERNEL];
662 cpus[8] = cpusFactor[TIME_INITUPD];
663 cpus[9] = t_solvetri;
664 }
665 }
666
667
668 /*****************************************************************************
669 o Input:
670 graph G with permutation vector perm
671 options -- if NULL, default options are used
672 option[0] holds OPTION_MSGLVL
673 option[1] holds OPTION_ETREE_NONZ
674 option[2] holds OPTION_ETREE_BAL
675 option[3] holds dimension of hypercube
676 o Output:
677 mapping object map
678 cpus -- if NULL, no timing information is pulled back
679 cpus[0] holds time to construct the elimination tree
680 cpus[1] holds time to transform the elimination tree
681 cpus[2] holds time to compute the mapping
682 o Comments:
683 this function can be used to obtain a mapping object for the
684 parallel factorization
685 ******************************************************************************/
686 mapping_t*
SPACE_mapping(graph_t * G,int * perm,options_t * options,timings_t * cpus)687 SPACE_mapping(graph_t *G, int *perm, options_t *options, timings_t *cpus)
688 { mapping_t *map;
689 elimtree_t *T, *T2;
690 timings_t t_etree_construct, t_etree_merge, t_map;
691 options_t default_options[] = { SPACE_MSGLVL, SPACE_ETREE_NONZ,
692 SPACE_ETREE_BAL, 2 };
693 int *invp, i, msglvl, maxzeros, bal, dimQ;
694
695 /* --------------------------------------------------
696 set default options, if no other options specified
697 -------------------------------------------------- */
698 if (options == NULL)
699 options = default_options;
700 msglvl = options[0];
701 maxzeros = options[1];
702 bal = options[2];
703 dimQ = options[3];
704
705 /* ----------------
706 reset all timers
707 ---------------- */
708 resettimer(t_etree_construct);
709 resettimer(t_etree_merge);
710 resettimer(t_map);
711
712 /* ---------------------------------------------------
713 construct inital elimination tree according to perm
714 --------------------------------------------------- */
715 starttimer(t_etree_construct);
716 mymalloc(invp, G->nvtx, int);
717 for (i = 0; i < G->nvtx; i++)
718 invp[perm[i]] = i;
719 T = setupElimTree(G, perm, invp);
720 stoptimer(t_etree_construct);
721 free(invp);
722
723 if (msglvl > 0)
724 printf("quality of initial elim. tree: #fronts %d, #indices %d\n\t"
725 "nzl %d, ops %e, wspace %d\n", T->nfronts, nFactorIndices(T),
726 nFactorEntries(T), nFactorOps(T), nWorkspace(T));
727
728 /* -------------------------------
729 elimination tree transformation
730 ------------------------------- */
731 starttimer(t_etree_merge);
732 T2 = SPACE_transformElimTree(T, maxzeros);
733 stoptimer(t_etree_merge);
734 freeElimTree(T);
735
736 if (msglvl > 0)
737 printf("quality of transformed elim. tree: #fronts %d, #indices %d\n\t"
738 "nzl %d, ops %e, wspace %d\n", T2->nfronts, nFactorIndices(T2),
739 nFactorEntries(T2), nFactorOps(T2), nWorkspace(T2));
740
741 /* -------------------
742 compute the mapping
743 ------------------- */
744 starttimer(t_map);
745 map = setupMapping(T2, dimQ, bal);
746 stoptimer(t_map);
747
748 /* --------------------------------------------------
749 pull back timing results, if vector cpus available
750 -------------------------------------------------- */
751 if (cpus != NULL)
752 { cpus[0] = t_etree_construct;
753 cpus[1] = t_etree_merge;
754 cpus[2] = t_map;
755 }
756
757 /* --------------------------------------------------------------
758 return mapping object (don't free T2, since it belongs to map)
759 -------------------------------------------------------------- */
760 return(map);
761 }
762 #endif
763