1 /*****************************************************************************
2 /
3 / SPACE (SPArse Cholesky Elimination) Library: interface.c
4 /
5 / author        J"urgen Schulze, University of Paderborn
6 / created       01jan26
7 /
8 / This file contains some high level interface functions (only these
9 / functions should be called by a user).
10 /
11 ******************************************************************************/
12 
13 
14 #include <space.h>
15 
16 /*****************************************************************************
17     o Input:
18         undirected graph G
19         options -- if NULL, default options are used
20            option[0] holds OPTION_ORDTYPE
21            option[1] holds OPTION_NODE_SELECTION1
22            option[2] holds OPTION_NODE_SELECTION2
23            option[3] holds OPTION_NODE_SELECTION3
24            option[4] holds OPTION_DOMAIN_SIZE
25            option[5] holds OPTION_MSGLVL
26     o Output:
27         elimination/front tree T reflecting the ordering of G
28         cpus -- if NULL, no timing information is pulled back
29            cpus[0]  holds TIME_COMPRESS
30            cpus[1]  holds TIME_MS
31            cpus[2]  holds TIME_MULTILEVEL
32            cpus[3]  holds TIME_INITDOMDEC
33            cpus[4]  holds TIME_COARSEDOMDEC
34            cpus[5]  holds TIME_INITSEP
35            cpus[6]  holds TIME_REFINESEP
36            cpus[7]  holds TIME_SMOOTH
37            cpus[8]  holds TIME_BOTTOMUP
38            cpus[9]  holds TIME_UPDADJNCY
39            cpus[10] holds TIME_FINDINODES
40            cpus[11] holds TIME_UPDSCORE
41     o Comments:
42         this function computes an ordering for G; it returns an elimination
43         tree T; permutation vectors perm, invp can be extracted from T by
44         calling function permFromElimTree(T, perm, invp)
45 ******************************************************************************/
46 elimtree_t*
SPACE_ordering(graph_t * G,options_t * options,timings_t * cpus)47 SPACE_ordering(graph_t *G, options_t *options, timings_t *cpus)
48 { graph_t       *Gc;
49   multisector_t *ms;
50   minprior_t    *minprior;
51   elimtree_t    *T, *T2;
52   timings_t	cpusOrd[ORD_TIME_SLOTS];
53   options_t     default_options[] = { SPACE_ORDTYPE, SPACE_NODE_SELECTION1,
54                        SPACE_NODE_SELECTION2, SPACE_NODE_SELECTION3,
55                        SPACE_DOMAIN_SIZE, SPACE_MSGLVL };
56   int           *vtxmap, istage, totnstep, totnzf;
57   FLOAT         totops;
58 
59   /* --------------------------------------------------
60      set default options, if no other options specified
61      -------------------------------------------------- */
62   if (options == NULL)
63     options = default_options;
64 
65   /* ----------------
66      reset all timers
67      ---------------- */
68   resettimer(cpusOrd[TIME_COMPRESS]);
69   resettimer(cpusOrd[TIME_MS]);
70   resettimer(cpusOrd[TIME_MULTILEVEL]);
71   resettimer(cpusOrd[TIME_INITDOMDEC]);
72   resettimer(cpusOrd[TIME_COARSEDOMDEC]);
73   resettimer(cpusOrd[TIME_INITSEP]);
74   resettimer(cpusOrd[TIME_REFINESEP]);
75   resettimer(cpusOrd[TIME_SMOOTH]);
76   resettimer(cpusOrd[TIME_BOTTOMUP]);
77   resettimer(cpusOrd[TIME_UPDADJNCY]);
78   resettimer(cpusOrd[TIME_FINDINODES]);
79   resettimer(cpusOrd[TIME_UPDSCORE]);
80 
81   /* ------------------
82      compress the graph
83      ------------------ */
84   starttimer(cpusOrd[TIME_COMPRESS]);
85   mymalloc(vtxmap, G->nvtx, int);
86   Gc = compressGraph(G, vtxmap);
87   stoptimer(cpusOrd[TIME_COMPRESS]);
88 
89   if (Gc != NULL)
90    { if (options[OPTION_MSGLVL] > 0)
91        printf("compressed graph constructed (#nodes %d, #edges %d)\n",
92               Gc->nvtx, Gc->nedges >> 1);
93    }
94   else
95    { Gc = G;
96      free(vtxmap);
97      if (options[OPTION_MSGLVL] > 0)
98        printf("no compressed graph constructed\n");
99    }
100 
101   /* -------------------
102      compute multisector
103      ------------------- */
104 
105 
106   starttimer(cpusOrd[TIME_MS]);
107   ms = constructMultisector(Gc, options, cpusOrd);
108   stoptimer(cpusOrd[TIME_MS]);
109 
110 
111   if (options[OPTION_MSGLVL] > 0)
112     printf("quality of multisector: #stages %d, #nodes %d, weight %d\n",
113            ms->nstages, ms->nnodes, ms->totmswght);
114 
115   /* ---------------------------------
116      compute minimum priority ordering
117      --------------------------------- */
118   starttimer(cpusOrd[TIME_BOTTOMUP])
119   minprior = setupMinPriority(ms);
120   T = orderMinPriority(minprior, options, cpusOrd);
121   stoptimer(cpusOrd[TIME_BOTTOMUP]);
122 
123   if (options[OPTION_MSGLVL] > 0)
124    { totnstep = totnzf = 0;
125      totops = 0.0;
126      for (istage = 0; istage < ms->nstages; istage++)
127       { totnstep += minprior->stageinfo[istage].nstep;
128         totnzf   += minprior->stageinfo[istage].nzf;
129         totops   += minprior->stageinfo[istage].ops;
130       }
131      printf("quality of ordering: #steps %d, nzl %d, ops %e\n", totnstep,
132             totnzf, totops);
133    }
134 
135   /* -----------------------
136      expand elimination tree
137      ----------------------- */
138   if (Gc != G)
139    { T2 = expandElimTree(T, vtxmap, G->nvtx);
140      freeElimTree(T);
141      freeGraph(Gc);
142      free(vtxmap);
143    }
144   else T2 = T;
145 
146   /* --------------------------------------------------
147      pull back timing results, if vector cpus available
148      -------------------------------------------------- */
149   if (cpus != NULL)
150    { cpus[0]  = cpusOrd[TIME_COMPRESS];
151      cpus[1]  = cpusOrd[TIME_MS];
152      cpus[2]  = cpusOrd[TIME_MULTILEVEL];
153      cpus[3]  = cpusOrd[TIME_INITDOMDEC];
154      cpus[4]  = cpusOrd[TIME_COARSEDOMDEC];
155      cpus[5]  = cpusOrd[TIME_INITSEP];
156      cpus[6]  = cpusOrd[TIME_REFINESEP];
157      cpus[7]  = cpusOrd[TIME_SMOOTH];
158      cpus[8]  = cpusOrd[TIME_BOTTOMUP];
159      cpus[9]  = cpusOrd[TIME_UPDADJNCY];
160      cpus[10] = cpusOrd[TIME_FINDINODES];
161      cpus[11] = cpusOrd[TIME_UPDSCORE];
162    }
163 
164   /* ----------------------
165      free memory and return
166      ---------------------- */
167   freeMultisector(ms);
168   freeMinPriority(minprior);
169   return(T2);
170 }
171 
172 
173 #if defined(cleaned_version)
174 /*****************************************************************************
175     o Input:
176         elimination/front tree T
177         max. number of zeros that is allowed to be introduced in front
178     o Output:
179         transformed elimination/front tree T'
180     o Comments:
181         the goal is to make T (obtained by orderMinPriority or
182         setupElimTree) more appropiate for the multifrontal algorithm
183 ******************************************************************************/
184 elimtree_t*
SPACE_transformElimTree(elimtree_t * T,int maxzeros)185 SPACE_transformElimTree(elimtree_t *T, int maxzeros)
186 { elimtree_t *T2, *T3;
187 
188   /* -----------------------------------------------------
189      1st: determine the fundamental fronts
190           this step significantly improves the cache reuse
191      ----------------------------------------------------- */
192   T2 = fundamentalFronts(T);
193 
194   /* -----------------------------------------------------------------
195      2nd: group together small subtrees into one front
196           this step reduces the number of fronts and thus the overhead
197           associated with them; the expense is added storage for the
198           logically zero entries and the factor operations on them
199      ------------------------------------------------------------------ */
200   T3 = mergeFronts(T2, maxzeros);
201   freeElimTree(T2);
202 
203   /* --------------------------------------------------------------
204      3rd: order the children of a front so that the working storage
205           in the multifrontal algorithm is minimized
206      -------------------------------------------------------------- */
207   (void)justifyFronts(T3);
208   return(T3);
209 }
210 
211 /*****************************************************************************
212     o Input:
213         transformed elimination/front tree T, input matrix A
214     o Output:
215         initial factor matrix L of the permuted input matrix PAP
216     o Comments: L contains nonzeros of PAP; all other entries are set to 0.0
217 ******************************************************************************/
218 factorMtx_t*
SPACE_symbFac(elimtree_t * T,inputMtx_t * A)219 SPACE_symbFac(elimtree_t *T, inputMtx_t *A)
220 { factorMtx_t *L;
221   frontsub_t  *frontsub;
222   css_t       *css;
223   inputMtx_t  *PAP;
224   elimtree_t  *PTP;
225   int         *perm, neqs, nelem;
226 
227   /* ------------------------------------------------------
228      extract permutation vectors from T and permute T and A
229      ------------------------------------------------------ */
230   neqs = A->neqs;
231   mymalloc(perm, neqs, int);
232   permFromElimTree(T, perm);
233   PTP = permuteElimTree(T, perm);
234   PAP = permuteInputMtx(A, perm);
235 
236   /* -------------------------------------------------------------------
237      create factor matrix L of PAP, i.e.
238       (1) create the subscript structure of the fronts, i.e. frontsub
239       (2) use frontsub to create the compressed subscript structure of L
240       (3) allocate memory for L and the nonzeros of L, i.e. L->nzl
241       (4) init. L with the nonzeros of PAP
242      ------------------------------------------------------------------- */
243   frontsub = setupFrontSubscripts(PTP, PAP);
244   css = setupCSSFromFrontSubscripts(frontsub);
245 
246   nelem = css->xnzl[neqs];
247   L = newFactorMtx(nelem);
248   L->perm = perm;
249   L->frontsub = frontsub;
250   L->css = css;
251 
252   initFactorMtx(L, PAP);
253 
254   /* -----------------------------------------------------
255      free permuted input matrix and return
256      note: PTP and perm have been inherited by frontsub, L
257      ----------------------------------------------------- */
258   freeInputMtx(PAP);
259   return(L);
260 }
261 
262 
263 /*****************************************************************************
264     o Input:
265         transformed elimination/front tree
266         initial factor matrix L of the permuted input matrix PAP
267     o Output:
268         factor matrix L of the permuted input matrix PAP
269         cpus -- if NULL no timing information is pulled back
270            cpus[0] holds TIME_INITFRONT
271            cpus[1] holds TIME_EXPAND
272            cpus[2] holds TIME_KERNEL
273            cpus[3] holds TIME_INITUPD
274     o Comments:
275         this function does the actual numerical factorization; to
276         improve register and cache reuse it uses a kernel of size 3x3
277 ******************************************************************************/
278 void
SPACE_numFac(factorMtx_t * L,timings_t * cpus)279 SPACE_numFac(factorMtx_t *L, timings_t *cpus)
280 { timings_t  cpusFactor[NUMFAC_TIME_SLOTS];
281 
282   /* ----------------
283      reset all timers
284      ---------------- */
285   resettimer(cpusFactor[TIME_INITFRONT]);
286   resettimer(cpusFactor[TIME_EXADD]);
287   resettimer(cpusFactor[TIME_KERNEL]);
288   resettimer(cpusFactor[TIME_INITUPD]);
289 
290   /* -------------------------
291      compute Cholesky factor L
292      ------------------------- */
293   numfac(L, cpusFactor);
294 
295  /* --------------------------------------------------
296      pull back timing results, if vector cpus available
297      -------------------------------------------------- */
298   if (cpus != NULL)
299    { cpus[0] = cpusFactor[TIME_INITFRONT];
300      cpus[1] = cpusFactor[TIME_EXADD];
301      cpus[2] = cpusFactor[TIME_KERNEL];
302      cpus[3] = cpusFactor[TIME_INITUPD];
303    }
304 }
305 
306 
307 /*****************************************************************************
308     o Input:
309         transformed elimination/front tree
310         factor matrix L of the permuted input matrix PAP
311         right hand side vector rhs of the original system Ax = b
312     o Output:
313         solution vector xvec of the original system Ax = b
314     o Comments:
315         this function solves the remaining triangular systems;
316 ******************************************************************************/
317 void
SPACE_solveTriangular(factorMtx_t * L,FLOAT * rhs,FLOAT * xvec)318 SPACE_solveTriangular(factorMtx_t *L, FLOAT *rhs, FLOAT *xvec)
319 { FLOAT *yvec;
320   int   *perm;
321   int   neqs, k;
322 
323   perm = L->perm;
324   neqs = L->css->neqs;
325 
326   /* -------------------------------------------
327      set up permuted right hand side vector yvec
328      ------------------------------------------- */
329   mymalloc(yvec, neqs, FLOAT);
330   for (k = 0; k < neqs; k++)
331     yvec[perm[k]] = rhs[k];
332 
333   /* -------------------------
334      solve Ly = b and L^Tz = y
335      ------------------------- */
336   forwardSubst1x1(L, yvec);
337   backwardSubst1x1(L, yvec);
338 
339   /* ---------------------------------------------------------------
340      extract from yvec the solution vector of the un-permuted system
341      --------------------------------------------------------------- */
342   for (k = 0; k < neqs; k++)
343     xvec[k] = yvec[perm[k]];
344   free(yvec);
345 }
346 
347 
348 /*****************************************************************************
349     o Input:
350         sparse matrix A, right hand side vector rhs
351         options -- if NULL, default options are used
352            option[0] holds OPTION_ORDTYPE
353            option[1] holds OPTION_NODE_SELECTION1
354            option[2] holds OPTION_NODE_SELECTION2
355            option[3] holds OPTION_NODE_SELECTION3
356            option[4] holds OPTION_DOMAIN_SIZE
357            option[5] holds OPTION_MSGLVL
358            option[6] holds OPTION_ETREE_NONZ
359     o Output:
360         solution vector xvec of the original system Ax = b
361         cpus -- if NULL, no timing information is pulled back
362            cpus[0]  holds time to construct the graph
363            cpus[1]  holds time to compute the ordering
364            cpus[2]  holds TIME_COMPRESS
365            cpus[3]  holds TIME_MS
366            cpus[4]  holds TIME_MULTILEVEL
367            cpus[5]  holds TIME_INITDOMDEC
368            cpus[6]  holds TIME_COARSEDOMDEC
369            cpus[7]  holds TIME_INITSEP
370            cpus[8]  holds TIME_REFINESEP
371            cpus[9]  holds TIME_SMOOTH
372            cpus[10] holds TIME_BOTTOMUP
373            cpus[11] holds TIME_UPDADJNCY;
374            cpus[12] holds TIME_FINDINODES
375            cpus[13] holds TIME_UPDSCORE
376            cpus[14] holds time to transform the elimination tree
377            cpus[15] holds time to compute the symbolical factorization
378            cpus[16] holds time to compute the numerical factorization
379            cpus[17] holds TIME_INITFRONT
380            cpus[18] holds TIME_EXADD
381            cpus[19] holds TIME_KERNEL
382            cpus[20] holds TIME_INITUPD
383            cpus[21] holds time to solve the triangular systems
384     o Comments:
385         this is the final topmost function that can be used as a black
386         box in other algorithm; it provides a general purpose direct
387         solver for large sparse positive definite systems
388 ******************************************************************************/
389 void
SPACE_solve(inputMtx_t * A,FLOAT * rhs,FLOAT * xvec,options_t * options,timings_t * cpus)390 SPACE_solve(inputMtx_t *A, FLOAT *rhs, FLOAT *xvec, options_t *options,
391             timings_t *cpus)
392 { graph_t     *G;
393   elimtree_t  *T, *T2;
394   factorMtx_t *L;
395   timings_t   cpusOrd[ORD_TIME_SLOTS], cpusFactor[NUMFAC_TIME_SLOTS];
396   timings_t   t_graph, t_ord, t_etree, t_symb, t_num, t_solvetri;
397   options_t   default_options[] = { SPACE_ORDTYPE, SPACE_NODE_SELECTION1,
398                      SPACE_NODE_SELECTION2, SPACE_NODE_SELECTION3,
399                      SPACE_DOMAIN_SIZE, SPACE_MSGLVL, SPACE_ETREE_NONZ };
400 
401   /* --------------------------------------------------
402      set default options, if no other options specified
403      -------------------------------------------------- */
404   if (options == NULL)
405     options = default_options;
406 
407   /* ----------------
408      reset all timers
409      ---------------- */
410   resettimer(t_graph);
411   resettimer(t_ord);
412   resettimer(t_etree);
413   resettimer(t_symb);
414   resettimer(t_num);
415   resettimer(t_solvetri);
416 
417   /* -----------------
418      set up graph G(A)
419      ----------------- */
420   starttimer(t_graph);
421   G = setupGraphFromMtx(A);
422   stoptimer(t_graph);
423 
424   if (options[OPTION_MSGLVL] > 0)
425     printf("\ninduced graph constructed: #vertices %d, #edges %d, #components "
426            "%d\n", G->nvtx, G->nedges >> 1, connectedComponents(G));
427 
428   /* --------------------------------------------
429      construct ordering/elimination tree for G(A)
430      -------------------------------------------- */
431   starttimer(t_ord);
432   T = SPACE_ordering(G, options, cpusOrd);
433   stoptimer(t_ord);
434   freeGraph(G);
435 
436   if (options[OPTION_MSGLVL] > 0)
437     printf("quality of initial elim. tree: #fronts %d, #indices %d\n\t"
438            "nzl %d, ops %e, wspace %d\n", T->nfronts, nFactorIndices(T),
439            nFactorEntries(T), nFactorOps(T), nWorkspace(T));
440 
441   /* -------------------------------
442      elimination tree transformation
443      ------------------------------- */
444   starttimer(t_etree);
445   T2 = SPACE_transformElimTree(T, options[OPTION_ETREE_NONZ]);
446   stoptimer(t_etree);
447   freeElimTree(T);
448 
449   if (options[OPTION_MSGLVL] > 0)
450     printf("quality of transformed elim. tree: #fronts %d, #indices %d\n\t"
451            "nzl %d, ops %e, wspace %d\n", T2->nfronts, nFactorIndices(T2),
452            nFactorEntries(T2), nFactorOps(T2), nWorkspace(T2));
453 
454   /* ------------------------
455      symbolical factorization
456      ------------------------ */
457   starttimer(t_symb);
458   L = SPACE_symbFac(T2, A);
459   stoptimer(t_symb);
460 
461   if (options[OPTION_MSGLVL] > 0)
462     printf("quality of factor matrix:\n\tneqs %d, #indices %d, nzl %d\n",
463            L->css->neqs, L->css->nind, L->nelem);
464 
465   /* -----------------------
466      numerical factorization
467      ----------------------- */
468   starttimer(t_num);
469   SPACE_numFac(L, cpusFactor);
470   stoptimer(t_num);
471 
472   if (options[OPTION_MSGLVL] > 0)
473     printf("performance of numerical factorization: %6.2f mflops\n",
474             (double)nFactorOps(T2) / t_num / 1000000);
475 
476   /* ------------------------------
477      solution of triangular systems
478      ------------------------------ */
479   starttimer(t_solvetri);
480   SPACE_solveTriangular(L, rhs, xvec);
481   stoptimer(t_solvetri);
482 
483   if (options[OPTION_MSGLVL] > 0)
484     printf("performance of forward/backward solve:  %6.2f mflops\n",
485             (double)nTriangularOps(T2) / t_solvetri / 1000000);
486 
487   freeElimTree(T2);
488   freeFactorMtx(L);
489 
490   /* --------------------------------------------------
491      pull back timing results, if vector cpus available
492      -------------------------------------------------- */
493   if (cpus != NULL)
494    { cpus[0]  = t_graph;
495      cpus[1]  = t_ord;
496      cpus[2]  = cpusOrd[TIME_COMPRESS];
497      cpus[3]  = cpusOrd[TIME_MS];
498      cpus[4]  = cpusOrd[TIME_MULTILEVEL];
499      cpus[5]  = cpusOrd[TIME_INITDOMDEC];
500      cpus[6]  = cpusOrd[TIME_COARSEDOMDEC];
501      cpus[7]  = cpusOrd[TIME_INITSEP];
502      cpus[8]  = cpusOrd[TIME_REFINESEP];
503      cpus[9]  = cpusOrd[TIME_SMOOTH];
504      cpus[10] = cpusOrd[TIME_BOTTOMUP];
505      cpus[11] = cpusOrd[TIME_UPDADJNCY];
506      cpus[12] = cpusOrd[TIME_FINDINODES];
507      cpus[13] = cpusOrd[TIME_UPDSCORE];
508      cpus[14] = t_etree;
509      cpus[15] = t_symb;
510      cpus[16] = t_num;
511      cpus[17] = cpusFactor[TIME_INITFRONT];
512      cpus[18] = cpusFactor[TIME_EXADD];
513      cpus[19] = cpusFactor[TIME_KERNEL];
514      cpus[20] = cpusFactor[TIME_INITUPD];
515      cpus[21] = t_solvetri;
516    }
517 }
518 
519 
520 /*****************************************************************************
521     o Input:
522         sparse matrix A with permutation vector perm
523         right hand side vector rhs
524         options -- if NULL, default options are used
525           option[0] holds OPTION_MSGLVL
526           option[1] holds OPTION_ETREE_NONZ
527     o Output:
528         solution vector xvec of the original system Ax = b
529         cpus -- if NULL, no timing information is pulled back
530            cpus[0] holds time to construct the graph
531            cpus[1] holds time to construct the elimination tree
532            cpus[2] holds time to transform the elimination tree
533            cpus[3] holds time to compute the symbolical factorization
534            cpus[4] holds time to compute the numerical factorization
535            cpus[5] holds TIME_INITFRONT
536            cpus[6] holds TIME_EXADD
537            cpus[7] holds TIME_KERNEL
538            cpus[8] holds TIME_INITUPD
539            cpus[9] holds time to solve the triangular systems
540     o Comments:
541         this function can be used to solve an equation system
542         using an externally computed permutation vector
543 ******************************************************************************/
544 void
SPACE_solveWithPerm(inputMtx_t * A,int * perm,FLOAT * rhs,FLOAT * xvec,options_t * options,timings_t * cpus)545 SPACE_solveWithPerm(inputMtx_t *A, int *perm, FLOAT *rhs, FLOAT *xvec,
546                     options_t *options, timings_t *cpus)
547 { graph_t     *G;
548   elimtree_t  *T, *T2;
549   factorMtx_t *L;
550   timings_t   cpusFactor[NUMFAC_TIME_SLOTS], t_graph, t_etree_construct;
551   timings_t   t_etree_merge, t_symb, t_num, t_solvetri;
552   options_t   default_options[] = { SPACE_MSGLVL, SPACE_ETREE_NONZ };
553   int         *invp, i, msglvl, maxzeros;
554 
555   /* --------------------------------------------------
556      set default options, if no other options specified
557      -------------------------------------------------- */
558   if (options == NULL)
559     options = default_options;
560   msglvl = options[0];
561   maxzeros = options[1];
562 
563   /* ----------------
564      reset all timers
565      ---------------- */
566   resettimer(t_graph);
567   resettimer(t_etree_construct);
568   resettimer(t_etree_merge);
569   resettimer(t_symb);
570   resettimer(t_num);
571   resettimer(t_solvetri);
572 
573   /* -----------------
574      set up graph G(A)
575      ----------------- */
576   starttimer(t_graph);
577   G = setupGraphFromMtx(A);
578   stoptimer(t_graph);
579 
580   if (msglvl > 0)
581     printf("\ninduced graph constructed: #vertices %d, #edges %d, #components "
582            "%d\n", G->nvtx, G->nedges >> 1, connectedComponents(G));
583 
584   /* ---------------------------------------------------
585      construct inital elimination tree according to perm
586      --------------------------------------------------- */
587   starttimer(t_etree_construct);
588   mymalloc(invp, G->nvtx, int);
589   for (i = 0; i < G->nvtx; i++)
590     invp[perm[i]] = i;
591   T = setupElimTree(G, perm, invp);
592   stoptimer(t_etree_construct);
593   freeGraph(G);
594   free(invp);
595 
596   if (msglvl > 0)
597     printf("quality of initial elim. tree: #fronts %d, #indices %d\n\t"
598            "nzl %d, ops %e, wspace %d\n", T->nfronts, nFactorIndices(T),
599            nFactorEntries(T), nFactorOps(T), nWorkspace(T));
600 
601   /* -------------------------------
602      elimination tree transformation
603      ------------------------------- */
604   starttimer(t_etree_merge);
605   T2 = SPACE_transformElimTree(T, maxzeros);
606   stoptimer(t_etree_merge);
607   freeElimTree(T);
608 
609   if (msglvl > 0)
610     printf("quality of transformed elim. tree: #fronts %d, #indices %d\n\t"
611            "nzl %d, ops %e, wspace %d\n", T2->nfronts, nFactorIndices(T2),
612            nFactorEntries(T2), nFactorOps(T2), nWorkspace(T2));
613 
614   /* ------------------------
615      symbolical factorization
616      ------------------------ */
617   starttimer(t_symb);
618   L = SPACE_symbFac(T2, A);
619   stoptimer(t_symb);
620 
621   if (msglvl > 0)
622     printf("quality of factor matrix:\n\tneqs %d, #indices %d, nzl %d\n",
623            L->css->neqs, L->css->nind, L->nelem);
624 
625   /* -----------------------
626      numerical factorization
627      ----------------------- */
628   starttimer(t_num);
629   SPACE_numFac(L, cpusFactor);
630   stoptimer(t_num);
631 
632   if (msglvl > 0)
633     printf("performance of numerical factorization: %6.2f mflops\n",
634             (double)nFactorOps(T2) / t_num / 1000000);
635 
636   /* ------------------------------
637      solution of triangular systems
638      ------------------------------ */
639   starttimer(t_solvetri);
640   SPACE_solveTriangular(L, rhs, xvec);
641   stoptimer(t_solvetri);
642 
643   if (msglvl > 0)
644     printf("performance of forward/backward solve:  %6.2f mflops\n",
645             (double)nTriangularOps(T2) / t_solvetri / 1000000);
646 
647   freeElimTree(T2);
648   freeFactorMtx(L);
649 
650   /* --------------------------------------------------
651      pull back timing results, if vector cpus available
652      -------------------------------------------------- */
653   if (cpus != NULL)
654    { cpus[0] = t_graph;
655      cpus[1] = t_etree_construct;
656      cpus[2] = t_etree_merge;
657      cpus[3] = t_symb;
658      cpus[4] = t_num;
659      cpus[5] = cpusFactor[TIME_INITFRONT];
660      cpus[6] = cpusFactor[TIME_EXADD];
661      cpus[7] = cpusFactor[TIME_KERNEL];
662      cpus[8] = cpusFactor[TIME_INITUPD];
663      cpus[9] = t_solvetri;
664    }
665 }
666 
667 
668 /*****************************************************************************
669     o Input:
670         graph G with permutation vector perm
671         options -- if NULL, default options are used
672           option[0] holds OPTION_MSGLVL
673           option[1] holds OPTION_ETREE_NONZ
674           option[2] holds OPTION_ETREE_BAL
675           option[3] holds dimension of hypercube
676     o Output:
677         mapping object map
678         cpus -- if NULL, no timing information is pulled back
679            cpus[0] holds time to construct the elimination tree
680            cpus[1] holds time to transform the elimination tree
681            cpus[2] holds time to compute the mapping
682     o Comments:
683         this function can be used to obtain a mapping object for the
684         parallel factorization
685 ******************************************************************************/
686 mapping_t*
SPACE_mapping(graph_t * G,int * perm,options_t * options,timings_t * cpus)687 SPACE_mapping(graph_t *G, int *perm, options_t *options, timings_t *cpus)
688 { mapping_t *map;
689   elimtree_t *T, *T2;
690   timings_t  t_etree_construct, t_etree_merge, t_map;
691   options_t  default_options[] = { SPACE_MSGLVL, SPACE_ETREE_NONZ,
692                                    SPACE_ETREE_BAL, 2 };
693   int        *invp, i, msglvl, maxzeros, bal, dimQ;
694 
695   /* --------------------------------------------------
696      set default options, if no other options specified
697      -------------------------------------------------- */
698   if (options == NULL)
699     options = default_options;
700   msglvl = options[0];
701   maxzeros = options[1];
702   bal = options[2];
703   dimQ = options[3];
704 
705   /* ----------------
706      reset all timers
707      ---------------- */
708   resettimer(t_etree_construct);
709   resettimer(t_etree_merge);
710   resettimer(t_map);
711 
712   /* ---------------------------------------------------
713      construct inital elimination tree according to perm
714      --------------------------------------------------- */
715   starttimer(t_etree_construct);
716   mymalloc(invp, G->nvtx, int);
717   for (i = 0; i < G->nvtx; i++)
718     invp[perm[i]] = i;
719   T = setupElimTree(G, perm, invp);
720   stoptimer(t_etree_construct);
721   free(invp);
722 
723   if (msglvl > 0)
724     printf("quality of initial elim. tree: #fronts %d, #indices %d\n\t"
725            "nzl %d, ops %e, wspace %d\n", T->nfronts, nFactorIndices(T),
726            nFactorEntries(T), nFactorOps(T), nWorkspace(T));
727 
728   /* -------------------------------
729      elimination tree transformation
730      ------------------------------- */
731   starttimer(t_etree_merge);
732   T2 = SPACE_transformElimTree(T, maxzeros);
733   stoptimer(t_etree_merge);
734   freeElimTree(T);
735 
736   if (msglvl > 0)
737     printf("quality of transformed elim. tree: #fronts %d, #indices %d\n\t"
738            "nzl %d, ops %e, wspace %d\n", T2->nfronts, nFactorIndices(T2),
739            nFactorEntries(T2), nFactorOps(T2), nWorkspace(T2));
740 
741   /* -------------------
742      compute the mapping
743      ------------------- */
744   starttimer(t_map);
745   map = setupMapping(T2, dimQ, bal);
746   stoptimer(t_map);
747 
748   /* --------------------------------------------------
749      pull back timing results, if vector cpus available
750      -------------------------------------------------- */
751   if (cpus != NULL)
752    { cpus[0] = t_etree_construct;
753      cpus[1] = t_etree_merge;
754      cpus[2] = t_map;
755    }
756 
757   /* --------------------------------------------------------------
758      return mapping object (don't free T2, since it belongs to map)
759      -------------------------------------------------------------- */
760   return(map);
761 }
762 #endif
763