1 /* factorUtil.c */
2
3 #include "../FrontMtx.h"
4 #include "../../timings.h"
5
6 /*--------------------------------------------------------------------*/
7 /*
8 -----------------------------
9 purpose -- initialize a front
10
11 created -- 98may04, cca
12 -----------------------------
13 */
14 void
FrontMtx_initializeFront(FrontMtx * frontmtx,Chv * frontJ,int J)15 FrontMtx_initializeFront (
16 FrontMtx *frontmtx,
17 Chv *frontJ,
18 int J
19 ) {
20 int ncolJ, nD, nrowJ ;
21 int *colindJ, *ivec ;
22 /*
23 -----------------------------------
24 get the number of internal vertices
25 -----------------------------------
26 */
27 nD = ETree_frontSize(frontmtx->frontETree, J) ;
28 /*
29 --------------------------------------
30 get the internal and boundary vertices
31 --------------------------------------
32 */
33 IVL_listAndSize(frontmtx->symbfacIVL, J, &ncolJ, &colindJ) ;
34 #if MYDEBUG > 0
35 fprintf(stdout, "\n front %d, column indices", J) ;
36 IVfprintf(stdout, ncolJ, colindJ) ;
37 fflush(stdout) ;
38 #endif
39 /*
40 ------------------------------------------------------
41 initialize the front's dimensions, indices and entries
42 ------------------------------------------------------
43 */
44 #if MYDEBUG > 0
45 fprintf(stdout, "\n front %d, nD %d, nU %d", J, nD, ncolJ - nD) ;
46 fflush(stdout) ;
47 #endif
48 Chv_init(frontJ, J, nD, ncolJ - nD, ncolJ - nD,
49 frontmtx->type, frontmtx->symmetryflag) ;
50 /*
51 -----------------------
52 fill the column indices
53 -----------------------
54 */
55 Chv_columnIndices(frontJ, &ncolJ, &ivec) ;
56 IVcopy(ncolJ, ivec, colindJ) ;
57 #if MYDEBUG > 0
58 fprintf(stdout, "\n front %d, colind = %p", J, ivec) ;
59 IVfprintf(stdout, ncolJ, ivec) ;
60 fflush(stdout) ;
61 #endif
62 if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
63 /*
64 --------------------
65 fill the row indices
66 --------------------
67 */
68 Chv_rowIndices(frontJ, &nrowJ, &ivec) ;
69 IVcopy(nrowJ, ivec, colindJ) ;
70 }
71 /*
72 ------------------------
73 zero the front's entries
74 ------------------------
75 */
76 Chv_zero(frontJ) ;
77 #if MYDEBUG > 0
78 fprintf(stdout, "\n leaving Front_initializeFront()") ;
79 Chv_writeForHumanEye(frontJ, stdout) ;
80 fflush(stdout) ;
81 #endif
82
83 return ; }
84
85 /*--------------------------------------------------------------------*/
86 /*
87 ----------------
88 static functions
89 ----------------
90 */
91 static void assembleAggregates ( Chv *frontJ, ChvList *aggList,
92 ChvManager *chvmanager, double cpus[], int msglvl, FILE *msgFile ) ;
93 static char assemblePostponedData ( FrontMtx *frontmtx, Chv *frontJ,
94 int *pndelay, Chv *fronts[], ChvList *postList,
95 ChvManager *chvmanager, double cpus[], int msglvl, FILE *msgFile ) ;
96 static int factorFront ( FrontMtx *frontmtx, Chv *frontJ,
97 int ndelay, double tau, IV *pivotsizesIV, DV *workDV, int stats[],
98 double cpus[], int msglvl, FILE *msgFile ) ;
99 static void storeEntries ( FrontMtx *frontmtx, Chv *frontJ, int nelim,
100 double droptol, IV *pivotsizesIV, ChvList *postList,
101 ChvManager *chvmanager, int parent[], int stats[], double cpus[],
102 int msglvl, FILE *msgFile ) ;
103 /*
104 ------------------------------------------------------------------
105 purpose -- to visit a front during a factorization.
106 note: this method is called by the serial,
107 multithreaded and MPI factorization codes.
108
109 frontmtx -- front matrix object
110 pencil -- matrix pencil for A + sigma*B
111 J -- id of front we are working on
112 myid -- id of thread or process
113 owners[] -- map from fronts to owning threads,
114 in a serial environment, owners = NULL
115 fronts[] -- vector of pointers to fronts
116 lookahead -- parameter controls the partial upward visiting
117 of ancestors before returning
118 tau -- used when pivoting enabled,
119 |L_{j,i}| and |U_{i,j}| <= tau
120 droptol -- used for an approximate factorization
121 stored |L_{j,i}| and |U_{i,j}| > droptol
122 status[] -- status vector for the fronts,
123 'W' -- needs to be woke up
124 'A' -- active front
125 'F' -- front is finished
126 heads[] -- vector of pointers to IP objects that store the
127 front-to-front update lists
128 pivotsizesIV -- IV object used during the factorization of a front
129 when pivoting is enabled
130 workDV -- DV object used for working storage
131 parent -- front parent vector
132 aggList -- list object used in parallel environment, used
133 to store aggregate fronts
134 postList -- list object used in pivoting and/or parallel
135 environments, used to stored delayed data and/or
136 to synchronize the factorization
137 chvmanager -- used to manage working storage for Chv objects
138 stats[] -- statistics vector
139 cpus[] -- vector to hold breakdown of cpu times
140 msglvl -- message level
141 msgFil -- message file
142
143 created -- 98may04, cca
144 ------------------------------------------------------------------
145 */
146 char
FrontMtx_factorVisit(FrontMtx * frontmtx,Pencil * pencil,int J,int myid,int owners[],Chv * fronts[],int lookahead,double tau,double droptol,char status[],IP * heads[],IV * pivotsizesIV,DV * workDV,int parent[],ChvList * aggList,ChvList * postList,ChvManager * chvmanager,int stats[],double cpus[],int msglvl,FILE * msgFile)147 FrontMtx_factorVisit (
148 FrontMtx *frontmtx,
149 Pencil *pencil,
150 int J,
151 int myid,
152 int owners[],
153 Chv *fronts[],
154 int lookahead,
155 double tau,
156 double droptol,
157 char status[],
158 IP *heads[],
159 IV *pivotsizesIV,
160 DV *workDV,
161 int parent[],
162 ChvList *aggList,
163 ChvList *postList,
164 ChvManager *chvmanager,
165 int stats[],
166 double cpus[],
167 int msglvl,
168 FILE *msgFile
169 ) {
170 /*
171 ---------------
172 local variables
173 ---------------
174 */
175 char allAggregatesHere, allPostponedAssmb, allUpdatesDone ;
176 Chv *frontJ ;
177 double t1, t2 ;
178 int K, ndelay, nelim ;
179
180 if ( status[J] == 'F' ) {
181 return('F') ;
182 }
183 allUpdatesDone = 'N' ;
184 allAggregatesHere = 'N' ;
185 allPostponedAssmb = 'N' ;
186 frontJ = NULL ;
187 if ( heads[J] != NULL ) {
188 /*
189 --------------------------------
190 internal updates to be performed
191 --------------------------------
192 */
193 if ( (frontJ = fronts[J]) == NULL ) {
194 /*
195 --------------------
196 initialize the front
197 --------------------
198 */
199 fronts[J] = FrontMtx_setupFront(frontmtx, pencil, J, myid,
200 owners, chvmanager, cpus, msglvl, msgFile) ;
201 frontJ = fronts[J] ;
202 status[J] = 'A' ;
203 }
204 /*
205 ---------------------------------
206 compute updates from owned fronts
207 ---------------------------------
208 */
209 if ( msglvl > 1 ) {
210 fprintf(msgFile, "\n performing updates") ;
211 fflush(msgFile) ;
212 }
213 MARKTIME(t1) ;
214 FrontMtx_update(frontmtx, frontJ, heads,
215 status, workDV, msglvl, msgFile) ;
216 MARKTIME(t2) ;
217 cpus[2] += t2 - t1 ;
218 }
219 if ( heads[J] == NULL ) {
220 allUpdatesDone = 'Y' ;
221 }
222 if ( owners == NULL || owners[J] == myid ) {
223 /*
224 --------------
225 front is owned
226 --------------
227 */
228 if ( (frontJ = fronts[J]) == NULL ) {
229 /*
230 --------------------
231 initialize the front
232 --------------------
233 */
234 fronts[J] = FrontMtx_setupFront(frontmtx, pencil, J, myid,
235 owners, chvmanager, cpus, msglvl, msgFile) ;
236 frontJ = fronts[J] ;
237 status[J] = 'A' ;
238 }
239 if ( aggList != NULL ) {
240 /*
241 ------------------------------------------
242 we are operating in a parallel environment
243 ------------------------------------------
244 */
245 if ( ChvList_isListNonempty(aggList, J) == 1 ) {
246 /*
247 -------------------------------------------------
248 assemble any aggregate updates from other threads
249 -------------------------------------------------
250 */
251 assembleAggregates(frontJ, aggList, chvmanager,
252 cpus, msglvl, msgFile) ;
253 }
254 if ( ChvList_isCountZero(aggList, J) == 1 ) {
255 /*
256 -------------------------------------------------------
257 all aggregates are assembled or in the list to assemble
258 -------------------------------------------------------
259 */
260 if ( ChvList_isListNonempty(aggList, J) == 1 ) {
261 /*
262 -------------------------------------------------
263 assemble any aggregate updates from other threads
264 -------------------------------------------------
265 */
266 assembleAggregates(frontJ, aggList, chvmanager,
267 cpus, msglvl, msgFile) ;
268 }
269 allAggregatesHere = 'Y' ;
270 }
271 } else {
272 allAggregatesHere = 'Y' ;
273 }
274 if ( msglvl > 1 ) {
275 fprintf(msgFile, "\n\n allUpdatesDone %c, allAggregatesHere %c",
276 allUpdatesDone, allAggregatesHere) ;
277 fflush(msgFile) ;
278 }
279 if ( allUpdatesDone == 'Y' && allAggregatesHere == 'Y' ) {
280 /*
281 -------------------------------------
282 all internal updates and all external
283 aggregates have been incorporated
284 -------------------------------------
285 */
286 if ( postList != NULL ) {
287 /*
288 ---------------------------------------------
289 front is ready to assemble any postponed data
290 ---------------------------------------------
291 */
292 allPostponedAssmb = assemblePostponedData(frontmtx, frontJ,
293 &ndelay, fronts, postList,
294 chvmanager, cpus, msglvl, msgFile) ;
295 frontJ = fronts[J] ;
296 } else {
297 allPostponedAssmb = 'Y' ;
298 ndelay = 0 ;
299 }
300 if ( msglvl > 1 ) {
301 fprintf(msgFile,
302 "\n\n allPostponedAssmb %c", allPostponedAssmb) ;
303 fflush(msgFile) ;
304 }
305 if ( allPostponedAssmb == 'Y' ) {
306 /*
307 -----------------------------
308 front is ready to be factored
309 -----------------------------
310 */
311 nelim = factorFront(frontmtx, frontJ, ndelay, tau,
312 pivotsizesIV, workDV, stats,
313 cpus, msglvl, msgFile) ;
314 if ( msglvl > 1 ) {
315 fprintf(msgFile,
316 "\n\n J = %d, nelim = %d", frontJ->id, nelim) ;
317 fflush(msgFile) ;
318 }
319 if ( (! FRONTMTX_IS_PIVOTING(frontmtx)) && nelim < frontJ->nD){
320 /*
321 ------------------------------------------------
322 a singular front matrix has been found.
323 release the front and set the status to error
324 ------------------------------------------------
325 */
326 ChvManager_releaseObject(chvmanager, frontJ) ;
327 fronts[J] = NULL ;
328 status[J] = 'E' ;
329 } else {
330 /*
331 -------------------------------------------
332 store any factor entries and postponed data
333 -------------------------------------------
334 */
335 storeEntries(frontmtx, frontJ, nelim, droptol, pivotsizesIV,
336 postList, chvmanager, parent, stats,
337 cpus, msglvl, msgFile) ;
338 /*
339 ------------------------------------------------
340 release the front and set the status to finished
341 ------------------------------------------------
342 */
343 ChvManager_releaseObject(chvmanager, frontJ) ;
344 fronts[J] = NULL ;
345 status[J] = 'F' ;
346 }
347 }
348 }
349 } else if ( allUpdatesDone == 'Y' ) {
350 if ( frontJ != NULL ) {
351 /*
352 --------------------------------------------------------
353 front is not owned and all internal updates are complete
354 put aggregate update front on the aggregate list
355 --------------------------------------------------------
356 */
357 if ( msglvl > 1 ) {
358 fprintf(msgFile, "\n done with unowned front %3d", J) ;
359 fflush(msgFile) ;
360 }
361 if ( msglvl > 3 ) {
362 Chv_writeForHumanEye(frontJ, msgFile) ;
363 fflush(msgFile) ;
364 }
365 MARKTIME(t1) ;
366 ChvList_addObjectToList(aggList, frontJ, J) ;
367 MARKTIME(t2) ;
368 #if MYDEBUG > 0
369 fprintf(stdout, "\n thread %2d, done with unowned front %3d",
370 myid, J) ;
371 fflush(stdout) ;
372 #endif
373 cpus[7] += t2 - t1 ;
374 }
375 status[J] = 'F' ;
376 }
377 if ( --lookahead >= 0 && (K = parent[J]) != -1 ) {
378 /*
379 -----------------------------
380 visit parent before returning
381 -----------------------------
382 */
383 FrontMtx_factorVisit(frontmtx, pencil, J, myid, owners, fronts,
384 lookahead, tau, droptol, status, heads, pivotsizesIV,
385 workDV, parent, aggList, postList, chvmanager,
386 stats, cpus, msglvl, msgFile) ;
387 }
388 return(status[J]) ; }
389
390 /*--------------------------------------------------------------------*/
391 /*
392 --------------------------------------------
393 purpose -- set up the front's data structure
394
395 created -- 98mar27, cca
396 --------------------------------------------
397 */
398 Chv *
FrontMtx_setupFront(FrontMtx * frontmtx,Pencil * pencil,int J,int myid,int owners[],ChvManager * chvmanager,double cpus[],int msglvl,FILE * msgFile)399 FrontMtx_setupFront (
400 FrontMtx *frontmtx,
401 Pencil *pencil,
402 int J,
403 int myid,
404 int owners[],
405 ChvManager *chvmanager,
406 double cpus[],
407 int msglvl,
408 FILE *msgFile
409 ) {
410 Chv *frontJ ;
411 double t1, t2 ;
412 int nbytes, nD, nL, nU ;
413
414 if ( msglvl > 4 ) {
415 fprintf(msgFile,
416 "\n\n inside FrontMtx_setupFront()"
417 "\n frontmtx %p, pencil %p, J %d, myid %d"
418 "\n owners %p, chvmanager %p, cpus %p"
419 "\n msglvl %d, msgFile %p",
420 frontmtx, pencil, J, myid, owners, chvmanager,
421 cpus, msglvl, msgFile) ;
422 fflush(msgFile) ;
423 }
424 MARKTIME(t1) ;
425 FrontMtx_initialFrontDimensions(frontmtx, J,
426 &nD, &nL, &nU, &nbytes) ;
427 if ( msglvl > 2 ) {
428 fprintf(msgFile, "\n nD %d, nL %d, nU %d, nbytes %d",
429 nD, nL, nU, nbytes) ;
430 fflush(msgFile) ;
431 }
432 frontJ = ChvManager_newObjectOfSizeNbytes(chvmanager, nbytes) ;
433 if ( msglvl > 2 ) {
434 fprintf(msgFile, "\n frontJ = %p", frontJ) ;
435 fflush(msgFile) ;
436 }
437 Chv_init(frontJ, J, nD, nL, nU, frontmtx->type, frontmtx->symmetryflag);
438 FrontMtx_initializeFront(frontmtx, frontJ, J) ;
439 MARKTIME(t2) ;
440 cpus[0] += t2 - t1 ;
441 if ( pencil != NULL && (owners == NULL || owners[J] == myid) ) {
442 /*
443 -------------------------------------------------
444 thread owns this front, load the original entries
445 -------------------------------------------------
446 */
447 MARKTIME(t1) ;
448 FrontMtx_loadEntries(frontJ, pencil, msglvl, msgFile) ;
449 MARKTIME(t2) ;
450 cpus[1] += t2 - t1 ;
451 if ( msglvl > 1 ) {
452 fprintf(msgFile, "\n original entries loaded") ;
453 fflush(msgFile) ;
454 }
455 /*
456 if ( FRONTMTX_IS_HERMITIAN(frontmtx) && J == frontmtx->nfront - 1 ) {
457 fprintf(msgFile, "\n last front after entries loaded") ;
458 Chv_writeForHumanEye(frontJ, msgFile) ;
459 fflush(msgFile) ;
460 }
461 */
462 }
463 if ( msglvl > 1 ) {
464 fprintf(msgFile, "\n\n front initialized") ;
465 fflush(msgFile) ;
466 }
467 if ( msglvl > 3 ) {
468 Chv_writeForHumanEye(frontJ, msgFile) ;
469 fflush(msgFile) ;
470 }
471 return(frontJ) ; }
472
473 /*--------------------------------------------------------------------*/
474 /*
475 ------------------------------------------------------------
476 purpose -- assemble any aggregate updates from other threads
477 ------------------------------------------------------------
478 */
479 static void
assembleAggregates(Chv * frontJ,ChvList * aggList,ChvManager * chvmanager,double cpus[],int msglvl,FILE * msgFile)480 assembleAggregates (
481 Chv *frontJ,
482 ChvList *aggList,
483 ChvManager *chvmanager,
484 double cpus[],
485 int msglvl,
486 FILE *msgFile
487 ) {
488 Chv *chv, *headchv ;
489 double t1, t2 ;
490
491 MARKTIME(t1) ;
492 headchv = ChvList_getList(aggList, frontJ->id) ;
493 for ( chv = headchv ; chv != NULL ; chv = chv->next ) {
494 Chv_assembleChv(frontJ, chv) ;
495 }
496 MARKTIME(t2) ;
497 cpus[8] += t2 - t1 ;
498 ChvManager_releaseListOfObjects(chvmanager, headchv) ;
499 if ( msglvl > 3 ) {
500 fprintf(msgFile, "\n after assembly") ;
501 Chv_writeForHumanEye(frontJ, msgFile) ;
502 fflush(msgFile) ;
503 }
504 return ; }
505
506 /*--------------------------------------------------------------------*/
507 /*
508 --------------------------------------
509 purpose -- assemble any postponed data
510
511 created -- 98mar27, cca
512 --------------------------------------
513 */
514 static char
assemblePostponedData(FrontMtx * frontmtx,Chv * frontJ,int * pndelay,Chv * fronts[],ChvList * postList,ChvManager * chvmanager,double cpus[],int msglvl,FILE * msgFile)515 assemblePostponedData (
516 FrontMtx *frontmtx,
517 Chv *frontJ,
518 int *pndelay,
519 Chv *fronts[],
520 ChvList *postList,
521 ChvManager *chvmanager,
522 double cpus[],
523 int msglvl,
524 FILE *msgFile
525 ) {
526 char rc ;
527 double t1, t2 ;
528 int J ;
529
530 if ( msglvl > 4 ) {
531 fprintf(msgFile, "\n\n frontmtx %p, frontJ %p, pndelay %p"
532 "\n fronts %p, postList %p, chvmanager %p, cpus %p",
533 frontmtx, frontJ, pndelay,
534 fronts, postList, chvmanager, cpus) ;
535 fflush(msgFile) ;
536 }
537
538
539 J = frontJ->id ;
540 if ( msglvl > 1 ) {
541 if ( ChvList_isCountZero(postList, J) == 1) {
542 fprintf(msgFile, "\n all postponed data is here") ;
543 fflush(msgFile) ;
544 } else {
545 fprintf(msgFile, "\n still waiting on postponed data") ;
546 fflush(msgFile) ;
547 }
548 }
549 if ( ChvList_isCountZero(postList, J) == 1 ) {
550 if ( msglvl > 1 ) {
551 fprintf(msgFile, "\n assembling postponed data") ;
552 fflush(msgFile) ;
553 }
554 /*
555 ---------------------------
556 assemble any postponed data
557 ---------------------------
558 */
559 MARKTIME(t1) ;
560 fronts[J] = FrontMtx_assemblePostponedData(frontmtx, frontJ,
561 postList, chvmanager, pndelay) ;
562 if ( frontJ != fronts[J] ) {
563 if ( msglvl > 1 ) {
564 fprintf(msgFile, "\n releasing old front") ;
565 fflush(msgFile) ;
566 }
567 ChvManager_releaseObject(chvmanager, frontJ) ;
568 }
569 MARKTIME(t2) ;
570 cpus[3] += t2 - t1 ;
571 rc = 'Y' ;
572 } else {
573 rc = 'N' ;
574 }
575 return(rc) ; }
576
577 /*--------------------------------------------------------------------*/
578 /*
579 ---------------------------
580 purpose -- factor the front
581
582 created -- 98mar27, cca
583 ---------------------------
584 */
585 static int
factorFront(FrontMtx * frontmtx,Chv * frontJ,int ndelay,double tau,IV * pivotsizesIV,DV * workDV,int stats[],double cpus[],int msglvl,FILE * msgFile)586 factorFront (
587 FrontMtx *frontmtx,
588 Chv *frontJ,
589 int ndelay,
590 double tau,
591 IV *pivotsizesIV,
592 DV *workDV,
593 int stats[],
594 double cpus[],
595 int msglvl,
596 FILE *msgFile
597 ) {
598 double t1, t2 ;
599 int nelim, npost ;
600
601 if ( msglvl > 2 ) {
602 fprintf(msgFile, "\n frontJ = %p, ndelay = %d",
603 frontJ, ndelay) ;
604 fprintf(msgFile, "\n tau = %12.4e", tau) ;
605 fprintf(msgFile, "\n stats %p, cpus %p", stats, cpus) ;
606 fflush(msgFile) ;
607 }
608 if ( msglvl > 2 ) {
609 Chv_writeForHumanEye(frontJ, msgFile) ;
610 fflush(msgFile) ;
611 }
612 MARKTIME(t1) ;
613 if ( FRONTMTX_IS_PIVOTING(frontmtx) ) {
614 nelim = Chv_factorWithPivoting(frontJ, ndelay,
615 frontmtx->pivotingflag, pivotsizesIV,
616 workDV, tau, &stats[1]) ;
617 } else {
618 nelim = Chv_factorWithNoPivoting(frontJ, frontmtx->patchinfo) ;
619 }
620 npost = frontJ->nD - nelim ;
621 stats[2] += npost ;
622 if ( !(FRONTMTX_IS_PIVOTING(frontmtx))
623 || FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
624 stats[0] += nelim ;
625 } else {
626 stats[0] += IV_size(pivotsizesIV) ;
627 }
628 MARKTIME(t2) ;
629 cpus[4] += t2 - t1 ;
630 if ( msglvl > 1 ) {
631 fprintf(msgFile, "\n\n front %d, nelim = %d, npost = %d",
632 frontJ->id, nelim, npost) ;
633 fflush(msgFile) ;
634 }
635 if ( msglvl > 2 ) {
636 Chv_writeForHumanEye(frontJ, msgFile) ;
637 fflush(msgFile) ;
638 }
639 return(nelim) ; }
640
641 /*--------------------------------------------------------------------*/
642 /*
643 ---------------------------------------------------------------
644 purpose -- store the factor entries in the front matrix object,
645 and extract any postponed data and put it in the
646 list object
647
648 created -- 98mar27, cca
649 ---------------------------------------------------------------
650 */
651 static void
storeEntries(FrontMtx * frontmtx,Chv * frontJ,int nelim,double droptol,IV * pivotsizesIV,ChvList * postList,ChvManager * chvmanager,int parent[],int stats[],double cpus[],int msglvl,FILE * msgFile)652 storeEntries (
653 FrontMtx *frontmtx,
654 Chv *frontJ,
655 int nelim,
656 double droptol,
657 IV *pivotsizesIV,
658 ChvList *postList,
659 ChvManager *chvmanager,
660 int parent[],
661 int stats[],
662 double cpus[],
663 int msglvl,
664 FILE *msgFile
665 ) {
666 double t1, t2 ;
667 int npost ;
668
669 npost = frontJ->nD - nelim ;
670 if ( msglvl > 1 ) {
671 fprintf(msgFile, "\n storing factor data, nelim = %d", nelim) ;
672 fflush(msgFile) ;
673 }
674 MARKTIME(t1) ;
675 frontJ->nD -= npost ;
676 frontJ->nL += npost ;
677 frontJ->nU += npost ;
678 FrontMtx_storeFront(frontmtx, frontJ, pivotsizesIV, droptol,
679 msglvl, msgFile) ;
680 frontJ->nD += npost ;
681 frontJ->nL -= npost ;
682 frontJ->nU -= npost ;
683 MARKTIME(t2) ;
684 cpus[6] += t2 - t1 ;
685 if ( postList != NULL ) {
686 Chv *postponedchv ;
687 if ( npost > 0 ) {
688 /*
689 ---------------------------------
690 there is postponed data,
691 extract and put on postponed list
692 ---------------------------------
693 */
694 postponedchv = frontJ ;
695 if ( msglvl > 2 ) {
696 fprintf(msgFile, "\n postponed data for front %d",
697 frontJ->id) ;
698 Chv_writeForHumanEye(postponedchv, msgFile) ;
699 fflush(msgFile) ;
700 }
701 } else {
702 postponedchv = NULL ;
703 }
704 if ( msglvl > 1 ) {
705 fprintf(msgFile, "\n storing postponed data %p", postponedchv);
706 fflush(msgFile) ;
707 }
708 MARKTIME(t1) ;
709 FrontMtx_storePostponedData(frontmtx, postponedchv, npost,
710 parent[frontJ->id], postList, chvmanager) ;
711 MARKTIME(t2) ;
712 cpus[5] += t2 - t1 ;
713 }
714 return ; }
715
716 /*--------------------------------------------------------------------*/
717 /*
718 --------------------------------------------------------------------
719 purpose -- to set up the link data structures
720 for a parallel factorization for process myid
721
722 return value -- pointer to IP *heads[nfront+2], which contains
723 the beginning of a list of IP objects that store the remaining
724 updates to the fronts.
725 note, heads[nfront] is the first IP object in the free list.
726 heads[nfront+1] is the base address of the allocated IP objects.
727
728 created -- 98mar07, cca
729 --------------------------------------------------------------------
730 */
731 IP **
FrontMtx_factorSetup(FrontMtx * frontmtx,IV * frontOwnersIV,int myid,int msglvl,FILE * msgFile)732 FrontMtx_factorSetup (
733 FrontMtx *frontmtx,
734 IV *frontOwnersIV,
735 int myid,
736 int msglvl,
737 FILE *msgFile
738 ) {
739 int count, ii, J, K, nadj, nfront ;
740 int *adj, *mark, *owners, *vtxToFront ;
741 IP *ip ;
742 IP **heads ;
743 IVL *symbfacIVL ;
744 /*
745 ---------------------------------------------------
746 count the number of updates this front will perform
747 ---------------------------------------------------
748 */
749 nfront = FrontMtx_nfront(frontmtx) ;
750 if ( frontOwnersIV != NULL ) {
751 owners = IV_entries(frontOwnersIV) ;
752 } else {
753 owners = NULL ;
754 }
755 symbfacIVL = frontmtx->symbfacIVL ;
756 vtxToFront = ETree_vtxToFront(frontmtx->frontETree) ;
757 mark = IVinit(nfront, -1) ;
758 for ( J = count = 0 ; J < nfront ; J++ ) {
759 if ( owners == NULL || owners[J] == myid ) {
760 IVL_listAndSize(symbfacIVL, J, &nadj, &adj) ;
761 mark[J] = J ;
762 for ( ii = 0 ; ii < nadj ; ii++ ) {
763 K = vtxToFront[adj[ii]] ;
764 if ( mark[K] != J ) {
765 mark[K] = J ;
766 count++ ;
767 }
768 }
769 }
770 }
771 /*
772 --------------------------------------------------
773 set up the update head/links for the factorization
774 --------------------------------------------------
775 */
776 ALLOCATE(heads, struct _IP *, nfront + 2) ;
777 for ( J = 0 ; J <= nfront + 1 ; J++ ) {
778 heads[J] = NULL ;
779 }
780 heads[nfront] = heads[nfront+1] = IP_init(count, 1) ;
781 IVfill(nfront, mark, -1) ;
782 for ( J = 0 ; J < nfront ; J++ ) {
783 if ( owners == NULL || owners[J] == myid ) {
784 IVL_listAndSize(symbfacIVL, J, &nadj, &adj) ;
785 mark[J] = J ;
786 for ( ii = 0 ; ii < nadj ; ii++ ) {
787 K = vtxToFront[adj[ii]] ;
788 if ( mark[K] != J ) {
789 mark[K] = J ;
790 ip = heads[nfront] ; heads[nfront] = ip->next ;
791 ip->val = J ; ip->next = heads[K] ; heads[K] = ip ;
792 if ( msglvl > 3 ) {
793 fprintf(msgFile, "\n linking L(%d,%d) to L(%d,%d)",
794 K, J, K, (ip->next == NULL) ? -1 : ip->next->val) ;
795 fflush(msgFile) ;
796 }
797 }
798 }
799 }
800 }
801 IVfree(mark) ;
802
803 return(heads) ; }
804
805 /*--------------------------------------------------------------------*/
806 /*
807 -----------------------------------------------
808 create and return the nactiveChild vector.
809 nactiveChild[J] contains the number of children
810 of J that belong to an active path
811
812 created -- 97jul03, cca
813 -----------------------------------------------
814 */
815 int *
FrontMtx_nactiveChild(FrontMtx * frontmtx,char * status,int myid)816 FrontMtx_nactiveChild (
817 FrontMtx *frontmtx,
818 char *status,
819 int myid
820 ) {
821 int J, K, nfront ;
822 int *nactiveChild, *par ;
823 /*
824 ---------------
825 check the input
826 ---------------
827 */
828 if ( frontmtx == NULL || status == NULL || myid < 0 ) {
829 fprintf(stderr, "\n fatal error in FrontMtx_nativeChild(%p,%p,%d)"
830 "\n bad input\n", frontmtx, status, myid) ;
831 exit(-1) ;
832 }
833 nfront = frontmtx->nfront ;
834 par = ETree_par(frontmtx->frontETree) ;
835 /*
836 ---------------------------------------
837 create and fill the nactiveChild vector
838 ---------------------------------------
839 */
840 nactiveChild = IVinit(nfront, 0) ;
841 for ( J = 0 ; J < nfront ; J++ ) {
842 if ( status[J] == 'W' && (K = par[J]) != -1 ) {
843 nactiveChild[K]++ ;
844 }
845 }
846 return(nactiveChild) ; }
847
848 /*--------------------------------------------------------------------*/
849 /*
850 ---------------------------------------------------------
851 create, initialize and return a Ideq object
852 that will be used for a parallel factorization,
853 forward solve, or backward solve.
854
855 the status[] vector will be set as follows:
856 status[J] = activeflag if J is on an active path
857 status[J] = inactiveflag if J is not on an active path
858
859 created -- 98mar27, cca
860 ---------------------------------------------------------
861 */
862 Ideq *
FrontMtx_setUpDequeue(FrontMtx * frontmtx,int owners[],int myid,char status[],IP * heads[],char activeFlag,char inactiveFlag,int msglvl,FILE * msgFile)863 FrontMtx_setUpDequeue (
864 FrontMtx *frontmtx,
865 int owners[],
866 int myid,
867 char status[],
868 IP *heads[],
869 char activeFlag,
870 char inactiveFlag,
871 int msglvl,
872 FILE *msgFile
873 ) {
874 Ideq *dequeue ;
875 int J, K, nfront, npath ;
876 int *par ;
877 Tree *tree ;
878 /*
879 ---------------
880 check the input
881 ---------------
882 */
883 if ( frontmtx == NULL || owners == NULL
884 || status == NULL || myid < 0 ) {
885 fprintf(stderr,
886 "\n fatal error in FrontMtx_setUpDequeue()"
887 "\n frontmtx %p, owners %p, myid %d, status %p"
888 "\n heads %p, activeFlag %c, inactiveFlag %c"
889 "\n bad input\n",
890 frontmtx, owners, myid, status, heads,
891 activeFlag, inactiveFlag) ;
892 exit(-1) ;
893 }
894 nfront = frontmtx->nfront ;
895 tree = frontmtx->tree ;
896 par = tree->par ;
897 /*
898 ------------------------------------
899 count the number of active paths in
900 the tree and set the status[] vector
901 ------------------------------------
902 */
903 CVfill(nfront, status, inactiveFlag) ;
904 for ( J = npath = 0 ; J < nfront ; J++ ) {
905 if ( status[J] == inactiveFlag ) {
906 if ( owners[J] == myid || (heads != NULL && heads[J] != NULL) ) {
907 npath++ ;
908 for ( K = J ;
909 K != -1 && status[K] != activeFlag ;
910 K = par[K] ) {
911 status[K] = activeFlag ;
912 }
913 }
914 }
915 }
916 dequeue = Ideq_new() ;
917 Ideq_resize(dequeue, npath) ;
918
919 return(dequeue) ; }
920
921 /*--------------------------------------------------------------------*/
922 /*
923 -----------------------------------------------------------------
924 purpose -- load the dequeue with the leaves of the active subtree
925 used for a factorization and forward solve
926
927 created -- 98mar27, cca
928 -----------------------------------------------------------------
929 */
930 void
FrontMtx_loadActiveLeaves(FrontMtx * frontmtx,char status[],char activeFlag,Ideq * dequeue)931 FrontMtx_loadActiveLeaves (
932 FrontMtx *frontmtx,
933 char status[],
934 char activeFlag,
935 Ideq *dequeue
936 ) {
937 int I, J, nactivechild, nfront ;
938 int *fch, *sib ;
939
940 nfront = frontmtx->nfront ;
941 fch = frontmtx->tree->fch ;
942 sib = frontmtx->tree->sib ;
943 for ( J = 0 ; J < nfront ; J++ ) {
944 if ( status[J] == activeFlag ) {
945 if ( fch[J] == -1 ) {
946 Ideq_insertAtTail(dequeue, J) ;
947 } else {
948 nactivechild = 0 ;
949 for ( I = fch[J] ; I != -1 ; I = sib[I] ) {
950 if ( status[I] == activeFlag ) {
951 nactivechild++ ;
952 break ;
953 }
954 }
955 if ( nactivechild == 0 ) {
956 Ideq_insertAtTail(dequeue, J) ;
957 }
958 }
959 }
960 }
961 return ; }
962
963 /*--------------------------------------------------------------------*/
964 /*
965 -----------------------------------------------
966 create, initialize and return a ChvList object
967 to deal with postponed chevrons
968
969 created -- 97jul03, cca
970 -----------------------------------------------
971 */
972 ChvList *
FrontMtx_postList(FrontMtx * frontmtx,IV * frontOwnersIV,int lockflag)973 FrontMtx_postList (
974 FrontMtx *frontmtx,
975 IV *frontOwnersIV,
976 int lockflag
977 ) {
978 char *flags ;
979 ChvList *postList ;
980 int count, I, J, jthread, nchild, nfront, nthread ;
981 int *counts, *fch, *frontOwners, *mark, *sib ;
982 /*
983 ---------------
984 check the input
985 ---------------
986 */
987 if ( frontmtx == NULL || frontOwnersIV == NULL
988 || lockflag < 0 || lockflag > 2 ) {
989 fprintf(stderr,
990 "\n fatal error in FrontMtx_postList(%p,%p,%d)"
991 "\n bad input\n", frontmtx, frontOwnersIV, lockflag) ;
992 exit(-1) ;
993 }
994 fch = ETree_fch(frontmtx->frontETree) ;
995 sib = ETree_sib(frontmtx->frontETree) ;
996 IV_sizeAndEntries(frontOwnersIV, &nfront, &frontOwners) ;
997 counts = IVinit(nfront+1, 0) ;
998 if ( lockflag > 0 ) {
999 flags = CVinit(nfront+1, 'N') ;
1000 } else {
1001 flags = NULL ;
1002 }
1003 nthread = 1 + IV_max(frontOwnersIV) ;
1004 mark = IVinit(nthread, -1) ;
1005 /*
1006 --------------------
1007 loop over the fronts
1008 --------------------
1009 */
1010 for ( J = 0 ; J < nfront ; J++ ) {
1011 count = nchild = 0 ;
1012 for ( I = fch[J] ; I != -1 ; I = sib[I] ) {
1013 nchild++ ;
1014 jthread = frontOwners[I] ;
1015 if ( mark[jthread] != J ) {
1016 mark[jthread] = J ;
1017 count++ ;
1018 }
1019 }
1020 counts[J] = nchild ;
1021 if ( flags != NULL ) {
1022 if ( count > 1 ) {
1023 flags[J] = 'Y' ;
1024 } else {
1025 flags[J] = 'N' ;
1026 }
1027 }
1028 }
1029 count = nchild = 0 ;
1030 for ( J = ETree_root(frontmtx->frontETree) ; J != -1 ; J = sib[J] ) {
1031 nchild++ ;
1032 jthread = frontOwners[J] ;
1033 if ( mark[jthread] != J ) {
1034 mark[jthread] = J ;
1035 count++ ;
1036 }
1037 }
1038 counts[nfront] = nchild ;
1039 if ( flags != NULL ) {
1040 if ( count > 1 ) {
1041 flags[nfront] = 'Y' ;
1042 } else {
1043 flags[nfront] = 'N' ;
1044 }
1045 }
1046 /*
1047 -----------------------------------------
1048 create and initialize the ChvList object
1049 -----------------------------------------
1050 */
1051 postList = ChvList_new() ;
1052 ChvList_init(postList, nfront+1, counts, lockflag, flags) ;
1053 /*
1054 ------------------------
1055 free the working storage
1056 ------------------------
1057 */
1058 IVfree(mark) ;
1059 IVfree(counts) ;
1060 if ( flags != NULL ) {
1061 CVfree(flags) ;
1062 }
1063
1064 return(postList) ; }
1065
1066 /*--------------------------------------------------------------------*/
1067 /*
1068 -----------------------------------------------
1069 create, initialize and return a ChvList object
1070 to deal with aggregate chevrons
1071
1072 created -- 97jul03, cca
1073 -----------------------------------------------
1074 */
1075 ChvList *
FrontMtx_aggregateList(FrontMtx * frontmtx,IV * frontOwnersIV,int lockflag)1076 FrontMtx_aggregateList (
1077 FrontMtx *frontmtx,
1078 IV *frontOwnersIV,
1079 int lockflag
1080 ) {
1081 char *flags ;
1082 ChvList *aggList ;
1083 int count, ii, I, J, jthread, K, myid, nfront, nthread, size ;
1084 int *counts, *frontOwners, *head, *indices, *link,
1085 *mark, *offsets, *vtxToFront ;
1086 IVL *symbfacIVL ;
1087
1088 /*
1089 ---------------
1090 check the input
1091 ---------------
1092 */
1093 if ( frontmtx == NULL || frontOwnersIV == NULL
1094 || lockflag < 0 || lockflag > 2 ) {
1095 fprintf(stderr,
1096 "\n fatal error in FrontMtx_aggregateList(%p,%p,%d)"
1097 "\n bad input\n", frontmtx, frontOwnersIV, lockflag) ;
1098 exit(-1) ;
1099 }
1100 symbfacIVL = frontmtx->symbfacIVL ;
1101 vtxToFront = ETree_vtxToFront(frontmtx->frontETree) ;
1102 IV_sizeAndEntries(frontOwnersIV, &nfront, &frontOwners) ;
1103 nthread = 1 + IV_max(frontOwnersIV) ;
1104 mark = IVinit(nthread, -1) ;
1105 head = IVinit(nfront, -1) ;
1106 link = IVinit(nfront, -1) ;
1107 offsets = IVinit(nfront, 0) ;
1108 counts = IVinit(nfront, 0) ;
1109 if ( lockflag > 0 ) {
1110 flags = CVinit(nfront, 'N') ;
1111 } else {
1112 flags = NULL ;
1113 }
1114 /*
1115 --------------------
1116 loop over the fronts
1117 --------------------
1118 */
1119 for ( J = 0 ; J < nfront ; J++ ) {
1120 myid = frontOwners[J] ;
1121 #if MYDEBUG > 0
1122 fprintf(stdout, "\n\n front %d, owner %d", J, myid) ;
1123 fflush(stdout) ;
1124 #endif
1125 mark[myid] = J ;
1126 count = 0 ;
1127 /*
1128 ---------------------------------------------------
1129 loop over all descendent fronts that might update J
1130 ---------------------------------------------------
1131 */
1132 while ( (I = head[J]) != -1 ) {
1133 head[J] = link[I] ;
1134 jthread = frontOwners[I] ;
1135 #if MYDEBUG > 0
1136 fprintf(stdout, "\n descendent front %d, owner %d", I,
1137 jthread) ;
1138 fflush(stdout) ;
1139 #endif
1140 if ( mark[jthread] != J ) {
1141 /*
1142 --------------------------------
1143 expect an aggregate from jthread
1144 --------------------------------
1145 */
1146 #if MYDEBUG > 0
1147 fprintf(stdout, ", incoming aggregate") ;
1148 fflush(stdout) ;
1149 #endif
1150 mark[jthread] = J ;
1151 count++ ;
1152 }
1153 /*
1154 --------------------------------------------------
1155 link front I to next ancestor that it does not own
1156 --------------------------------------------------
1157 */
1158 IVL_listAndSize(symbfacIVL, I, &size, &indices) ;
1159 for ( ii = offsets[I] ; ii < size ; ii++ ) {
1160 if ( (K = vtxToFront[indices[ii]]) > J
1161 && frontOwners[K] != jthread ) {
1162 #if MYDEBUG > 0
1163 fprintf(stdout, ", link to %d", K) ;
1164 fflush(stdout) ;
1165 #endif
1166 offsets[I] = ii ;
1167 link[I] = head[K] ;
1168 head[K] = I ;
1169 break ;
1170 }
1171 }
1172 }
1173 /*
1174 -------------------------------------
1175 set the number of incoming aggregates
1176 -------------------------------------
1177 */
1178 counts[J] = count ;
1179 #if MYDEBUG > 0
1180 fprintf(stdout, "\n counts[%d] = %d", J, counts[J]) ;
1181 fflush(stdout) ;
1182 #endif
1183 /*
1184 ---------------------------------------------------
1185 set the flags to see if the list needs to be locked
1186 ---------------------------------------------------
1187 */
1188 if ( flags != NULL ) {
1189 if ( count > 1 ) {
1190 flags[J] = 'Y' ;
1191 } else {
1192 flags[J] = 'N' ;
1193 }
1194 #if MYDEBUG > 0
1195 fprintf(stdout, ", flags[%d] = %c", J, flags[J]) ;
1196 fflush(stdout) ;
1197 #endif
1198 }
1199 /*
1200 --------------------------------------------------
1201 link front J to next ancestor that it does not own
1202 --------------------------------------------------
1203 */
1204 IVL_listAndSize(symbfacIVL, J, &size, &indices) ;
1205 for ( ii = 0 ; ii < size ; ii++ ) {
1206 if ( (K = vtxToFront[indices[ii]]) > J && frontOwners[K] != myid
1207 ) {
1208 #if MYDEBUG > 0
1209 fprintf(stdout, ", linking to %d", K) ;
1210 fflush(stdout) ;
1211 #endif
1212 offsets[J] = ii ;
1213 link[J] = head[K] ;
1214 head[K] = J ;
1215 break ;
1216 }
1217 }
1218 }
1219 #if MYDEBUG > 0
1220 fprintf(stdout, "\n counts") ;
1221 IVfprintf(stdout, nfront, counts) ;
1222 fflush(stdout) ;
1223 #endif
1224 /*
1225 -----------------------------------------
1226 create and initialize the ChvList object
1227 -----------------------------------------
1228 */
1229 aggList = ChvList_new() ;
1230 ChvList_init(aggList, nfront, counts, lockflag, flags) ;
1231 /*
1232 ------------------------
1233 free the working storage
1234 ------------------------
1235 */
1236 IVfree(counts) ;
1237 IVfree(head) ;
1238 IVfree(link) ;
1239 IVfree(offsets) ;
1240 IVfree(mark) ;
1241 if ( flags != NULL ) {
1242 CVfree(flags) ;
1243 }
1244 return(aggList) ; }
1245
1246 /*--------------------------------------------------------------------*/
1247