1 /*  factorMT.c  */
2 
3 #include "../spoolesMT.h"
4 #include "../../Ideq.h"
5 #include "../../timings.h"
6 
7 /*--------------------------------------------------------------------*/
8 /*
9    -----------------------------
10    worker method for each thread
11    -----------------------------
12 */
13 static void * FrontMtx_workerFactor ( void *arg ) ;
14 /*
15    ----------------------------------------------------------------
16   this object is used during a threaded factorization
17 
18    pencil    -- matrix pencil A + sigma * B
19    tau       -- upper bound on factor entries when pivoting enabled
20    droptol   -- drop tolerance used for sparse fronts
21    ownersIV  -- map from fronts to threads
22    lookahead -- parameter used to control computation lookahead
23       0   --> no lookahead
24       > 0 --> look up a number of levels up the tree
25 
26    frontmtx  -- object used to store factorization
27    manager   -- object used to manage working Chv objects
28    aggList   -- object used to store aggregate data
29    postList  -- object used to store postponed data
30    perror    -- pointer to external error flag
31 
32    heads   -- IP vector to maintain update lists
33    myid    -- thread id
34    fronts  -- vector of pointers to active fronts
35    stats   -- int statistics vector
36    cpus    -- double vector to store breakdown of cpu times
37    msglvl  -- message level
38    msgFile -- message file
39 
40    created -- 97may30, cca
41    ----------------------------------------------------------------
42 */
43 typedef struct _FactorData   FactorData ;
44 struct _FactorData {
45 /*
46    -------------------------
47    global data, not modified
48    -------------------------
49 */
50    Pencil      *pencil   ;
51    double      tau       ;
52    double      droptol   ;
53    IV          *ownersIV ;
54    int         lookahead ;
55 /*
56    ---------------------
57    shared data, modified
58    ---------------------
59 */
60    FrontMtx    *frontmtx   ;
61    ChvManager  *chvmanager ;
62    ChvList     *aggList    ;
63    ChvList     *postList   ;
64    int         *perror     ;
65 /*
66    ----------
67    local data
68    ----------
69 */
70    int          myid      ;
71    int          stats[10] ;
72    double       cpus[20]  ;
73    int          msglvl    ;
74    FILE         *msgFile  ;
75 } ;
76 /*--------------------------------------------------------------------*/
77 /*
78    -------------------------------------------------------------------
79    parallel factorization method for A.
80    all but two input parameters are the same as the serial method.
81    this is a wrapper method around FrontMtx_MT_factorInpMtx().
82 
83    ownersIV  -- pointer to IV object that holds map from fronts
84                 to threads
85    lookahead -- lookahead parameter that allows computation at
86                 higher levels of the front tree to proceed when
87                 lower fronts are not yet finish. use lookahead = 0
88                 to turn off this option. otherwise lookahead ancestors
89                 of an active unfinished front can be active.
90 
91    return value -- pointer to the first Chv object in a list
92                    that contains postponed data
93 
94    created -- 98may16, cca
95    -------------------------------------------------------------------
96 */
97 Chv *
FrontMtx_MT_factorInpMtx(FrontMtx * frontmtx,InpMtx * inpmtx,double tau,double droptol,ChvManager * chvmanager,IV * ownersIV,int lookahead,int * perror,double cpus[],int stats[],int msglvl,FILE * msgFile)98 FrontMtx_MT_factorInpMtx (
99    FrontMtx     *frontmtx,
100    InpMtx       *inpmtx,
101    double       tau,
102    double       droptol,
103    ChvManager   *chvmanager,
104    IV           *ownersIV,
105    int          lookahead,
106    int          *perror,
107    double       cpus[],
108    int          stats[],
109    int          msglvl,
110    FILE         *msgFile
111 ) {
112 Chv      *rootchv ;
113 double   sigma[2] = {0.0, 0.0} ;
114 Pencil   pencil ;
115 
116 Pencil_setDefaultFields(&pencil) ;
117 Pencil_init(&pencil, frontmtx->type, frontmtx->symmetryflag,
118             inpmtx, sigma, NULL) ;
119 rootchv = FrontMtx_MT_factorPencil(frontmtx, &pencil, tau, droptol,
120                                  chvmanager, ownersIV, lookahead,
121                                  perror, cpus, stats, msglvl, msgFile) ;
122 
123 return(rootchv) ; }
124 
125 /*--------------------------------------------------------------------*/
126 /*
127    -------------------------------------------------------------------
128    parallel factorization method for A + sigma*B.
129    all but two input parameters are the same
130    as the FrontMtx_factorPencil method.
131 
132    ownersIV  -- pointer to IV object that holds map from fronts
133                 to threads
134    lookahead -- lookahead parameter that allows computation at
135                 higher levels of the front tree to proceed when
136                 lower fronts are not yet finish. use lookahead = 0
137                 to turn off this option. otherwise lookahead ancestors
138                 of an active unfinished front can be active.
139 
140    return value -- pointer to the first Chv object in a list
141                    that contains postponed data
142 
143    created -- 98may16, cca
144    -------------------------------------------------------------------
145 */
146 Chv *
FrontMtx_MT_factorPencil(FrontMtx * frontmtx,Pencil * pencil,double tau,double droptol,ChvManager * chvmanager,IV * ownersIV,int lookahead,int * perror,double cpus[],int stats[],int msglvl,FILE * msgFile)147 FrontMtx_MT_factorPencil (
148    FrontMtx     *frontmtx,
149    Pencil       *pencil,
150    double       tau,
151    double       droptol,
152    ChvManager   *chvmanager,
153    IV           *ownersIV,
154    int          lookahead,
155    int          *perror,
156    double       cpus[],
157    int          stats[],
158    int          msglvl,
159    FILE         *msgFile
160 ) {
161 char         buffer[20] ;
162 Chv          *rootchv ;
163 ChvList      *aggList ;
164 ChvList      *postList ;
165 double       t0, t1, t2 ;
166 FactorData   *data, *dataObjects ;
167 FILE         *fp ;
168 int          ierr, ii, myid, nfront, nthread, rc ;
169 int          *owners ;
170 /*
171    --------------
172    check the data
173    --------------
174 */
175 MARKTIME(t0) ;
176 if (  frontmtx == NULL || pencil == NULL || tau < 1.0 || droptol < 0.0
177    || ownersIV == NULL || lookahead < 0 || cpus == NULL || stats == NULL
178    || msglvl < 0 || (msglvl > 0 && msgFile == NULL) ) {
179    fprintf(stderr, "\n fatal error in FrontMtx_MT_factorPencil()"
180            "\n frontmtx = %p, pencil = %p"
181            "\n tau = %f, droptol = %f, ownersIV = %p, lookahead = %d"
182            "\n cpus = %p, stats = %p, msglvl = %d, msgFile = %p"
183            "\n bad input\n", frontmtx, pencil, tau, droptol,
184            ownersIV, lookahead, cpus, stats, msglvl, msgFile) ;
185    exit(-1) ;
186 }
187 IV_sizeAndEntries(ownersIV, &nfront, &owners) ;
188 nthread = 1 + IV_max(ownersIV) ;
189 /*
190    --------------------------------------------------------------------
191    create :
192    (1) a ChvList object to handle the lists of aggregate Chv objects,
193    (2) if pivoting is enabled, a ChvList object to handle
194        the lists of postponed Chv objects.
195    --------------------------------------------------------------------
196 */
197 MARKTIME(t1) ;
198 aggList = FrontMtx_aggregateList(frontmtx, ownersIV, 1) ;
199 if ( FRONTMTX_IS_PIVOTING(frontmtx) ) {
200    postList = FrontMtx_postList(frontmtx, ownersIV, 1) ;
201 } else {
202    postList = NULL ;
203 }
204 MARKTIME(t2) ;
205 if ( msglvl > 1 ) {
206    fprintf(msgFile, "\n CPU %8.3f : initialize lists and manager",
207            t2 - t1) ;
208 }
209 /*
210    ------------------
211    set the error flag
212    ------------------
213 */
214 *perror = -1 ;
215 /*
216    ----------------------------------------------------------
217    create nthread FactorData objects and load with their data
218    ----------------------------------------------------------
219 */
220 MARKTIME(t1) ;
221 ALLOCATE(dataObjects, struct _FactorData, nthread) ;
222 for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
223    data->pencil     = pencil     ;
224    data->tau        = tau        ;
225    data->droptol    = droptol    ;
226    data->ownersIV   = ownersIV   ;
227    data->lookahead  = lookahead  ;
228    data->frontmtx   = frontmtx   ;
229    data->chvmanager = chvmanager ;
230    data->aggList    = aggList    ;
231    data->postList   = postList   ;
232    data->perror     = perror     ;
233    data->myid       = myid       ;
234    IVzero(10, data->stats) ;
235    DVzero(20, data->cpus) ;
236    data->msglvl = msglvl ;
237    if ( msglvl > 0 ) {
238       sprintf(buffer, "res.%d", myid) ;
239       if ( (fp = fopen(buffer, "w")) == NULL ) {
240          fprintf(stderr, "\n fatal error, unable to open file %s",
241                  buffer) ;
242          exit(-1) ;
243       }
244       data->msgFile = fp ;
245    } else {
246       data->msgFile = NULL ;
247    }
248 }
249 MARKTIME(t2) ;
250 if ( msglvl > 1 ) {
251    fprintf(msgFile, "\n CPU %8.3f : initialize data objects", t2 - t1) ;
252 }
253 /*
254    ---------------------------
255    switch over the thread type
256    ---------------------------
257 */
258 #if THREAD_TYPE == TT_SOLARIS
259 /*
260    ---------------
261    solaris threads
262    ---------------
263 */
264 MARKTIME(t1) ;
265 thr_setconcurrency(nthread) ;
266 MARKTIME(t2) ;
267 if ( msglvl > 1 ) {
268    fprintf(msgFile, "\n CPU %8.3f : set concurrency time", t2 - t1) ;
269 }
270 MARKTIME(t1) ;
271 for ( myid = 0, data = dataObjects ;
272       myid < nthread - 1 ;
273       myid++, data++ ) {
274    rc = thr_create(NULL, 0, FrontMtx_workerFactor, data, 0, NULL) ;
275    if ( rc != 0 ) {
276       fprintf(stderr,
277               "\n fatal error, myid = %d, rc = %d from thr_create",
278               myid, rc) ;
279       exit(-1) ;
280    }
281 }
282 MARKTIME(t2) ;
283 if ( msglvl > 1 ) {
284    fprintf(msgFile, "\n CPU %8.3f : thread creation time", t2 - t1) ;
285 }
286 MARKTIME(t1) ;
287 FrontMtx_workerFactor(data) ;
288 for ( myid = 0 ; myid < nthread - 1 ; myid++ ) {
289    thr_join(0, 0, 0) ;
290 }
291 MARKTIME(t2) ;
292 if ( msglvl > 1 ) {
293    fprintf(msgFile, "\n CPU %8.3f : thread join time", t2 - t1) ;
294 }
295 #endif
296 #if THREAD_TYPE == TT_POSIX
297 /*
298    -------------
299    POSIX threads
300    -------------
301 */
302 {
303 pthread_t        *tids ;
304 pthread_attr_t   attr  ;
305 void             *status ;
306 /*
307 #####   NOTE: for SGI machines, this command must be present
308 #####         for the thread scheduling to be efficient.
309 #####         this is NOT a POSIX call, but SGI needs it anyway
310 pthread_setconcurrency(nthread) ;
311 */
312 pthread_attr_init(&attr) ;
313 /*
314 pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) ;
315 */
316 pthread_attr_setscope(&attr, PTHREAD_SCOPE_PROCESS) ;
317 ALLOCATE(tids, pthread_t, nthread) ;
318 MARKTIME(t1) ;
319 for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
320 /*
321    rc = pthread_create(&tids[myid], &attr,
322                        FrontMtx_workerFactor, data) ;
323 */
324    rc = pthread_create(&tids[myid], NULL, FrontMtx_workerFactor, data) ;
325    if ( rc != 0 ) {
326       fprintf(stderr,
327               "\n fatal error, myid = %d, rc = %d from pthread_create",
328               myid, rc) ;
329       exit(-1) ;
330    } else if ( msglvl > 1 ) {
331       fprintf(stderr, "\n thread %d created", tids[myid]) ;
332    }
333 }
334 MARKTIME(t2) ;
335 if ( msglvl > 1 ) {
336    fprintf(msgFile, "\n CPU %8.3f : thread creation time", t2 - t1) ;
337 }
338 MARKTIME(t1) ;
339 for ( myid = 0 ; myid < nthread ; myid++ ) {
340    pthread_join(tids[myid], &status) ;
341 }
342 MARKTIME(t2) ;
343 if ( msglvl > 1 ) {
344    fprintf(msgFile, "\n CPU %8.3f : thread join time", t2 - t1) ;
345 }
346 FREE(tids) ;
347 pthread_attr_destroy(&attr) ;
348 }
349 #endif
350 /*
351    --------------------------------------------
352    get a pointer to any postponed root chevrons
353    --------------------------------------------
354 */
355 if ( postList != NULL ) {
356    rootchv = ChvList_getList(postList, nfront) ;
357 } else {
358    rootchv = NULL ;
359 }
360 /*
361    -------------------
362    fill the statistics
363    -------------------
364 */
365 for ( myid = 0, data = dataObjects ;
366       myid < nthread ;
367       myid++, data++ ) {
368    if ( msglvl > 3 ) {
369       fprintf(msgFile, "\n thread %d stats", myid) ;
370       IVfp80(msgFile, 10, data->stats, 20, &ierr) ;
371       fprintf(msgFile, "\n thread %d cpus", myid) ;
372       DVfprintf(msgFile, 10, data->cpus) ;
373    }
374    for ( ii = 0 ; ii < 10 ; ii++ ) {
375       stats[ii] += data->stats[ii] ;
376    }
377    for ( ii = 0 ; ii <= 10 ; ii++ ) {
378       cpus[ii] += data->cpus[ii] ;
379    }
380 }
381 stats[3] = frontmtx->nentD ;
382 stats[4] = frontmtx->nentL ;
383 stats[5] = frontmtx->nentU ;
384 stats[6] = frontmtx->nlocks ;
385 stats[7] = aggList->nlocks ;
386 if ( postList != NULL ) {
387    stats[8] = postList->nlocks ;
388 }
389 if ( msglvl > 0 ) {
390    fprintf(msgFile,
391            "\n\n factorization has finished"
392            "\n %d locks of the front matrix"
393            "\n %d locks of the aggregate list",
394            frontmtx->nlocks, aggList->nlocks) ;
395    if ( postList != NULL ) {
396       fprintf(msgFile, "\n %d locks of the aggregate list",
397           aggList->nlocks) ;
398    }
399 }
400 /*
401    -------------
402    free the data
403    -------------
404 */
405 MARKTIME(t1) ;
406 ChvList_free(aggList) ;
407 if ( postList != NULL ) {
408    ChvList_free(postList) ;
409 }
410 FREE(dataObjects) ;
411 MARKTIME(t2) ;
412 if ( msglvl > 1 ) {
413    fprintf(msgFile, "\n CPU %8.3f : total time", t2 - t1) ;
414 }
415 
416 return(rootchv) ; }
417 
418 /*--------------------------------------------------------------------*/
419 /*
420    ---------------------------------------------
421    purpose -- worker method to factor the matrix
422 
423 
424    created -- 97may30, cca
425    ---------------------------------------------
426 */
427 static void *
FrontMtx_workerFactor(void * arg)428 FrontMtx_workerFactor (
429    void   *arg
430 ) {
431 char          *status ;
432 Chv           **fronts ;
433 ChvList       *aggList, *postList ;
434 ChvManager    *chvmanager ;
435 FactorData    *data ;
436 FrontMtx      *frontmtx ;
437 Pencil        *pencil ;
438 double        droptol, tau, t1, t2 ;
439 double        *cpus ;
440 DV            *workDV ;
441 ETree         *frontETree ;
442 FILE          *msgFile ;
443 Ideq          *deq ;
444 int           J, K, lookahead, msglvl, myid, nfront ;
445 int           *nactiveChild, *owners, *par, *stats ;
446 IP            **heads ;
447 IV            *ownersIV, pivotsizesIV ;
448 Tree          *tree ;
449 #if THREAD_TYPE == TT_POSIX
450 /*
451 fprintf(stdout, "\n ### inside workerFactor, pthread_self() = %d",
452         pthread_self()) ;
453 fflush(stdout) ;
454 */
455 #endif
456 /*
457    -------------------------------
458    extract pointers and dimensions
459    -------------------------------
460 */
461 MARKTIME(t1) ;
462 data = (FactorData *) arg ;
463 pencil     = data->pencil   ;
464 tau        = data->tau      ;
465 droptol    = data->droptol  ;
466 msglvl     = data->msglvl   ;
467 msgFile    = data->msgFile  ;
468 myid       = data->myid     ;
469 frontmtx   = data->frontmtx ;
470 chvmanager = data->chvmanager ;
471 aggList    = data->aggList    ;
472 postList   = data->postList   ;
473 frontETree = frontmtx->frontETree ;
474 tree       = ETree_tree(frontETree) ;
475 nfront     = ETree_nfront(frontETree) ;
476 par        = ETree_par(frontETree) ;
477 lookahead  = data->lookahead ;
478 ownersIV   = data->ownersIV ;
479 owners     = IV_entries(ownersIV) ;
480 stats      = data->stats;
481 cpus       = data->cpus ;
482 #if THREAD_TYPE == TT_SOLARIS
483 if ( msglvl > 2 ) {
484    fprintf(stdout,
485            "\n ### inside workerFactor, myid = %d, thr_self() = %d",
486            myid, thr_self()) ;
487    fflush(stdout) ;
488 }
489 #endif
490 #if THREAD_TYPE == TT_POSIX
491 if ( msglvl > 2 ) {
492    fprintf(stdout, "\n ### inside workerFactor, myid = %d"
493                    ", pthread_self() = %d", myid, pthread_self()) ;
494    fflush(stdout) ;
495 }
496 #endif
497 /*
498    ---------------------------------------------------
499    initialize the heads[] vector for the owned updates
500    ---------------------------------------------------
501 */
502 heads = FrontMtx_factorSetup(frontmtx, ownersIV, myid,
503                               msglvl, msgFile) ;
504 /*
505    ----------------------------------------------------------------
506    initialize the Ideq object that holds the initial fronts
507    of the active paths, owned fronts with no children that
508    are owned or updates by this thread.
509    status[J] == 'W' --> J belongs to an active path for this thread
510    ----------------------------------------------------------------
511 */
512 status = CVinit(nfront, 'F') ;
513 deq = FrontMtx_setUpDequeue(frontmtx, IV_entries(ownersIV), myid,
514                              status, heads, 'W', 'F', msglvl, msgFile) ;
515 FrontMtx_loadActiveLeaves(frontmtx, status, 'W', deq) ;
516 if ( msglvl > 2 ) {
517    fprintf(msgFile, "\n\n status") ;
518    CVfprintf(msgFile, nfront, status) ;
519    fflush(msgFile) ;
520 }
521 /*
522    -----------------------------------------------
523    initialize the nactiveChild[] vector,
524    nactiveChild[J] measures the number of children
525    that belong to active paths of this thread
526    -----------------------------------------------
527 */
528 nactiveChild = FrontMtx_nactiveChild(frontmtx, status, myid) ;
529 if ( msglvl > 2 ) {
530    fprintf(msgFile, "\n\n nactiveChild") ;
531    IVfprintf(msgFile, nfront, nactiveChild) ;
532    fflush(msgFile) ;
533 }
534 /*
535    ------------------------------
536    initialize the working storage
537    ------------------------------
538 */
539 ALLOCATE(fronts, Chv *, nfront) ;
540 for ( J = 0 ; J < nfront ; J++ ) {
541    fronts[J] = NULL ;
542 }
543 IV_setDefaultFields(&pivotsizesIV) ;
544 /*
545 if ( FRONTMTX_IS_PIVOTING(frontmtx)
546    && (  FRONTMTX_IS_SYMMETRIC(frontmtx)
547       || FRONTMTX_IS_HERMITIAN(frontmtx) ) ) {
548    pivotsizesIV = IV_new() ;
549 } else {
550    fprintf(msgFile, "\n no pivotsizesIV needed") ;
551    fflush(msgFile) ;
552    pivotsizesIV = NULL ;
553 }
554 */
555 workDV = DV_new() ;
556 /*
557    ---------------------------
558    loop while a path is active
559    ---------------------------
560 */
561 IVzero(10, stats) ;
562 while ( (J = Ideq_removeFromHead(deq)) != -1 ) {
563    if ( msglvl > 1 ) {
564       fprintf(msgFile, "\n\n ### checking out front %d", J) ;
565       fflush(msgFile) ;
566    }
567    FrontMtx_factorVisit(frontmtx, pencil, J, myid, owners, fronts,
568       lookahead, data->tau, data->droptol, status, heads, &pivotsizesIV,
569       workDV, par, data->aggList, data->postList,
570       data->chvmanager, stats, cpus, msglvl, msgFile) ;
571    if ( status[J] == 'E' ) {
572 /*
573       ----------------------------
574       error encountered at front J
575       ----------------------------
576 */
577       *(data->perror) = J ;
578       break ;
579    } else if ( status[J] == 'F' ) {
580       if ( msglvl > 1 ) {
581          fprintf(msgFile, "\n\n front %d is finished", J) ;
582          fflush(msgFile) ;
583       }
584       if ( (K = par[J]) != -1 && --nactiveChild[K] == 0 ) {
585          if ( msglvl > 1 ) {
586             fprintf(msgFile, "\n\n adding front %d to dequeue", K) ;
587             fflush(msgFile) ;
588          }
589          Ideq_insertAtHead(deq, K) ;
590       }
591    } else {
592       if ( msglvl > 1 ) {
593          fprintf(msgFile, "\n\n front %d not yet done", J) ;
594          fflush(msgFile) ;
595       }
596       Ideq_insertAtTail(deq, J) ;
597    }
598    if ( *(data->perror) >= 0 ) {
599 /*
600       ------------------------------------
601       error encountered, break out of loop
602       ------------------------------------
603 */
604       break ;
605    }
606 }
607 /*
608    ------------------------
609    free the working storage
610    ------------------------
611 */
612 IV_clearData(&pivotsizesIV) ;
613 if ( workDV != NULL ) {
614    DV_free(workDV) ;
615 }
616 CVfree(status) ;
617 IVfree(nactiveChild) ;
618 IP_free(heads[nfront+1]) ;
619 FREE(heads) ;
620 FREE(fronts) ;
621 Ideq_free(deq) ;
622 MARKTIME(t2) ;
623 cpus[10] = t2 - t1 ;
624 cpus[9] = t2 - t1 - cpus[0] - cpus[1] - cpus[2] - cpus[3]
625         - cpus[4] - cpus[5] - cpus[6] - cpus[7] - cpus[8] ;
626 
627 return(NULL) ; }
628 
629 /*--------------------------------------------------------------------*/
630