1 /* factorMT.c */
2
3 #include "../spoolesMT.h"
4 #include "../../Ideq.h"
5 #include "../../timings.h"
6
7 /*--------------------------------------------------------------------*/
8 /*
9 -----------------------------
10 worker method for each thread
11 -----------------------------
12 */
13 static void * FrontMtx_workerFactor ( void *arg ) ;
14 /*
15 ----------------------------------------------------------------
16 this object is used during a threaded factorization
17
18 pencil -- matrix pencil A + sigma * B
19 tau -- upper bound on factor entries when pivoting enabled
20 droptol -- drop tolerance used for sparse fronts
21 ownersIV -- map from fronts to threads
22 lookahead -- parameter used to control computation lookahead
23 0 --> no lookahead
24 > 0 --> look up a number of levels up the tree
25
26 frontmtx -- object used to store factorization
27 manager -- object used to manage working Chv objects
28 aggList -- object used to store aggregate data
29 postList -- object used to store postponed data
30 perror -- pointer to external error flag
31
32 heads -- IP vector to maintain update lists
33 myid -- thread id
34 fronts -- vector of pointers to active fronts
35 stats -- int statistics vector
36 cpus -- double vector to store breakdown of cpu times
37 msglvl -- message level
38 msgFile -- message file
39
40 created -- 97may30, cca
41 ----------------------------------------------------------------
42 */
43 typedef struct _FactorData FactorData ;
44 struct _FactorData {
45 /*
46 -------------------------
47 global data, not modified
48 -------------------------
49 */
50 Pencil *pencil ;
51 double tau ;
52 double droptol ;
53 IV *ownersIV ;
54 int lookahead ;
55 /*
56 ---------------------
57 shared data, modified
58 ---------------------
59 */
60 FrontMtx *frontmtx ;
61 ChvManager *chvmanager ;
62 ChvList *aggList ;
63 ChvList *postList ;
64 int *perror ;
65 /*
66 ----------
67 local data
68 ----------
69 */
70 int myid ;
71 int stats[10] ;
72 double cpus[20] ;
73 int msglvl ;
74 FILE *msgFile ;
75 } ;
76 /*--------------------------------------------------------------------*/
77 /*
78 -------------------------------------------------------------------
79 parallel factorization method for A.
80 all but two input parameters are the same as the serial method.
81 this is a wrapper method around FrontMtx_MT_factorInpMtx().
82
83 ownersIV -- pointer to IV object that holds map from fronts
84 to threads
85 lookahead -- lookahead parameter that allows computation at
86 higher levels of the front tree to proceed when
87 lower fronts are not yet finish. use lookahead = 0
88 to turn off this option. otherwise lookahead ancestors
89 of an active unfinished front can be active.
90
91 return value -- pointer to the first Chv object in a list
92 that contains postponed data
93
94 created -- 98may16, cca
95 -------------------------------------------------------------------
96 */
97 Chv *
FrontMtx_MT_factorInpMtx(FrontMtx * frontmtx,InpMtx * inpmtx,double tau,double droptol,ChvManager * chvmanager,IV * ownersIV,int lookahead,int * perror,double cpus[],int stats[],int msglvl,FILE * msgFile)98 FrontMtx_MT_factorInpMtx (
99 FrontMtx *frontmtx,
100 InpMtx *inpmtx,
101 double tau,
102 double droptol,
103 ChvManager *chvmanager,
104 IV *ownersIV,
105 int lookahead,
106 int *perror,
107 double cpus[],
108 int stats[],
109 int msglvl,
110 FILE *msgFile
111 ) {
112 Chv *rootchv ;
113 double sigma[2] = {0.0, 0.0} ;
114 Pencil pencil ;
115
116 Pencil_setDefaultFields(&pencil) ;
117 Pencil_init(&pencil, frontmtx->type, frontmtx->symmetryflag,
118 inpmtx, sigma, NULL) ;
119 rootchv = FrontMtx_MT_factorPencil(frontmtx, &pencil, tau, droptol,
120 chvmanager, ownersIV, lookahead,
121 perror, cpus, stats, msglvl, msgFile) ;
122
123 return(rootchv) ; }
124
125 /*--------------------------------------------------------------------*/
126 /*
127 -------------------------------------------------------------------
128 parallel factorization method for A + sigma*B.
129 all but two input parameters are the same
130 as the FrontMtx_factorPencil method.
131
132 ownersIV -- pointer to IV object that holds map from fronts
133 to threads
134 lookahead -- lookahead parameter that allows computation at
135 higher levels of the front tree to proceed when
136 lower fronts are not yet finish. use lookahead = 0
137 to turn off this option. otherwise lookahead ancestors
138 of an active unfinished front can be active.
139
140 return value -- pointer to the first Chv object in a list
141 that contains postponed data
142
143 created -- 98may16, cca
144 -------------------------------------------------------------------
145 */
146 Chv *
FrontMtx_MT_factorPencil(FrontMtx * frontmtx,Pencil * pencil,double tau,double droptol,ChvManager * chvmanager,IV * ownersIV,int lookahead,int * perror,double cpus[],int stats[],int msglvl,FILE * msgFile)147 FrontMtx_MT_factorPencil (
148 FrontMtx *frontmtx,
149 Pencil *pencil,
150 double tau,
151 double droptol,
152 ChvManager *chvmanager,
153 IV *ownersIV,
154 int lookahead,
155 int *perror,
156 double cpus[],
157 int stats[],
158 int msglvl,
159 FILE *msgFile
160 ) {
161 char buffer[20] ;
162 Chv *rootchv ;
163 ChvList *aggList ;
164 ChvList *postList ;
165 double t0, t1, t2 ;
166 FactorData *data, *dataObjects ;
167 FILE *fp ;
168 int ierr, ii, myid, nfront, nthread, rc ;
169 int *owners ;
170 /*
171 --------------
172 check the data
173 --------------
174 */
175 MARKTIME(t0) ;
176 if ( frontmtx == NULL || pencil == NULL || tau < 1.0 || droptol < 0.0
177 || ownersIV == NULL || lookahead < 0 || cpus == NULL || stats == NULL
178 || msglvl < 0 || (msglvl > 0 && msgFile == NULL) ) {
179 fprintf(stderr, "\n fatal error in FrontMtx_MT_factorPencil()"
180 "\n frontmtx = %p, pencil = %p"
181 "\n tau = %f, droptol = %f, ownersIV = %p, lookahead = %d"
182 "\n cpus = %p, stats = %p, msglvl = %d, msgFile = %p"
183 "\n bad input\n", frontmtx, pencil, tau, droptol,
184 ownersIV, lookahead, cpus, stats, msglvl, msgFile) ;
185 exit(-1) ;
186 }
187 IV_sizeAndEntries(ownersIV, &nfront, &owners) ;
188 nthread = 1 + IV_max(ownersIV) ;
189 /*
190 --------------------------------------------------------------------
191 create :
192 (1) a ChvList object to handle the lists of aggregate Chv objects,
193 (2) if pivoting is enabled, a ChvList object to handle
194 the lists of postponed Chv objects.
195 --------------------------------------------------------------------
196 */
197 MARKTIME(t1) ;
198 aggList = FrontMtx_aggregateList(frontmtx, ownersIV, 1) ;
199 if ( FRONTMTX_IS_PIVOTING(frontmtx) ) {
200 postList = FrontMtx_postList(frontmtx, ownersIV, 1) ;
201 } else {
202 postList = NULL ;
203 }
204 MARKTIME(t2) ;
205 if ( msglvl > 1 ) {
206 fprintf(msgFile, "\n CPU %8.3f : initialize lists and manager",
207 t2 - t1) ;
208 }
209 /*
210 ------------------
211 set the error flag
212 ------------------
213 */
214 *perror = -1 ;
215 /*
216 ----------------------------------------------------------
217 create nthread FactorData objects and load with their data
218 ----------------------------------------------------------
219 */
220 MARKTIME(t1) ;
221 ALLOCATE(dataObjects, struct _FactorData, nthread) ;
222 for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
223 data->pencil = pencil ;
224 data->tau = tau ;
225 data->droptol = droptol ;
226 data->ownersIV = ownersIV ;
227 data->lookahead = lookahead ;
228 data->frontmtx = frontmtx ;
229 data->chvmanager = chvmanager ;
230 data->aggList = aggList ;
231 data->postList = postList ;
232 data->perror = perror ;
233 data->myid = myid ;
234 IVzero(10, data->stats) ;
235 DVzero(20, data->cpus) ;
236 data->msglvl = msglvl ;
237 if ( msglvl > 0 ) {
238 sprintf(buffer, "res.%d", myid) ;
239 if ( (fp = fopen(buffer, "w")) == NULL ) {
240 fprintf(stderr, "\n fatal error, unable to open file %s",
241 buffer) ;
242 exit(-1) ;
243 }
244 data->msgFile = fp ;
245 } else {
246 data->msgFile = NULL ;
247 }
248 }
249 MARKTIME(t2) ;
250 if ( msglvl > 1 ) {
251 fprintf(msgFile, "\n CPU %8.3f : initialize data objects", t2 - t1) ;
252 }
253 /*
254 ---------------------------
255 switch over the thread type
256 ---------------------------
257 */
258 #if THREAD_TYPE == TT_SOLARIS
259 /*
260 ---------------
261 solaris threads
262 ---------------
263 */
264 MARKTIME(t1) ;
265 thr_setconcurrency(nthread) ;
266 MARKTIME(t2) ;
267 if ( msglvl > 1 ) {
268 fprintf(msgFile, "\n CPU %8.3f : set concurrency time", t2 - t1) ;
269 }
270 MARKTIME(t1) ;
271 for ( myid = 0, data = dataObjects ;
272 myid < nthread - 1 ;
273 myid++, data++ ) {
274 rc = thr_create(NULL, 0, FrontMtx_workerFactor, data, 0, NULL) ;
275 if ( rc != 0 ) {
276 fprintf(stderr,
277 "\n fatal error, myid = %d, rc = %d from thr_create",
278 myid, rc) ;
279 exit(-1) ;
280 }
281 }
282 MARKTIME(t2) ;
283 if ( msglvl > 1 ) {
284 fprintf(msgFile, "\n CPU %8.3f : thread creation time", t2 - t1) ;
285 }
286 MARKTIME(t1) ;
287 FrontMtx_workerFactor(data) ;
288 for ( myid = 0 ; myid < nthread - 1 ; myid++ ) {
289 thr_join(0, 0, 0) ;
290 }
291 MARKTIME(t2) ;
292 if ( msglvl > 1 ) {
293 fprintf(msgFile, "\n CPU %8.3f : thread join time", t2 - t1) ;
294 }
295 #endif
296 #if THREAD_TYPE == TT_POSIX
297 /*
298 -------------
299 POSIX threads
300 -------------
301 */
302 {
303 pthread_t *tids ;
304 pthread_attr_t attr ;
305 void *status ;
306 /*
307 ##### NOTE: for SGI machines, this command must be present
308 ##### for the thread scheduling to be efficient.
309 ##### this is NOT a POSIX call, but SGI needs it anyway
310 pthread_setconcurrency(nthread) ;
311 */
312 pthread_attr_init(&attr) ;
313 /*
314 pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) ;
315 */
316 pthread_attr_setscope(&attr, PTHREAD_SCOPE_PROCESS) ;
317 ALLOCATE(tids, pthread_t, nthread) ;
318 MARKTIME(t1) ;
319 for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
320 /*
321 rc = pthread_create(&tids[myid], &attr,
322 FrontMtx_workerFactor, data) ;
323 */
324 rc = pthread_create(&tids[myid], NULL, FrontMtx_workerFactor, data) ;
325 if ( rc != 0 ) {
326 fprintf(stderr,
327 "\n fatal error, myid = %d, rc = %d from pthread_create",
328 myid, rc) ;
329 exit(-1) ;
330 } else if ( msglvl > 1 ) {
331 fprintf(stderr, "\n thread %d created", tids[myid]) ;
332 }
333 }
334 MARKTIME(t2) ;
335 if ( msglvl > 1 ) {
336 fprintf(msgFile, "\n CPU %8.3f : thread creation time", t2 - t1) ;
337 }
338 MARKTIME(t1) ;
339 for ( myid = 0 ; myid < nthread ; myid++ ) {
340 pthread_join(tids[myid], &status) ;
341 }
342 MARKTIME(t2) ;
343 if ( msglvl > 1 ) {
344 fprintf(msgFile, "\n CPU %8.3f : thread join time", t2 - t1) ;
345 }
346 FREE(tids) ;
347 pthread_attr_destroy(&attr) ;
348 }
349 #endif
350 /*
351 --------------------------------------------
352 get a pointer to any postponed root chevrons
353 --------------------------------------------
354 */
355 if ( postList != NULL ) {
356 rootchv = ChvList_getList(postList, nfront) ;
357 } else {
358 rootchv = NULL ;
359 }
360 /*
361 -------------------
362 fill the statistics
363 -------------------
364 */
365 for ( myid = 0, data = dataObjects ;
366 myid < nthread ;
367 myid++, data++ ) {
368 if ( msglvl > 3 ) {
369 fprintf(msgFile, "\n thread %d stats", myid) ;
370 IVfp80(msgFile, 10, data->stats, 20, &ierr) ;
371 fprintf(msgFile, "\n thread %d cpus", myid) ;
372 DVfprintf(msgFile, 10, data->cpus) ;
373 }
374 for ( ii = 0 ; ii < 10 ; ii++ ) {
375 stats[ii] += data->stats[ii] ;
376 }
377 for ( ii = 0 ; ii <= 10 ; ii++ ) {
378 cpus[ii] += data->cpus[ii] ;
379 }
380 }
381 stats[3] = frontmtx->nentD ;
382 stats[4] = frontmtx->nentL ;
383 stats[5] = frontmtx->nentU ;
384 stats[6] = frontmtx->nlocks ;
385 stats[7] = aggList->nlocks ;
386 if ( postList != NULL ) {
387 stats[8] = postList->nlocks ;
388 }
389 if ( msglvl > 0 ) {
390 fprintf(msgFile,
391 "\n\n factorization has finished"
392 "\n %d locks of the front matrix"
393 "\n %d locks of the aggregate list",
394 frontmtx->nlocks, aggList->nlocks) ;
395 if ( postList != NULL ) {
396 fprintf(msgFile, "\n %d locks of the aggregate list",
397 aggList->nlocks) ;
398 }
399 }
400 /*
401 -------------
402 free the data
403 -------------
404 */
405 MARKTIME(t1) ;
406 ChvList_free(aggList) ;
407 if ( postList != NULL ) {
408 ChvList_free(postList) ;
409 }
410 FREE(dataObjects) ;
411 MARKTIME(t2) ;
412 if ( msglvl > 1 ) {
413 fprintf(msgFile, "\n CPU %8.3f : total time", t2 - t1) ;
414 }
415
416 return(rootchv) ; }
417
418 /*--------------------------------------------------------------------*/
419 /*
420 ---------------------------------------------
421 purpose -- worker method to factor the matrix
422
423
424 created -- 97may30, cca
425 ---------------------------------------------
426 */
427 static void *
FrontMtx_workerFactor(void * arg)428 FrontMtx_workerFactor (
429 void *arg
430 ) {
431 char *status ;
432 Chv **fronts ;
433 ChvList *aggList, *postList ;
434 ChvManager *chvmanager ;
435 FactorData *data ;
436 FrontMtx *frontmtx ;
437 Pencil *pencil ;
438 double droptol, tau, t1, t2 ;
439 double *cpus ;
440 DV *workDV ;
441 ETree *frontETree ;
442 FILE *msgFile ;
443 Ideq *deq ;
444 int J, K, lookahead, msglvl, myid, nfront ;
445 int *nactiveChild, *owners, *par, *stats ;
446 IP **heads ;
447 IV *ownersIV, pivotsizesIV ;
448 Tree *tree ;
449 #if THREAD_TYPE == TT_POSIX
450 /*
451 fprintf(stdout, "\n ### inside workerFactor, pthread_self() = %d",
452 pthread_self()) ;
453 fflush(stdout) ;
454 */
455 #endif
456 /*
457 -------------------------------
458 extract pointers and dimensions
459 -------------------------------
460 */
461 MARKTIME(t1) ;
462 data = (FactorData *) arg ;
463 pencil = data->pencil ;
464 tau = data->tau ;
465 droptol = data->droptol ;
466 msglvl = data->msglvl ;
467 msgFile = data->msgFile ;
468 myid = data->myid ;
469 frontmtx = data->frontmtx ;
470 chvmanager = data->chvmanager ;
471 aggList = data->aggList ;
472 postList = data->postList ;
473 frontETree = frontmtx->frontETree ;
474 tree = ETree_tree(frontETree) ;
475 nfront = ETree_nfront(frontETree) ;
476 par = ETree_par(frontETree) ;
477 lookahead = data->lookahead ;
478 ownersIV = data->ownersIV ;
479 owners = IV_entries(ownersIV) ;
480 stats = data->stats;
481 cpus = data->cpus ;
482 #if THREAD_TYPE == TT_SOLARIS
483 if ( msglvl > 2 ) {
484 fprintf(stdout,
485 "\n ### inside workerFactor, myid = %d, thr_self() = %d",
486 myid, thr_self()) ;
487 fflush(stdout) ;
488 }
489 #endif
490 #if THREAD_TYPE == TT_POSIX
491 if ( msglvl > 2 ) {
492 fprintf(stdout, "\n ### inside workerFactor, myid = %d"
493 ", pthread_self() = %d", myid, pthread_self()) ;
494 fflush(stdout) ;
495 }
496 #endif
497 /*
498 ---------------------------------------------------
499 initialize the heads[] vector for the owned updates
500 ---------------------------------------------------
501 */
502 heads = FrontMtx_factorSetup(frontmtx, ownersIV, myid,
503 msglvl, msgFile) ;
504 /*
505 ----------------------------------------------------------------
506 initialize the Ideq object that holds the initial fronts
507 of the active paths, owned fronts with no children that
508 are owned or updates by this thread.
509 status[J] == 'W' --> J belongs to an active path for this thread
510 ----------------------------------------------------------------
511 */
512 status = CVinit(nfront, 'F') ;
513 deq = FrontMtx_setUpDequeue(frontmtx, IV_entries(ownersIV), myid,
514 status, heads, 'W', 'F', msglvl, msgFile) ;
515 FrontMtx_loadActiveLeaves(frontmtx, status, 'W', deq) ;
516 if ( msglvl > 2 ) {
517 fprintf(msgFile, "\n\n status") ;
518 CVfprintf(msgFile, nfront, status) ;
519 fflush(msgFile) ;
520 }
521 /*
522 -----------------------------------------------
523 initialize the nactiveChild[] vector,
524 nactiveChild[J] measures the number of children
525 that belong to active paths of this thread
526 -----------------------------------------------
527 */
528 nactiveChild = FrontMtx_nactiveChild(frontmtx, status, myid) ;
529 if ( msglvl > 2 ) {
530 fprintf(msgFile, "\n\n nactiveChild") ;
531 IVfprintf(msgFile, nfront, nactiveChild) ;
532 fflush(msgFile) ;
533 }
534 /*
535 ------------------------------
536 initialize the working storage
537 ------------------------------
538 */
539 ALLOCATE(fronts, Chv *, nfront) ;
540 for ( J = 0 ; J < nfront ; J++ ) {
541 fronts[J] = NULL ;
542 }
543 IV_setDefaultFields(&pivotsizesIV) ;
544 /*
545 if ( FRONTMTX_IS_PIVOTING(frontmtx)
546 && ( FRONTMTX_IS_SYMMETRIC(frontmtx)
547 || FRONTMTX_IS_HERMITIAN(frontmtx) ) ) {
548 pivotsizesIV = IV_new() ;
549 } else {
550 fprintf(msgFile, "\n no pivotsizesIV needed") ;
551 fflush(msgFile) ;
552 pivotsizesIV = NULL ;
553 }
554 */
555 workDV = DV_new() ;
556 /*
557 ---------------------------
558 loop while a path is active
559 ---------------------------
560 */
561 IVzero(10, stats) ;
562 while ( (J = Ideq_removeFromHead(deq)) != -1 ) {
563 if ( msglvl > 1 ) {
564 fprintf(msgFile, "\n\n ### checking out front %d", J) ;
565 fflush(msgFile) ;
566 }
567 FrontMtx_factorVisit(frontmtx, pencil, J, myid, owners, fronts,
568 lookahead, data->tau, data->droptol, status, heads, &pivotsizesIV,
569 workDV, par, data->aggList, data->postList,
570 data->chvmanager, stats, cpus, msglvl, msgFile) ;
571 if ( status[J] == 'E' ) {
572 /*
573 ----------------------------
574 error encountered at front J
575 ----------------------------
576 */
577 *(data->perror) = J ;
578 break ;
579 } else if ( status[J] == 'F' ) {
580 if ( msglvl > 1 ) {
581 fprintf(msgFile, "\n\n front %d is finished", J) ;
582 fflush(msgFile) ;
583 }
584 if ( (K = par[J]) != -1 && --nactiveChild[K] == 0 ) {
585 if ( msglvl > 1 ) {
586 fprintf(msgFile, "\n\n adding front %d to dequeue", K) ;
587 fflush(msgFile) ;
588 }
589 Ideq_insertAtHead(deq, K) ;
590 }
591 } else {
592 if ( msglvl > 1 ) {
593 fprintf(msgFile, "\n\n front %d not yet done", J) ;
594 fflush(msgFile) ;
595 }
596 Ideq_insertAtTail(deq, J) ;
597 }
598 if ( *(data->perror) >= 0 ) {
599 /*
600 ------------------------------------
601 error encountered, break out of loop
602 ------------------------------------
603 */
604 break ;
605 }
606 }
607 /*
608 ------------------------
609 free the working storage
610 ------------------------
611 */
612 IV_clearData(&pivotsizesIV) ;
613 if ( workDV != NULL ) {
614 DV_free(workDV) ;
615 }
616 CVfree(status) ;
617 IVfree(nactiveChild) ;
618 IP_free(heads[nfront+1]) ;
619 FREE(heads) ;
620 FREE(fronts) ;
621 Ideq_free(deq) ;
622 MARKTIME(t2) ;
623 cpus[10] = t2 - t1 ;
624 cpus[9] = t2 - t1 - cpus[0] - cpus[1] - cpus[2] - cpus[3]
625 - cpus[4] - cpus[5] - cpus[6] - cpus[7] - cpus[8] ;
626
627 return(NULL) ; }
628
629 /*--------------------------------------------------------------------*/
630