1 /* maps.c */
2
3 #include "../ETree.h"
4
5 #define MYDEBUG 0
6
7 #define SUBTREE_SIZE 1
8 #define SUBTREE_OPS 2
9 #define SUBTREE_DEFINITION SUBTREE_OPS
10
11 /*--------------------------------------------------------------------*/
12 /*
13 --------------------------------------------------------------
14 this method constructs and returns an IV object that holds the
15 map from fronts to threads for a wrap map of the front tree.
16
17 created -- 96dec12, cca
18 --------------------------------------------------------------
19 */
20 IV *
ETree_wrapMap(ETree * frontETree,int type,int symflag,DV * cumopsDV)21 ETree_wrapMap (
22 ETree *frontETree,
23 int type,
24 int symflag,
25 DV *cumopsDV
26 ) {
27 double *cumops, *forwardOps ;
28 DV *forwardOpsDV ;
29 int jthread, J, nfront, nthread ;
30 int *owners ;
31 IV *ownersIV ;
32 Tree *tree ;
33 /*
34 ---------------
35 check the input
36 ---------------
37 */
38 if ( frontETree == NULL || cumopsDV == NULL ) {
39 fprintf(stderr, "\n fatal error in ETree_wrapMap(%p,%p)"
40 "\n bad input\n", frontETree, cumopsDV) ;
41 exit(-1) ;
42 }
43 tree = frontETree->tree ;
44 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
45 DV_zero(cumopsDV) ;
46 /*
47 ---------------------------------
48 get a vector of forward op counts
49 ---------------------------------
50 */
51 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
52 DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
53 /*
54 -------------------
55 load the map vector
56 -------------------
57 */
58 ownersIV = IV_new() ;
59 IV_init(ownersIV, nfront, NULL) ;
60 owners = IV_entries(ownersIV) ;
61 for ( J = Tree_postOTfirst(tree) ;
62 J != -1 ;
63 J = Tree_postOTnext(tree, J) ) {
64 jthread = J % nthread ;
65 owners[J] = jthread ;
66 cumops[jthread] += forwardOps[J] ;
67 }
68 /*
69 ------------------------
70 free the working storage
71 ------------------------
72 */
73 DV_free(forwardOpsDV) ;
74
75 return(ownersIV) ; }
76
77 /*--------------------------------------------------------------------*/
78 /*
79 ----------------------------------------------------------------
80 this method constructs and returns an IV object that holds the
81 map from fronts to threads for a balanced map of the front tree.
82 the fronts are visited in the post-order traversal.
83
84 created -- 96dec12, cca
85 ----------------------------------------------------------------
86 */
87 IV *
ETree_balancedMap(ETree * frontETree,int type,int symflag,DV * cumopsDV)88 ETree_balancedMap (
89 ETree *frontETree,
90 int type,
91 int symflag,
92 DV *cumopsDV
93 ) {
94 double minops ;
95 double *cumops, *forwardOps ;
96 DV *forwardOpsDV ;
97 int ithread, jthread, J, nfront, nthread ;
98 int *owners ;
99 IV *ownersIV ;
100 Tree *tree ;
101 /*
102 ---------------
103 check the input
104 ---------------
105 */
106 if ( frontETree == NULL || cumopsDV == NULL ) {
107 fprintf(stderr, "\n fatal error in ETree_balancedMap(%p,%p)"
108 "\n bad input\n", frontETree, cumopsDV) ;
109 exit(-1) ;
110 }
111 tree = frontETree->tree ;
112 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
113 DV_zero(cumopsDV) ;
114 /*
115 ---------------------------------
116 get a vector of forward op counts
117 ---------------------------------
118 */
119 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
120 DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
121 /*
122 -------------------
123 load the map vector
124 -------------------
125 */
126 ownersIV = IV_new() ;
127 IV_init(ownersIV, nfront, NULL) ;
128 owners = IV_entries(ownersIV) ;
129 for ( J = Tree_postOTfirst(tree) ;
130 J != -1 ;
131 J = Tree_postOTnext(tree, J) ) {
132 jthread = 0 ;
133 minops = cumops[0] ;
134 for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
135 if ( minops > cumops[ithread] ) {
136 jthread = ithread ;
137 minops = cumops[ithread] ;
138 }
139 }
140 owners[J] = jthread ;
141 cumops[jthread] += forwardOps[J] ;
142 }
143 /*
144 ------------------------
145 free the working storage
146 ------------------------
147 */
148 DV_free(forwardOpsDV) ;
149
150 return(ownersIV) ; }
151
152 /*--------------------------------------------------------------------*/
153 /*
154 -----------------------------------------------
155 this method constructs and returns an IV object
156 that holds the map from fronts to threads for a
157 "subtree-subset" map of the front tree.
158
159 created -- 97jan15, cca
160 -----------------------------------------------
161 */
162 IV *
ETree_subtreeSubsetMap(ETree * frontETree,int type,int symflag,DV * cumopsDV)163 ETree_subtreeSubsetMap (
164 ETree *frontETree,
165 int type,
166 int symflag,
167 DV *cumopsDV
168 ) {
169 double offset, total ;
170 double *cumops, *forwardOps, *tmetric ;
171 DV *forwardOpsDV, *tmetricDV ;
172 int I, J, mthread, nfront, nthread, q, qmin ;
173 int *fch, *firsts, *lasts, *owners, *par, *sib ;
174 IV *ownersIV ;
175 Tree *tree ;
176 /*
177 ---------------
178 check the input
179 ---------------
180 */
181 if ( frontETree == NULL || cumopsDV == NULL ) {
182 fprintf(stderr, "\n fatal error in ETree_subtreeSubsetMap(%p,%p)"
183 "\n bad input\n", frontETree, cumopsDV) ;
184 exit(-1) ;
185 }
186 tree = frontETree->tree ;
187 fch = tree->fch ;
188 par = tree->par ;
189 sib = tree->sib ;
190 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
191 DV_zero(cumopsDV) ;
192 /*
193 ---------------------------------
194 get a vector of forward op counts
195 ---------------------------------
196 */
197 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
198 DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
199 #if MYDEBUG > 0
200 fprintf(stdout, "\n\n forwardOpsDV") ;
201 DV_writeForHumanEye(forwardOpsDV, stdout) ;
202 fflush(stdout) ;
203 #endif
204 /*
205 --------------------------------
206 get the subtree metric DV object
207 --------------------------------
208 */
209 tmetricDV = Tree_setSubtreeDmetric(tree, forwardOpsDV) ;
210 tmetric = DV_entries(tmetricDV) ;
211 #if MYDEBUG > 0
212 fprintf(stdout, "\n\n tmetricDV") ;
213 DV_writeForHumanEye(tmetricDV, stdout) ;
214 fflush(stdout) ;
215 #endif
216 /*
217 -----------------------------------
218 left justify the tree to make the
219 assignment algorithm work correctly
220 -----------------------------------
221 */
222 ETree_leftJustifyD(frontETree, tmetricDV) ;
223 /*
224 -----------------------------------------------------
225 fill two vectors that hold the first and last threads
226 that are eligible to own a front
227 -----------------------------------------------------
228 */
229 #if MYDEBUG > 0
230 fprintf(stdout, "\n\n pre-order traversal to determine eligible sets") ;
231 fflush(stdout) ;
232 #endif
233 firsts = IVinit(nfront, -1) ;
234 lasts = IVinit(nfront, -1) ;
235 for ( J = Tree_preOTfirst(tree) ;
236 J != -1 ;
237 J = Tree_preOTnext(tree, J) ) {
238 #if MYDEBUG > 0
239 fprintf(stdout, "\n\n visiting front %d", J) ;
240 fflush(stdout) ;
241 #endif
242 if ( par[J] == -1 ) {
243 firsts[J] = 0 ;
244 lasts[J] = nthread - 1 ;
245 #if MYDEBUG > 0
246 fprintf(stdout, ", root front") ;
247 fflush(stdout) ;
248 #endif
249 }
250 #if MYDEBUG > 0
251 fprintf(stdout, "\n first = %d, last = %d", firsts[J], lasts[J]) ;
252 fflush(stdout) ;
253 #endif
254 if ( fch[J] != -1 ) {
255 mthread = lasts[J] - firsts[J] + 1 ;
256 total = tmetric[J] - forwardOps[J] ;
257 #if MYDEBUG > 0
258 fprintf(stdout, "\n mthread = %d, total = %.0f", mthread, total) ;
259 fflush(stdout) ;
260 #endif
261 for ( I = fch[J], offset = 0.0 ; I != -1 ; I = sib[I] ) {
262 firsts[I] = firsts[J] + (int) (mthread*offset/total) ;
263 #if MYDEBUG > 0
264 fprintf(stdout, "\n child %d, offset = %.0f, firsts[%d] = %d",
265 I, offset, I, firsts[I]) ;
266 fflush(stdout) ;
267 #endif
268 offset += tmetric[I] ;
269 lasts[I] = firsts[J] + (int) (mthread*offset/total) - 1 ;
270 if ( lasts[I] < firsts[I] ) {
271 lasts[I] = firsts[I] ;
272 }
273 #if MYDEBUG > 0
274 fprintf(stdout, "\n child %d, offset = %.0f, lasts[%d] = %d",
275 I, offset, I, lasts[I]) ;
276 fflush(stdout) ;
277 #endif
278 }
279 }
280 }
281 /*
282 ---------------------------------------------------------------
283 now fill the map IV object and cumops[*] vector with a
284 balanced map using the candidate sets via a postorder traversal
285 ---------------------------------------------------------------
286 */
287 ownersIV = IV_new() ;
288 IV_init(ownersIV, nfront, NULL) ;
289 owners = IV_entries(ownersIV) ;
290 for ( J = Tree_postOTfirst(tree) ;
291 J != -1 ;
292 J = Tree_postOTnext(tree, J) ) {
293 #if MYDEBUG > 0
294 fprintf(stdout, "\n front %d, firsts[%d] = %d, lasts[%d] = %d",
295 J, J, firsts[J], J, lasts[J]) ;
296 fflush(stdout) ;
297 #endif
298 qmin = firsts[J] ;
299 for ( q = firsts[J] + 1 ; q <= lasts[J] ; q++ ) {
300 if ( cumops[qmin] > cumops[q] ) {
301 qmin = q ;
302 }
303 }
304 owners[J] = qmin ;
305 cumops[qmin] += forwardOps[J] ;
306 #if MYDEBUG > 0
307 fprintf(stdout, ", owners[%d] = %d, cumops[%d] = %.0f",
308 J, owners[J], qmin, cumops[qmin]) ;
309 fflush(stdout) ;
310 #endif
311 }
312 /*
313 ------------------------
314 free the working storage
315 ------------------------
316 */
317 DV_free(forwardOpsDV) ;
318 DV_free(tmetricDV) ;
319 IVfree(firsts) ;
320 IVfree(lasts) ;
321
322 return(ownersIV) ; }
323
324 /*--------------------------------------------------------------------*/
325 /*
326 ----------------------------------------------------------------
327 this method constructs and returns an IV object that holds the
328 map from fronts to threads for a domain decomposition
329 balanced map of the front tree.
330 the domains are mapped to threads using a balanced map,
331 and the schur complement fronts are mapped to threads
332 using a balanced map, but the two balanced maps are independent.
333
334 created -- 97jan17, cca
335 ----------------------------------------------------------------
336 */
337 IV *
ETree_ddMap(ETree * frontETree,int type,int symflag,DV * cumopsDV,double cutoff)338 ETree_ddMap (
339 ETree *frontETree,
340 int type,
341 int symflag,
342 DV *cumopsDV,
343 double cutoff
344 ) {
345 double minops ;
346 double *cumops, *domainops, *forwardOps, *schurops, *tmetric ;
347 DV *forwardOpsDV, *tmetricDV ;
348 int ithread, jthread, J, K, ndom, nfront, nthread, root ;
349 int *ms, *owners, *par, *rootAnc ;
350 IV *msIV, *ownersIV, *rootAncIV ;
351 Tree *tree ;
352 /*
353 ---------------
354 check the input
355 ---------------
356 */
357 if ( frontETree == NULL || cumopsDV == NULL ) {
358 fprintf(stderr, "\n fatal error in ETree_ddMap(%p,%p,%f)"
359 "\n bad input\n", frontETree, cumopsDV, cutoff) ;
360 exit(-1) ;
361 }
362 nfront = frontETree->nfront ;
363 tree = frontETree->tree ;
364 par = tree->par ;
365 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
366 DV_zero(cumopsDV) ;
367 /*
368 ---------------------------------
369 get a vector of forward op counts
370 ---------------------------------
371 */
372 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
373 DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
374 #if MYDEBUG > 0
375 fprintf(stdout, "\n forwardOpsDV") ;
376 DV_writeForHumanEye(forwardOpsDV, stdout) ;
377 fflush(stdout) ;
378 #endif
379 #if SUBTREE_DEFINITION == SUBTREE_SIZE
380 /*
381 -----------------------------
382 get a vector of subtree sizes
383 -----------------------------
384 */
385 {
386 DV *tempDV ;
387 IV *tempIV ;
388 tempIV = ETree_nvtxMetric(frontETree) ;
389 fprintf(stdout, "\n\n nvtx metric") ;
390 IV_writeForHumanEye(tempIV, stdout) ;
391 tempDV = DV_new() ;
392 for ( J = 0 ; J < nfront ; J++ ) {
393 DV_setEntry(tempDV, J, (double) IV_entry(tempIV, J)) ;
394 }
395 #if MYDEBUG > 0
396 fprintf(stdout, "\n\n double nvtx metric") ;
397 DV_writeForHumanEye(tempDV, stdout) ;
398 #endif
399 tmetricDV = Tree_setSubtreeDmetric(tree, tempDV) ;
400 IV_free(tempIV) ;
401 DV_free(tempDV) ;
402 }
403 #endif
404 #if SUBTREE_DEFINITION == SUBTREE_OPS
405 tmetricDV = Tree_setSubtreeDmetric(tree, forwardOpsDV) ;
406 #endif
407 /*
408 ------------------------
409 get a multisector vector
410 ------------------------
411 */
412 msIV = IV_new() ;
413 IV_init(msIV, nfront, NULL) ;
414 IV_fill(msIV, 0) ;
415 ms = IV_entries(msIV) ;
416 #if MYDEBUG > 0
417 fprintf(stdout, "\n\n double nvtx subtree metric") ;
418 DV_writeForHumanEye(tmetricDV, stdout) ;
419 #endif
420 tmetric = DV_entries(tmetricDV) ;
421 cutoff = cutoff * DV_max(tmetricDV) ;
422 for ( J = 0 ; J < nfront ; J++ ) {
423 if ( tmetric[J] < cutoff ) {
424 ms[J] = 1 ;
425 }
426 }
427 #if MYDEBUG > 0
428 fprintf(stdout, "\n msIV") ;
429 IV_writeForHumanEye(msIV, stdout) ;
430 fflush(stdout) ;
431 #endif
432 /*
433 --------------------------------------------
434 create a rootAnc vector,
435 if J is in a domain then
436 rootAnc[J] is the root node of the domain
437 else
438 rootAnc[J] is the root node of the tree
439 endif
440 --------------------------------------------
441 */
442 rootAncIV = IV_new() ;
443 IV_init(rootAncIV, nfront, NULL) ;
444 rootAnc = IV_entries(rootAncIV) ;
445 for ( J = nfront - 1 ; J >= 0 ; J-- ) {
446 if ( (K = par[J]) == -1 || ms[J] != ms[K] ) {
447 rootAnc[J] = J ;
448 } else {
449 rootAnc[J] = rootAnc[K] ;
450 }
451 }
452 #if MYDEBUG > 0
453 fprintf(stdout, "\n rootAncIV") ;
454 IV_writeForHumanEye(rootAncIV, stdout) ;
455 fflush(stdout) ;
456 #endif
457 /*
458 ------------------------------
459 initialize the ownersIV object
460 ------------------------------
461 */
462 ownersIV = IV_new() ;
463 IV_init(ownersIV, nfront, NULL) ;
464 owners = IV_entries(ownersIV) ;
465 /*
466 --------------------------------------------------
467 assign the domains to threads using a balanced map
468 --------------------------------------------------
469 */
470 domainops = DVinit(nthread, 0.0) ;
471 root = -1 ;
472 ndom = 0 ;
473 for ( J = Tree_postOTfirst(tree) ;
474 J != -1 ;
475 J = Tree_postOTnext(tree, J) ) {
476 if ( ms[J] == 1 ) {
477 if ( root != rootAnc[J] ) {
478 ndom++ ;
479 root = rootAnc[J] ;
480 jthread = 0 ;
481 minops = domainops[0] ;
482 for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
483 if ( minops > domainops[ithread] ) {
484 jthread = ithread ;
485 minops = domainops[ithread] ;
486 }
487 }
488 }
489 owners[J] = jthread ;
490 domainops[jthread] += forwardOps[J] ;
491 }
492 }
493 #if MYDEBUG > 0
494 fprintf(stdout, "\n %d domains", ndom) ;
495 fprintf(stdout, "\n domainops") ;
496 DVfprintf(stdout, nthread, domainops) ;
497 fflush(stdout) ;
498 #endif
499 /*
500 ------------------------------------------------------------------
501 assign the schur complement fronts to threads using a balanced map
502 ------------------------------------------------------------------
503 */
504 schurops = DVinit(nthread, 0.0) ;
505 for ( J = Tree_postOTfirst(tree) ;
506 J != -1 ;
507 J = Tree_postOTnext(tree, J) ) {
508 if ( ms[J] == 0 ) {
509 jthread = 0 ;
510 minops = schurops[0] ;
511 for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
512 if ( minops > schurops[ithread] ) {
513 jthread = ithread ;
514 minops = schurops[ithread] ;
515 }
516 }
517 owners[J] = jthread ;
518 schurops[jthread] += forwardOps[J] ;
519 }
520 }
521 #if MYDEBUG > 0
522 fprintf(stdout, "\n schurops") ;
523 DVfprintf(stdout, nthread, schurops) ;
524 fflush(stdout) ;
525 #endif
526 /*
527 -------------------------------------
528 fill the cumulative operations vector
529 -------------------------------------
530 */
531 for ( jthread = 0 ; jthread < nthread ; jthread++ ) {
532 cumops[jthread] = domainops[jthread] + schurops[jthread] ;
533 }
534 #if MYDEBUG > 0
535 fprintf(stdout, "\n cumops") ;
536 DVfprintf(stdout, nthread, cumops) ;
537 fflush(stdout) ;
538 #endif
539 /*
540 ------------------------
541 free the working storage
542 ------------------------
543 */
544 DV_free(forwardOpsDV) ;
545 DV_free(tmetricDV) ;
546 IV_free(msIV) ;
547 IV_free(rootAncIV) ;
548 DVfree(domainops) ;
549 DVfree(schurops) ;
550
551 return(ownersIV) ; }
552
553 /*--------------------------------------------------------------------*/
554 /*
555 ----------------------------------------------------------------
556 this method constructs and returns an IV object that holds the
557 map from fronts to threads for a domain decomposition
558 balanced map of the front tree.
559 the domains are mapped to threads using a balanced map,
560 and the schur complement fronts are mapped to threads
561 using a balanced map, but the two balanced maps are independent.
562
563 created -- 97jan17, cca
564 ----------------------------------------------------------------
565 */
566 IV *
ETree_ddMapNew(ETree * frontETree,int type,int symflag,IV * msIV,DV * cumopsDV)567 ETree_ddMapNew (
568 ETree *frontETree,
569 int type,
570 int symflag,
571 IV *msIV,
572 DV *cumopsDV
573 ) {
574 double minops ;
575 double *cumops, *domprios, *domops, *forwardOps,
576 *schurprios, *schurops ;
577 DV *forwardOpsDV ;
578 int domid, idom, ireg, ithread, jthread, J, K, nbndJ,
579 ndom, nfront, nJ, nschur, nthread,
580 nvtx, v ;
581 int *bndwghts, *domids, *dommap, *frontToRegion, *ms,
582 *nodwghts, *owners, *par, *schurids, *vtxToFront ;
583 IV *ownersIV ;
584 Tree *tree ;
585 /*
586 ---------------
587 check the input
588 ---------------
589 */
590 if ( frontETree == NULL || cumopsDV == NULL ) {
591 fprintf(stderr, "\n fatal error in ETree_ddMapNew(%p,%p,%p)"
592 "\n bad input\n", frontETree, msIV, cumopsDV) ;
593 exit(-1) ;
594 }
595 nfront = ETree_nfront(frontETree) ;
596 nvtx = ETree_nvtx(frontETree) ;
597 tree = ETree_tree(frontETree) ;
598 vtxToFront = ETree_vtxToFront(frontETree) ;
599 nodwghts = ETree_nodwghts(frontETree) ;
600 bndwghts = ETree_bndwghts(frontETree) ;
601 par = ETree_par(frontETree) ;
602 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
603 DV_zero(cumopsDV) ;
604 ms = IV_entries(msIV) ;
605 ownersIV = IV_new() ;
606 IV_init(ownersIV, nfront, NULL) ;
607 owners = IV_entries(ownersIV) ;
608 /*
609 --------------------------------------
610 compute the frontToRegion[] map vector
611 --------------------------------------
612 */
613 frontToRegion = IVinit(nfront, -1) ;
614 for ( v = 0 ; v < nvtx ; v++ ) {
615 J = vtxToFront[v] ;
616 frontToRegion[J] = ms[v] ;
617 }
618 #if MYDEBUG > 0
619 fprintf(stdout, "\n frontToRegion[] from msIV") ;
620 IVfprintf(stdout, nfront, frontToRegion) ;
621 #endif
622 ndom = 0 ;
623 for ( J = Tree_preOTfirst(tree) ;
624 J != -1 ;
625 J = Tree_preOTnext(tree, J) ) {
626 if ( frontToRegion[J] != 0 ) {
627 if ( (K = par[J]) != -1 ) {
628 if ( frontToRegion[K] == 0 ) {
629 frontToRegion[J] = ++ndom ;
630 } else {
631 frontToRegion[J] = frontToRegion[K] ;
632 }
633 } else {
634 frontToRegion[J] = ++ndom ;
635 }
636 }
637 }
638 #if MYDEBUG > 0
639 fprintf(stdout, "\n frontToRegion[], separate domains") ;
640 IVfprintf(stdout, nfront, frontToRegion) ;
641 #endif
642 /*
643 ---------------------------------
644 get a vector of forward op counts
645 ---------------------------------
646 */
647 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
648 forwardOps = DV_entries(forwardOpsDV) ;
649 #if MYDEBUG > 0
650 fprintf(stdout, "\n forwardOpsDV") ;
651 DV_writeForHumanEye(forwardOpsDV, stdout) ;
652 #endif
653 /*
654 ----------------------------------------
655 for each domain, compute the forward ops
656 ----------------------------------------
657 */
658 domprios = DVinit(ndom+1, 0.0) ;
659 for ( J = 0 ; J < nfront ; J++ ) {
660 if ( (ireg = frontToRegion[J]) > 0 ) {
661 domprios[ireg] += forwardOps[J] ;
662 }
663 }
664 #if MYDEBUG > 0
665 fprintf(stdout, "\n domprios[], sum = %.0f",
666 DVsum(ndom, domprios+1)) ;
667 DVfprintf(stdout, ndom+1, domprios) ;
668 #endif
669 /*
670 --------------------------------------------------------
671 sort the domains by descending order of their operations
672 --------------------------------------------------------
673 */
674 domids = IVinit(ndom, -1) ;
675 IVramp(ndom, domids, 1, 1) ;
676 #if MYDEBUG > 0
677 fprintf(stdout, "\n before sort: domids") ;
678 IVfprintf(stdout, ndom, domids) ;
679 fprintf(stdout, "\n before sort: domprios") ;
680 DVfprintf(stdout, ndom, domprios+1) ;
681 #endif
682 DVIVqsortDown(ndom, domprios + 1, domids) ;
683 #if MYDEBUG > 0
684 fprintf(stdout, "\n after sort: domids") ;
685 IVfprintf(stdout, ndom, domids) ;
686 fprintf(stdout, "\n after sort: domprios") ;
687 DVfprintf(stdout, ndom, domprios+1) ;
688 #endif
689 /*
690 ----------------------------
691 assign domains to processors
692 ----------------------------
693 */
694 dommap = IVinit(ndom+1, -1) ;
695 domops = DVinit(nthread, 0.0) ;
696 for ( idom = 0 ; idom < ndom ; idom++ ) {
697 domid = domids[idom] ;
698 jthread = 0 ;
699 minops = domops[0] ;
700 for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
701 if ( domops[ithread] < minops ) {
702 jthread = ithread ;
703 minops = domops[ithread] ;
704 }
705 }
706 dommap[domid] = jthread ;
707 domops[jthread] += domprios[idom+1] ;
708 #if MYDEBUG > 0
709 fprintf(stdout,
710 "\n assigning domain %d with ops %.0f to thread %d, new ops = %.0f",
711 domid, domprios[idom+1], jthread, domops[jthread]) ;
712 #endif
713 }
714 #if MYDEBUG > 0
715 fprintf(stdout, "\n domops[]") ;
716 DVfprintf(stdout, nthread, domops) ;
717 #endif
718 /*
719 -------------------------------------
720 assign fronts in domains to processes
721 -------------------------------------
722 */
723 for ( J = 0 ; J < nfront ; J++ ) {
724 if ( (idom = frontToRegion[J]) > 0 ) {
725 owners[J] = dommap[idom] ;
726 }
727 }
728 /*
729 -----------------------------------------------------
730 now compute priorities of the schur complement fronts
731 -----------------------------------------------------
732 */
733 schurprios = DVinit(nfront, 0.0) ;
734 for ( J = 0 ; J < nfront ; J++ ) {
735 if ( frontToRegion[J] == 0 ) {
736 nJ = nodwghts[J] ;
737 nbndJ = bndwghts[J] ;
738 schurprios[J] = nJ*nJ*(nJ + nbndJ) ;
739 }
740 }
741 #if MYDEBUG > 0
742 fprintf(stdout, "\n schur front ops") ;
743 DVfprintf(stdout, nfront, schurprios) ;
744 #endif
745 for ( J = Tree_preOTfirst(tree) ;
746 J != -1 ;
747 J = Tree_preOTnext(tree, J) ) {
748 if ( frontToRegion[J] == 0 ) {
749 if ( (K = par[J]) != -1 ) {
750 schurprios[J] += schurprios[K] ;
751 }
752 }
753 }
754 /*
755 --------------------------------------------------
756 sort the priorities of the schur complement fronts
757 in descending order of their priorities
758 --------------------------------------------------
759 */
760 schurids = IVinit(nfront, -1) ;
761 for ( J = nschur = 0 ; J < nfront ; J++ ) {
762 if ( frontToRegion[J] == 0 ) {
763 schurids[nschur] = J ;
764 schurprios[nschur] = schurprios[J] ;
765 nschur++ ;
766 }
767 }
768 #if MYDEBUG > 0
769 fprintf(stdout, "\n before sort: schurids") ;
770 IVfprintf(stdout, nschur, schurids) ;
771 fprintf(stdout, "\n before sort: schurprios") ;
772 DVfprintf(stdout, nschur, schurprios) ;
773 #endif
774 DVIVqsortDown(nschur, schurprios, schurids) ;
775 #if MYDEBUG > 0
776 fprintf(stdout, "\n after sort: schurids") ;
777 IVfprintf(stdout, nschur, schurids) ;
778 fprintf(stdout, "\n after sort: schurprios") ;
779 DVfprintf(stdout, nschur, schurprios) ;
780 #endif
781 /*
782 ---------------------------------
783 assign schur fronts to processors
784 ---------------------------------
785 */
786 schurops = DVinit(nthread, 0.0) ;
787 for ( ireg = 0 ; ireg < nschur ; ireg++ ) {
788 J = schurids[ireg] ;
789 jthread = 0 ;
790 minops = schurops[0] ;
791 for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
792 if ( schurops[ithread] < minops ) {
793 jthread = ithread ;
794 minops = schurops[ithread] ;
795 }
796 }
797 owners[J] = jthread ;
798 schurops[jthread] += forwardOps[J] ;
799 #if MYDEBUG > 0
800 fprintf(stdout,
801 "\n assigning schur front %d to thread %d, new ops = %.0f",
802 J, jthread, schurops[jthread]) ;
803 #endif
804 }
805 /*
806 ------------------------------
807 fill the cumulative ops vector
808 ------------------------------
809 */
810 for ( jthread = 0 ; jthread < nthread ; jthread++ ) {
811 cumops[jthread] = domops[jthread] + schurops[jthread] ;
812 }
813 #if MYDEBUG > 0
814 fprintf(stdout, "\n domops[]") ;
815 DVfprintf(stdout, nthread, domops) ;
816 fprintf(stdout, "\n schurops[]") ;
817 DVfprintf(stdout, nthread, schurops) ;
818 fprintf(stdout, "\n sum(domops) = %.0f", DVsum(nthread, domops)) ;
819 fprintf(stdout, "\n sum(schurops) = %.0f", DVsum(nthread, schurops)) ;
820 fprintf(stdout, "\n sum(cumops) = %.0f", DV_sum(cumopsDV)) ;
821 #endif
822 /*
823 ------------------------
824 free the working storage
825 ------------------------
826 */
827 IVfree(frontToRegion) ;
828 IVfree(domids) ;
829 IVfree(dommap) ;
830 IVfree(schurids) ;
831 DV_free(forwardOpsDV) ;
832 DVfree(domprios) ;
833 DVfree(domops) ;
834 DVfree(schurprios) ;
835 DVfree(schurops) ;
836
837 return(ownersIV) ; }
838
839 /*--------------------------------------------------------------------*/
840