1 /*  maps.c  */
2 
3 #include "../ETree.h"
4 
5 #define MYDEBUG  0
6 
7 #define SUBTREE_SIZE   1
8 #define SUBTREE_OPS    2
9 #define SUBTREE_DEFINITION SUBTREE_OPS
10 
11 /*--------------------------------------------------------------------*/
12 /*
13    --------------------------------------------------------------
14    this method constructs and returns an IV object that holds the
15    map from fronts to threads for a wrap map of the front tree.
16 
17    created -- 96dec12, cca
18    --------------------------------------------------------------
19 */
20 IV *
ETree_wrapMap(ETree * frontETree,int type,int symflag,DV * cumopsDV)21 ETree_wrapMap (
22    ETree   *frontETree,
23    int     type,
24    int     symflag,
25    DV      *cumopsDV
26 ) {
27 double   *cumops, *forwardOps ;
28 DV       *forwardOpsDV ;
29 int      jthread, J, nfront, nthread ;
30 int      *owners ;
31 IV       *ownersIV ;
32 Tree     *tree ;
33 /*
34    ---------------
35    check the input
36    ---------------
37 */
38 if ( frontETree == NULL || cumopsDV == NULL ) {
39    fprintf(stderr, "\n fatal error in ETree_wrapMap(%p,%p)"
40            "\n bad input\n", frontETree, cumopsDV) ;
41    exit(-1) ;
42 }
43 tree = frontETree->tree ;
44 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
45 DV_zero(cumopsDV) ;
46 /*
47    ---------------------------------
48    get a vector of forward op counts
49    ---------------------------------
50 */
51 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
52 DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
53 /*
54    -------------------
55    load the map vector
56    -------------------
57 */
58 ownersIV = IV_new() ;
59 IV_init(ownersIV, nfront, NULL) ;
60 owners = IV_entries(ownersIV) ;
61 for ( J = Tree_postOTfirst(tree) ;
62       J != -1 ;
63       J = Tree_postOTnext(tree, J) ) {
64    jthread = J % nthread ;
65    owners[J] = jthread ;
66    cumops[jthread] += forwardOps[J] ;
67 }
68 /*
69    ------------------------
70    free the working storage
71    ------------------------
72 */
73 DV_free(forwardOpsDV) ;
74 
75 return(ownersIV) ; }
76 
77 /*--------------------------------------------------------------------*/
78 /*
79    ----------------------------------------------------------------
80    this method constructs and returns an IV object that holds the
81    map from fronts to threads for a balanced map of the front tree.
82    the fronts are visited in the post-order traversal.
83 
84    created -- 96dec12, cca
85    ----------------------------------------------------------------
86 */
87 IV *
ETree_balancedMap(ETree * frontETree,int type,int symflag,DV * cumopsDV)88 ETree_balancedMap (
89    ETree   *frontETree,
90    int     type,
91    int     symflag,
92    DV      *cumopsDV
93 ) {
94 double   minops ;
95 double   *cumops, *forwardOps ;
96 DV       *forwardOpsDV ;
97 int      ithread, jthread, J, nfront, nthread ;
98 int      *owners ;
99 IV       *ownersIV ;
100 Tree     *tree ;
101 /*
102    ---------------
103    check the input
104    ---------------
105 */
106 if ( frontETree == NULL || cumopsDV == NULL ) {
107    fprintf(stderr, "\n fatal error in ETree_balancedMap(%p,%p)"
108            "\n bad input\n", frontETree, cumopsDV) ;
109    exit(-1) ;
110 }
111 tree = frontETree->tree ;
112 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
113 DV_zero(cumopsDV) ;
114 /*
115    ---------------------------------
116    get a vector of forward op counts
117    ---------------------------------
118 */
119 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
120 DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
121 /*
122    -------------------
123    load the map vector
124    -------------------
125 */
126 ownersIV = IV_new() ;
127 IV_init(ownersIV, nfront, NULL) ;
128 owners = IV_entries(ownersIV) ;
129 for ( J = Tree_postOTfirst(tree) ;
130       J != -1 ;
131       J = Tree_postOTnext(tree, J) ) {
132    jthread = 0 ;
133    minops  = cumops[0] ;
134    for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
135       if ( minops > cumops[ithread] ) {
136          jthread = ithread ;
137          minops  = cumops[ithread] ;
138       }
139    }
140    owners[J] = jthread ;
141    cumops[jthread] += forwardOps[J] ;
142 }
143 /*
144    ------------------------
145    free the working storage
146    ------------------------
147 */
148 DV_free(forwardOpsDV) ;
149 
150 return(ownersIV) ; }
151 
152 /*--------------------------------------------------------------------*/
153 /*
154    -----------------------------------------------
155    this method constructs and returns an IV object
156    that holds the map from fronts to threads for a
157    "subtree-subset" map of the front tree.
158 
159    created -- 97jan15, cca
160    -----------------------------------------------
161 */
162 IV *
ETree_subtreeSubsetMap(ETree * frontETree,int type,int symflag,DV * cumopsDV)163 ETree_subtreeSubsetMap (
164    ETree   *frontETree,
165    int     type,
166    int     symflag,
167    DV      *cumopsDV
168 ) {
169 double   offset, total ;
170 double   *cumops, *forwardOps, *tmetric ;
171 DV       *forwardOpsDV, *tmetricDV ;
172 int      I, J, mthread, nfront, nthread, q, qmin ;
173 int      *fch, *firsts, *lasts, *owners, *par, *sib ;
174 IV       *ownersIV ;
175 Tree     *tree ;
176 /*
177    ---------------
178    check the input
179    ---------------
180 */
181 if ( frontETree == NULL || cumopsDV == NULL ) {
182    fprintf(stderr, "\n fatal error in ETree_subtreeSubsetMap(%p,%p)"
183            "\n bad input\n", frontETree, cumopsDV) ;
184    exit(-1) ;
185 }
186 tree = frontETree->tree ;
187 fch  = tree->fch ;
188 par  = tree->par ;
189 sib  = tree->sib ;
190 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
191 DV_zero(cumopsDV) ;
192 /*
193    ---------------------------------
194    get a vector of forward op counts
195    ---------------------------------
196 */
197 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
198 DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
199 #if MYDEBUG > 0
200 fprintf(stdout, "\n\n forwardOpsDV") ;
201 DV_writeForHumanEye(forwardOpsDV, stdout) ;
202 fflush(stdout) ;
203 #endif
204 /*
205    --------------------------------
206    get the subtree metric DV object
207    --------------------------------
208 */
209 tmetricDV = Tree_setSubtreeDmetric(tree, forwardOpsDV) ;
210 tmetric = DV_entries(tmetricDV) ;
211 #if MYDEBUG > 0
212 fprintf(stdout, "\n\n tmetricDV") ;
213 DV_writeForHumanEye(tmetricDV, stdout) ;
214 fflush(stdout) ;
215 #endif
216 /*
217    -----------------------------------
218    left justify the tree to make the
219    assignment algorithm work correctly
220    -----------------------------------
221 */
222 ETree_leftJustifyD(frontETree, tmetricDV) ;
223 /*
224    -----------------------------------------------------
225    fill two vectors that hold the first and last threads
226    that are eligible to own a front
227    -----------------------------------------------------
228 */
229 #if MYDEBUG > 0
230 fprintf(stdout, "\n\n pre-order traversal to determine eligible sets") ;
231 fflush(stdout) ;
232 #endif
233 firsts = IVinit(nfront, -1) ;
234 lasts  = IVinit(nfront, -1) ;
235 for ( J = Tree_preOTfirst(tree) ;
236       J != -1 ;
237       J = Tree_preOTnext(tree, J) ) {
238 #if MYDEBUG > 0
239    fprintf(stdout, "\n\n visiting front %d", J) ;
240    fflush(stdout) ;
241 #endif
242    if ( par[J] == -1 ) {
243       firsts[J] = 0 ;
244       lasts[J]  = nthread - 1 ;
245 #if MYDEBUG > 0
246       fprintf(stdout, ", root front") ;
247       fflush(stdout) ;
248 #endif
249    }
250 #if MYDEBUG > 0
251    fprintf(stdout, "\n first = %d, last = %d", firsts[J], lasts[J]) ;
252    fflush(stdout) ;
253 #endif
254    if ( fch[J] != -1 ) {
255       mthread = lasts[J] - firsts[J] + 1 ;
256       total   = tmetric[J] - forwardOps[J] ;
257 #if MYDEBUG > 0
258       fprintf(stdout, "\n mthread = %d, total = %.0f", mthread, total) ;
259       fflush(stdout) ;
260 #endif
261       for ( I = fch[J], offset = 0.0 ; I != -1 ; I = sib[I] ) {
262          firsts[I] = firsts[J] + (int) (mthread*offset/total) ;
263 #if MYDEBUG > 0
264          fprintf(stdout, "\n child %d, offset = %.0f, firsts[%d] = %d",
265                  I, offset, I, firsts[I]) ;
266          fflush(stdout) ;
267 #endif
268          offset += tmetric[I] ;
269          lasts[I] = firsts[J] + (int) (mthread*offset/total) - 1 ;
270          if ( lasts[I] < firsts[I] ) {
271             lasts[I] = firsts[I] ;
272          }
273 #if MYDEBUG > 0
274          fprintf(stdout, "\n child %d, offset = %.0f, lasts[%d] = %d",
275                  I, offset, I, lasts[I]) ;
276          fflush(stdout) ;
277 #endif
278       }
279    }
280 }
281 /*
282    ---------------------------------------------------------------
283    now fill the map IV object and cumops[*] vector with a
284    balanced map using the candidate sets via a postorder traversal
285    ---------------------------------------------------------------
286 */
287 ownersIV = IV_new() ;
288 IV_init(ownersIV, nfront, NULL) ;
289 owners = IV_entries(ownersIV) ;
290 for ( J = Tree_postOTfirst(tree) ;
291       J != -1 ;
292       J = Tree_postOTnext(tree, J) ) {
293 #if MYDEBUG > 0
294    fprintf(stdout, "\n front %d, firsts[%d] = %d, lasts[%d] = %d",
295            J, J, firsts[J], J, lasts[J]) ;
296    fflush(stdout) ;
297 #endif
298    qmin = firsts[J] ;
299    for ( q = firsts[J] + 1 ; q <= lasts[J] ; q++ ) {
300       if ( cumops[qmin] > cumops[q] ) {
301          qmin = q ;
302       }
303    }
304    owners[J] = qmin ;
305    cumops[qmin] += forwardOps[J] ;
306 #if MYDEBUG > 0
307    fprintf(stdout, ", owners[%d] = %d, cumops[%d] = %.0f",
308            J, owners[J], qmin, cumops[qmin]) ;
309    fflush(stdout) ;
310 #endif
311 }
312 /*
313    ------------------------
314    free the working storage
315    ------------------------
316 */
317 DV_free(forwardOpsDV) ;
318 DV_free(tmetricDV) ;
319 IVfree(firsts) ;
320 IVfree(lasts) ;
321 
322 return(ownersIV) ; }
323 
324 /*--------------------------------------------------------------------*/
325 /*
326    ----------------------------------------------------------------
327    this method constructs and returns an IV object that holds the
328    map from fronts to threads for a domain decomposition
329    balanced map of the front tree.
330    the domains are mapped to threads using a balanced map,
331    and the schur complement fronts are mapped to threads
332    using a balanced map, but the two balanced maps are independent.
333 
334    created -- 97jan17, cca
335    ----------------------------------------------------------------
336 */
337 IV *
ETree_ddMap(ETree * frontETree,int type,int symflag,DV * cumopsDV,double cutoff)338 ETree_ddMap (
339    ETree    *frontETree,
340    int     type,
341    int     symflag,
342    DV       *cumopsDV,
343    double   cutoff
344 ) {
345 double   minops ;
346 double   *cumops, *domainops, *forwardOps, *schurops, *tmetric ;
347 DV       *forwardOpsDV, *tmetricDV ;
348 int      ithread, jthread, J, K, ndom, nfront, nthread, root ;
349 int      *ms, *owners, *par, *rootAnc ;
350 IV       *msIV, *ownersIV, *rootAncIV ;
351 Tree     *tree ;
352 /*
353    ---------------
354    check the input
355    ---------------
356 */
357 if ( frontETree == NULL || cumopsDV == NULL ) {
358    fprintf(stderr, "\n fatal error in ETree_ddMap(%p,%p,%f)"
359            "\n bad input\n", frontETree, cumopsDV, cutoff) ;
360    exit(-1) ;
361 }
362 nfront = frontETree->nfront ;
363 tree   = frontETree->tree ;
364 par    = tree->par ;
365 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
366 DV_zero(cumopsDV) ;
367 /*
368    ---------------------------------
369    get a vector of forward op counts
370    ---------------------------------
371 */
372 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
373 DV_sizeAndEntries(forwardOpsDV, &nfront, &forwardOps) ;
374 #if MYDEBUG > 0
375 fprintf(stdout, "\n forwardOpsDV") ;
376 DV_writeForHumanEye(forwardOpsDV, stdout) ;
377 fflush(stdout) ;
378 #endif
379 #if SUBTREE_DEFINITION == SUBTREE_SIZE
380 /*
381    -----------------------------
382    get a vector of subtree sizes
383    -----------------------------
384 */
385 {
386 DV   *tempDV ;
387 IV   *tempIV ;
388 tempIV = ETree_nvtxMetric(frontETree) ;
389 fprintf(stdout, "\n\n nvtx metric") ;
390 IV_writeForHumanEye(tempIV, stdout) ;
391 tempDV = DV_new() ;
392 for ( J = 0 ; J < nfront ; J++ ) {
393    DV_setEntry(tempDV, J, (double) IV_entry(tempIV, J)) ;
394 }
395 #if MYDEBUG > 0
396 fprintf(stdout, "\n\n double nvtx metric") ;
397 DV_writeForHumanEye(tempDV, stdout) ;
398 #endif
399 tmetricDV = Tree_setSubtreeDmetric(tree, tempDV) ;
400 IV_free(tempIV) ;
401 DV_free(tempDV) ;
402 }
403 #endif
404 #if SUBTREE_DEFINITION == SUBTREE_OPS
405 tmetricDV = Tree_setSubtreeDmetric(tree, forwardOpsDV) ;
406 #endif
407 /*
408    ------------------------
409    get a multisector vector
410    ------------------------
411 */
412 msIV = IV_new() ;
413 IV_init(msIV, nfront, NULL) ;
414 IV_fill(msIV, 0) ;
415 ms = IV_entries(msIV) ;
416 #if MYDEBUG > 0
417 fprintf(stdout, "\n\n double nvtx subtree metric") ;
418 DV_writeForHumanEye(tmetricDV, stdout) ;
419 #endif
420 tmetric   = DV_entries(tmetricDV) ;
421 cutoff = cutoff * DV_max(tmetricDV) ;
422 for ( J = 0 ; J < nfront ; J++ ) {
423    if ( tmetric[J] < cutoff ) {
424       ms[J] = 1 ;
425    }
426 }
427 #if MYDEBUG > 0
428 fprintf(stdout, "\n msIV") ;
429 IV_writeForHumanEye(msIV, stdout) ;
430 fflush(stdout) ;
431 #endif
432 /*
433    --------------------------------------------
434    create a rootAnc vector,
435    if J is in a domain then
436       rootAnc[J] is the root node of the domain
437    else
438       rootAnc[J] is the root node of the tree
439    endif
440    --------------------------------------------
441 */
442 rootAncIV = IV_new() ;
443 IV_init(rootAncIV, nfront, NULL) ;
444 rootAnc   = IV_entries(rootAncIV) ;
445 for ( J = nfront - 1 ; J >= 0 ; J-- ) {
446    if ( (K = par[J]) == -1 || ms[J] != ms[K] ) {
447       rootAnc[J] = J ;
448    } else {
449       rootAnc[J] = rootAnc[K] ;
450    }
451 }
452 #if MYDEBUG > 0
453 fprintf(stdout, "\n rootAncIV") ;
454 IV_writeForHumanEye(rootAncIV, stdout) ;
455 fflush(stdout) ;
456 #endif
457 /*
458    ------------------------------
459    initialize the ownersIV object
460    ------------------------------
461 */
462 ownersIV = IV_new() ;
463 IV_init(ownersIV, nfront, NULL) ;
464 owners = IV_entries(ownersIV) ;
465 /*
466    --------------------------------------------------
467    assign the domains to threads using a balanced map
468    --------------------------------------------------
469 */
470 domainops = DVinit(nthread, 0.0) ;
471 root = -1 ;
472 ndom =  0 ;
473 for ( J = Tree_postOTfirst(tree) ;
474       J != -1 ;
475       J = Tree_postOTnext(tree, J) ) {
476    if ( ms[J] == 1 ) {
477       if ( root != rootAnc[J] ) {
478          ndom++ ;
479          root    = rootAnc[J] ;
480          jthread = 0 ;
481          minops  = domainops[0] ;
482          for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
483             if ( minops > domainops[ithread] ) {
484                jthread = ithread ;
485                minops  = domainops[ithread] ;
486             }
487          }
488       }
489       owners[J] = jthread ;
490       domainops[jthread] += forwardOps[J] ;
491    }
492 }
493 #if MYDEBUG > 0
494 fprintf(stdout, "\n %d domains", ndom) ;
495 fprintf(stdout, "\n domainops") ;
496 DVfprintf(stdout, nthread, domainops) ;
497 fflush(stdout) ;
498 #endif
499 /*
500    ------------------------------------------------------------------
501    assign the schur complement fronts to threads using a balanced map
502    ------------------------------------------------------------------
503 */
504 schurops = DVinit(nthread, 0.0) ;
505 for ( J = Tree_postOTfirst(tree) ;
506       J != -1 ;
507       J = Tree_postOTnext(tree, J) ) {
508    if ( ms[J] == 0 ) {
509       jthread = 0 ;
510       minops  = schurops[0] ;
511       for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
512          if ( minops > schurops[ithread] ) {
513             jthread = ithread ;
514             minops  = schurops[ithread] ;
515          }
516       }
517       owners[J] = jthread ;
518       schurops[jthread] += forwardOps[J] ;
519    }
520 }
521 #if MYDEBUG > 0
522 fprintf(stdout, "\n schurops") ;
523 DVfprintf(stdout, nthread, schurops) ;
524 fflush(stdout) ;
525 #endif
526 /*
527    -------------------------------------
528    fill the cumulative operations vector
529    -------------------------------------
530 */
531 for ( jthread = 0 ; jthread < nthread ; jthread++ ) {
532    cumops[jthread] = domainops[jthread] + schurops[jthread] ;
533 }
534 #if MYDEBUG > 0
535 fprintf(stdout, "\n cumops") ;
536 DVfprintf(stdout, nthread, cumops) ;
537 fflush(stdout) ;
538 #endif
539 /*
540    ------------------------
541    free the working storage
542    ------------------------
543 */
544 DV_free(forwardOpsDV) ;
545 DV_free(tmetricDV) ;
546 IV_free(msIV) ;
547 IV_free(rootAncIV) ;
548 DVfree(domainops) ;
549 DVfree(schurops) ;
550 
551 return(ownersIV) ; }
552 
553 /*--------------------------------------------------------------------*/
554 /*
555    ----------------------------------------------------------------
556    this method constructs and returns an IV object that holds the
557    map from fronts to threads for a domain decomposition
558    balanced map of the front tree.
559    the domains are mapped to threads using a balanced map,
560    and the schur complement fronts are mapped to threads
561    using a balanced map, but the two balanced maps are independent.
562 
563    created -- 97jan17, cca
564    ----------------------------------------------------------------
565 */
566 IV *
ETree_ddMapNew(ETree * frontETree,int type,int symflag,IV * msIV,DV * cumopsDV)567 ETree_ddMapNew (
568    ETree   *frontETree,
569    int     type,
570    int     symflag,
571    IV      *msIV,
572    DV      *cumopsDV
573 ) {
574 double   minops ;
575 double   *cumops, *domprios, *domops, *forwardOps,
576          *schurprios, *schurops ;
577 DV       *forwardOpsDV ;
578 int      domid, idom, ireg, ithread, jthread, J, K, nbndJ,
579          ndom, nfront, nJ, nschur, nthread,
580          nvtx, v ;
581 int      *bndwghts, *domids, *dommap, *frontToRegion, *ms,
582          *nodwghts, *owners, *par, *schurids, *vtxToFront ;
583 IV       *ownersIV ;
584 Tree     *tree ;
585 /*
586    ---------------
587    check the input
588    ---------------
589 */
590 if ( frontETree == NULL || cumopsDV == NULL ) {
591    fprintf(stderr, "\n fatal error in ETree_ddMapNew(%p,%p,%p)"
592            "\n bad input\n", frontETree, msIV, cumopsDV) ;
593    exit(-1) ;
594 }
595 nfront     = ETree_nfront(frontETree) ;
596 nvtx       = ETree_nvtx(frontETree) ;
597 tree       = ETree_tree(frontETree) ;
598 vtxToFront = ETree_vtxToFront(frontETree) ;
599 nodwghts   = ETree_nodwghts(frontETree) ;
600 bndwghts   = ETree_bndwghts(frontETree) ;
601 par        = ETree_par(frontETree) ;
602 DV_sizeAndEntries(cumopsDV, &nthread, &cumops) ;
603 DV_zero(cumopsDV) ;
604 ms = IV_entries(msIV) ;
605 ownersIV = IV_new() ;
606 IV_init(ownersIV, nfront, NULL) ;
607 owners = IV_entries(ownersIV) ;
608 /*
609    --------------------------------------
610    compute the frontToRegion[] map vector
611    --------------------------------------
612 */
613 frontToRegion = IVinit(nfront, -1) ;
614 for ( v = 0 ; v < nvtx ; v++ ) {
615    J = vtxToFront[v] ;
616    frontToRegion[J] = ms[v] ;
617 }
618 #if MYDEBUG > 0
619 fprintf(stdout, "\n frontToRegion[] from msIV") ;
620 IVfprintf(stdout, nfront, frontToRegion) ;
621 #endif
622 ndom = 0 ;
623 for ( J = Tree_preOTfirst(tree) ;
624       J != -1 ;
625       J = Tree_preOTnext(tree, J) ) {
626    if ( frontToRegion[J] != 0 ) {
627       if ( (K = par[J]) != -1 ) {
628          if ( frontToRegion[K] == 0 ) {
629             frontToRegion[J] = ++ndom ;
630          } else {
631             frontToRegion[J] = frontToRegion[K] ;
632          }
633       } else {
634          frontToRegion[J] = ++ndom ;
635       }
636    }
637 }
638 #if MYDEBUG > 0
639 fprintf(stdout, "\n frontToRegion[], separate domains") ;
640 IVfprintf(stdout, nfront, frontToRegion) ;
641 #endif
642 /*
643    ---------------------------------
644    get a vector of forward op counts
645    ---------------------------------
646 */
647 forwardOpsDV = ETree_forwardOps(frontETree, type, symflag) ;
648 forwardOps = DV_entries(forwardOpsDV) ;
649 #if MYDEBUG > 0
650 fprintf(stdout, "\n forwardOpsDV") ;
651 DV_writeForHumanEye(forwardOpsDV, stdout) ;
652 #endif
653 /*
654    ----------------------------------------
655    for each domain, compute the forward ops
656    ----------------------------------------
657 */
658 domprios = DVinit(ndom+1, 0.0) ;
659 for ( J = 0 ; J < nfront ; J++ ) {
660    if ( (ireg = frontToRegion[J]) > 0 ) {
661      domprios[ireg] += forwardOps[J] ;
662    }
663 }
664 #if MYDEBUG > 0
665 fprintf(stdout, "\n domprios[], sum = %.0f",
666         DVsum(ndom, domprios+1)) ;
667 DVfprintf(stdout, ndom+1, domprios) ;
668 #endif
669 /*
670    --------------------------------------------------------
671    sort the domains by descending order of their operations
672    --------------------------------------------------------
673 */
674 domids = IVinit(ndom, -1) ;
675 IVramp(ndom, domids, 1, 1) ;
676 #if MYDEBUG > 0
677 fprintf(stdout, "\n before sort: domids") ;
678 IVfprintf(stdout, ndom, domids) ;
679 fprintf(stdout, "\n before sort: domprios") ;
680 DVfprintf(stdout, ndom, domprios+1) ;
681 #endif
682 DVIVqsortDown(ndom, domprios + 1, domids) ;
683 #if MYDEBUG > 0
684 fprintf(stdout, "\n after sort: domids") ;
685 IVfprintf(stdout, ndom, domids) ;
686 fprintf(stdout, "\n after sort: domprios") ;
687 DVfprintf(stdout, ndom, domprios+1) ;
688 #endif
689 /*
690    ----------------------------
691    assign domains to processors
692    ----------------------------
693 */
694 dommap = IVinit(ndom+1, -1) ;
695 domops = DVinit(nthread, 0.0) ;
696 for ( idom = 0 ; idom < ndom ; idom++ ) {
697    domid = domids[idom] ;
698    jthread = 0 ;
699    minops = domops[0] ;
700    for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
701       if ( domops[ithread] < minops ) {
702          jthread = ithread ;
703          minops  = domops[ithread] ;
704       }
705    }
706    dommap[domid] = jthread ;
707    domops[jthread] += domprios[idom+1] ;
708 #if MYDEBUG > 0
709    fprintf(stdout,
710   "\n assigning domain %d with ops %.0f to thread %d, new ops = %.0f",
711            domid, domprios[idom+1], jthread, domops[jthread]) ;
712 #endif
713 }
714 #if MYDEBUG > 0
715 fprintf(stdout, "\n domops[]") ;
716 DVfprintf(stdout, nthread, domops) ;
717 #endif
718 /*
719    -------------------------------------
720    assign fronts in domains to processes
721    -------------------------------------
722 */
723 for ( J = 0 ; J < nfront ; J++ ) {
724    if ( (idom = frontToRegion[J]) > 0 ) {
725       owners[J] = dommap[idom] ;
726    }
727 }
728 /*
729    -----------------------------------------------------
730    now compute priorities of the schur complement fronts
731    -----------------------------------------------------
732 */
733 schurprios = DVinit(nfront, 0.0) ;
734 for ( J = 0 ; J < nfront ; J++ ) {
735    if ( frontToRegion[J] == 0 ) {
736       nJ    = nodwghts[J] ;
737       nbndJ = bndwghts[J] ;
738       schurprios[J] = nJ*nJ*(nJ + nbndJ) ;
739    }
740 }
741 #if MYDEBUG > 0
742 fprintf(stdout, "\n schur front ops") ;
743 DVfprintf(stdout, nfront, schurprios) ;
744 #endif
745 for ( J = Tree_preOTfirst(tree) ;
746       J != -1 ;
747       J = Tree_preOTnext(tree, J) ) {
748    if ( frontToRegion[J] == 0 ) {
749       if ( (K = par[J]) != -1 ) {
750          schurprios[J] += schurprios[K] ;
751       }
752    }
753 }
754 /*
755    --------------------------------------------------
756    sort the priorities of the schur complement fronts
757    in descending order of their priorities
758    --------------------------------------------------
759 */
760 schurids = IVinit(nfront, -1) ;
761 for ( J = nschur = 0 ; J < nfront ; J++ ) {
762    if ( frontToRegion[J] == 0 ) {
763       schurids[nschur]   = J ;
764       schurprios[nschur] = schurprios[J] ;
765       nschur++ ;
766    }
767 }
768 #if MYDEBUG > 0
769 fprintf(stdout, "\n before sort: schurids") ;
770 IVfprintf(stdout, nschur, schurids) ;
771 fprintf(stdout, "\n before sort: schurprios") ;
772 DVfprintf(stdout, nschur, schurprios) ;
773 #endif
774 DVIVqsortDown(nschur, schurprios, schurids) ;
775 #if MYDEBUG > 0
776 fprintf(stdout, "\n after sort: schurids") ;
777 IVfprintf(stdout, nschur, schurids) ;
778 fprintf(stdout, "\n after sort: schurprios") ;
779 DVfprintf(stdout, nschur, schurprios) ;
780 #endif
781 /*
782    ---------------------------------
783    assign schur fronts to processors
784    ---------------------------------
785 */
786 schurops = DVinit(nthread, 0.0) ;
787 for ( ireg = 0 ; ireg < nschur ; ireg++ ) {
788    J = schurids[ireg] ;
789    jthread = 0 ;
790    minops = schurops[0] ;
791    for ( ithread = 1 ; ithread < nthread ; ithread++ ) {
792       if ( schurops[ithread] < minops ) {
793          jthread = ithread ;
794          minops  = schurops[ithread] ;
795       }
796    }
797    owners[J] = jthread ;
798    schurops[jthread] += forwardOps[J] ;
799 #if MYDEBUG > 0
800    fprintf(stdout,
801            "\n assigning schur front %d to thread %d, new ops = %.0f",
802            J, jthread, schurops[jthread]) ;
803 #endif
804 }
805 /*
806    ------------------------------
807    fill the cumulative ops vector
808    ------------------------------
809 */
810 for ( jthread = 0 ; jthread < nthread ; jthread++ ) {
811    cumops[jthread] = domops[jthread] + schurops[jthread] ;
812 }
813 #if MYDEBUG > 0
814 fprintf(stdout, "\n domops[]") ;
815 DVfprintf(stdout, nthread, domops) ;
816 fprintf(stdout, "\n schurops[]") ;
817 DVfprintf(stdout, nthread, schurops) ;
818 fprintf(stdout, "\n sum(domops) = %.0f", DVsum(nthread, domops)) ;
819 fprintf(stdout, "\n sum(schurops) = %.0f", DVsum(nthread, schurops)) ;
820 fprintf(stdout, "\n sum(cumops) = %.0f", DV_sum(cumopsDV)) ;
821 #endif
822 /*
823    ------------------------
824    free the working storage
825    ------------------------
826 */
827 IVfree(frontToRegion) ;
828 IVfree(domids) ;
829 IVfree(dommap) ;
830 IVfree(schurids) ;
831 DV_free(forwardOpsDV) ;
832 DVfree(domprios) ;
833 DVfree(domops) ;
834 DVfree(schurprios) ;
835 DVfree(schurops) ;
836 
837 return(ownersIV) ; }
838 
839 /*--------------------------------------------------------------------*/
840